All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC v2] mm: Add f_ops->populate()
@ 2022-03-06  3:26 ` Jarkko Sakkinen
  0 siblings, 0 replies; 28+ messages in thread
From: Jarkko Sakkinen @ 2022-03-06  3:26 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Dave Hansen, Nathaniel McCallum, Reinette Chatre, linux-sgx,
	jaharkes, Jarkko Sakkinen, linux-mips, linux-kernel, intel-gfx,
	dri-devel, codalist, linux-unionfs, linux-fsdevel, linux-mm

Sometimes you might want to use MAP_POPULATE to ask a device driver to
initialize the device memory in some specific manner. SGX driver can use
this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
page in the address range.

Add f_ops->populate() with the same parameters as f_ops->mmap() and make
it conditionally called inside call_mmap(). Update call sites
accodingly.

Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
v2:
-       if (!ret && do_populate)
+       if (!ret && do_populate && file->f_op->populate)
(reported by Jan Harkes)
---
 arch/mips/kernel/vdso.c                    |  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c |  2 +-
 fs/coda/file.c                             |  2 +-
 fs/overlayfs/file.c                        |  2 +-
 include/linux/fs.h                         | 10 ++++++++--
 include/linux/mm.h                         |  2 +-
 ipc/shm.c                                  |  2 +-
 mm/mmap.c                                  | 10 +++++-----
 mm/nommu.c                                 |  4 ++--
 9 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c
index 3d0cf471f2fe..89f3f3da9abd 100644
--- a/arch/mips/kernel/vdso.c
+++ b/arch/mips/kernel/vdso.c
@@ -102,7 +102,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 		base = mmap_region(NULL, STACK_TOP, PAGE_SIZE,
 				VM_READ | VM_EXEC |
 				VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
-				0, NULL);
+				0, NULL, false);
 		if (IS_ERR_VALUE(base)) {
 			ret = base;
 			goto out;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 1b526039a60d..4c71f64d6a79 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -107,7 +107,7 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *
 	if (!obj->base.filp)
 		return -ENODEV;
 
-	ret = call_mmap(obj->base.filp, vma);
+	ret = call_mmap(obj->base.filp, vma, false);
 	if (ret)
 		return ret;
 
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 29dd87be2fb8..e14f312fdbf8 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -173,7 +173,7 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma)
 	spin_unlock(&cii->c_lock);
 
 	vma->vm_file = get_file(host_file);
-	ret = call_mmap(vma->vm_file, vma);
+	ret = call_mmap(vma->vm_file, vma, false);
 
 	if (ret) {
 		/* if call_mmap fails, our caller will put host_file so we
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index fa125feed0ff..b963a9397e80 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -503,7 +503,7 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
 	vma_set_file(vma, realfile);
 
 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
-	ret = call_mmap(vma->vm_file, vma);
+	ret = call_mmap(vma->vm_file, vma, false);
 	revert_creds(old_cred);
 	ovl_file_accessed(file);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e2d892b201b0..4c6a3339373d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1993,6 +1993,7 @@ struct file_operations {
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
 	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
 	int (*mmap) (struct file *, struct vm_area_struct *);
+	int (*populate)(struct file *, struct vm_area_struct *);
 	unsigned long mmap_supported_flags;
 	int (*open) (struct inode *, struct file *);
 	int (*flush) (struct file *, fl_owner_t id);
@@ -2074,9 +2075,14 @@ static inline ssize_t call_write_iter(struct file *file, struct kiocb *kio,
 	return file->f_op->write_iter(kio, iter);
 }
 
-static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
+static inline int call_mmap(struct file *file, struct vm_area_struct *vma, bool do_populate)
 {
-	return file->f_op->mmap(file, vma);
+	int ret = file->f_op->mmap(file, vma);
+
+	if (!ret && do_populate && file->f_op->populate)
+		ret = file->f_op->populate(file, vma);
+
+	return ret;
 }
 
 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 213cc569b192..6c8c036f423b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2683,7 +2683,7 @@ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned lo
 
 extern unsigned long mmap_region(struct file *file, unsigned long addr,
 	unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
-	struct list_head *uf);
+	struct list_head *uf, bool do_populate);
 extern unsigned long do_mmap(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long prot, unsigned long flags,
 	unsigned long pgoff, unsigned long *populate, struct list_head *uf);
diff --git a/ipc/shm.c b/ipc/shm.c
index b3048ebd5c31..89b28f32acf0 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -587,7 +587,7 @@ static int shm_mmap(struct file *file, struct vm_area_struct *vma)
 	if (ret)
 		return ret;
 
-	ret = call_mmap(sfd->file, vma);
+	ret = call_mmap(sfd->file, vma, do_populate);
 	if (ret) {
 		shm_close(vma);
 		return ret;
diff --git a/mm/mmap.c b/mm/mmap.c
index 1e8fdb0b51ed..5eca79957d4c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1413,6 +1413,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 			unsigned long flags, unsigned long pgoff,
 			unsigned long *populate, struct list_head *uf)
 {
+	bool do_populate = (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE;
 	struct mm_struct *mm = current->mm;
 	vm_flags_t vm_flags;
 	int pkey = 0;
@@ -1579,10 +1580,9 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 			vm_flags |= VM_NORESERVE;
 	}
 
-	addr = mmap_region(file, addr, len, vm_flags, pgoff, uf);
+	addr = mmap_region(file, addr, len, vm_flags, pgoff, uf, do_populate);
 	if (!IS_ERR_VALUE(addr) &&
-	    ((vm_flags & VM_LOCKED) ||
-	     (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
+	    ((vm_flags & VM_LOCKED) || do_populate))
 		*populate = len;
 	return addr;
 }
@@ -1721,7 +1721,7 @@ static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
 
 unsigned long mmap_region(struct file *file, unsigned long addr,
 		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
-		struct list_head *uf)
+		struct list_head *uf, bool do_populate)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma, *prev, *merge;
@@ -1790,7 +1790,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		}
 
 		vma->vm_file = get_file(file);
-		error = call_mmap(file, vma);
+		error = call_mmap(file, vma, do_populate);
 		if (error)
 			goto unmap_and_free_vma;
 
diff --git a/mm/nommu.c b/mm/nommu.c
index 55a9e48a7a02..a3c20b803c27 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -941,7 +941,7 @@ static int do_mmap_shared_file(struct vm_area_struct *vma)
 {
 	int ret;
 
-	ret = call_mmap(vma->vm_file, vma);
+	ret = call_mmap(vma->vm_file, vma, false);
 	if (ret == 0) {
 		vma->vm_region->vm_top = vma->vm_region->vm_end;
 		return 0;
@@ -972,7 +972,7 @@ static int do_mmap_private(struct vm_area_struct *vma,
 	 * - VM_MAYSHARE will be set if it may attempt to share
 	 */
 	if (capabilities & NOMMU_MAP_DIRECT) {
-		ret = call_mmap(vma->vm_file, vma);
+		ret = call_mmap(vma->vm_file, vma, false);
 		if (ret == 0) {
 			/* shouldn't return success if we're not sharing */
 			BUG_ON(!(vma->vm_flags & VM_MAYSHARE));
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH RFC v2] mm: Add f_ops->populate()
@ 2022-03-06  3:26 ` Jarkko Sakkinen
  0 siblings, 0 replies; 28+ messages in thread
From: Jarkko Sakkinen @ 2022-03-06  3:26 UTC (permalink / raw)
  To: Andrew Morton
  Cc: jaharkes, Nathaniel McCallum, linux-unionfs, intel-gfx,
	Dave Hansen, linux-mips, dri-devel, linux-kernel, linux-mm,
	Jarkko Sakkinen, linux-fsdevel, Reinette Chatre, codalist,
	linux-sgx

Sometimes you might want to use MAP_POPULATE to ask a device driver to
initialize the device memory in some specific manner. SGX driver can use
this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
page in the address range.

Add f_ops->populate() with the same parameters as f_ops->mmap() and make
it conditionally called inside call_mmap(). Update call sites
accodingly.

Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
v2:
-       if (!ret && do_populate)
+       if (!ret && do_populate && file->f_op->populate)
(reported by Jan Harkes)
---
 arch/mips/kernel/vdso.c                    |  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c |  2 +-
 fs/coda/file.c                             |  2 +-
 fs/overlayfs/file.c                        |  2 +-
 include/linux/fs.h                         | 10 ++++++++--
 include/linux/mm.h                         |  2 +-
 ipc/shm.c                                  |  2 +-
 mm/mmap.c                                  | 10 +++++-----
 mm/nommu.c                                 |  4 ++--
 9 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c
index 3d0cf471f2fe..89f3f3da9abd 100644
--- a/arch/mips/kernel/vdso.c
+++ b/arch/mips/kernel/vdso.c
@@ -102,7 +102,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 		base = mmap_region(NULL, STACK_TOP, PAGE_SIZE,
 				VM_READ | VM_EXEC |
 				VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
-				0, NULL);
+				0, NULL, false);
 		if (IS_ERR_VALUE(base)) {
 			ret = base;
 			goto out;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 1b526039a60d..4c71f64d6a79 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -107,7 +107,7 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *
 	if (!obj->base.filp)
 		return -ENODEV;
 
-	ret = call_mmap(obj->base.filp, vma);
+	ret = call_mmap(obj->base.filp, vma, false);
 	if (ret)
 		return ret;
 
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 29dd87be2fb8..e14f312fdbf8 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -173,7 +173,7 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma)
 	spin_unlock(&cii->c_lock);
 
 	vma->vm_file = get_file(host_file);
-	ret = call_mmap(vma->vm_file, vma);
+	ret = call_mmap(vma->vm_file, vma, false);
 
 	if (ret) {
 		/* if call_mmap fails, our caller will put host_file so we
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index fa125feed0ff..b963a9397e80 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -503,7 +503,7 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
 	vma_set_file(vma, realfile);
 
 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
-	ret = call_mmap(vma->vm_file, vma);
+	ret = call_mmap(vma->vm_file, vma, false);
 	revert_creds(old_cred);
 	ovl_file_accessed(file);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e2d892b201b0..4c6a3339373d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1993,6 +1993,7 @@ struct file_operations {
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
 	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
 	int (*mmap) (struct file *, struct vm_area_struct *);
+	int (*populate)(struct file *, struct vm_area_struct *);
 	unsigned long mmap_supported_flags;
 	int (*open) (struct inode *, struct file *);
 	int (*flush) (struct file *, fl_owner_t id);
@@ -2074,9 +2075,14 @@ static inline ssize_t call_write_iter(struct file *file, struct kiocb *kio,
 	return file->f_op->write_iter(kio, iter);
 }
 
-static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
+static inline int call_mmap(struct file *file, struct vm_area_struct *vma, bool do_populate)
 {
-	return file->f_op->mmap(file, vma);
+	int ret = file->f_op->mmap(file, vma);
+
+	if (!ret && do_populate && file->f_op->populate)
+		ret = file->f_op->populate(file, vma);
+
+	return ret;
 }
 
 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 213cc569b192..6c8c036f423b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2683,7 +2683,7 @@ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned lo
 
 extern unsigned long mmap_region(struct file *file, unsigned long addr,
 	unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
-	struct list_head *uf);
+	struct list_head *uf, bool do_populate);
 extern unsigned long do_mmap(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long prot, unsigned long flags,
 	unsigned long pgoff, unsigned long *populate, struct list_head *uf);
diff --git a/ipc/shm.c b/ipc/shm.c
index b3048ebd5c31..89b28f32acf0 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -587,7 +587,7 @@ static int shm_mmap(struct file *file, struct vm_area_struct *vma)
 	if (ret)
 		return ret;
 
-	ret = call_mmap(sfd->file, vma);
+	ret = call_mmap(sfd->file, vma, do_populate);
 	if (ret) {
 		shm_close(vma);
 		return ret;
diff --git a/mm/mmap.c b/mm/mmap.c
index 1e8fdb0b51ed..5eca79957d4c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1413,6 +1413,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 			unsigned long flags, unsigned long pgoff,
 			unsigned long *populate, struct list_head *uf)
 {
+	bool do_populate = (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE;
 	struct mm_struct *mm = current->mm;
 	vm_flags_t vm_flags;
 	int pkey = 0;
@@ -1579,10 +1580,9 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 			vm_flags |= VM_NORESERVE;
 	}
 
-	addr = mmap_region(file, addr, len, vm_flags, pgoff, uf);
+	addr = mmap_region(file, addr, len, vm_flags, pgoff, uf, do_populate);
 	if (!IS_ERR_VALUE(addr) &&
-	    ((vm_flags & VM_LOCKED) ||
-	     (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
+	    ((vm_flags & VM_LOCKED) || do_populate))
 		*populate = len;
 	return addr;
 }
@@ -1721,7 +1721,7 @@ static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
 
 unsigned long mmap_region(struct file *file, unsigned long addr,
 		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
-		struct list_head *uf)
+		struct list_head *uf, bool do_populate)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma, *prev, *merge;
@@ -1790,7 +1790,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		}
 
 		vma->vm_file = get_file(file);
-		error = call_mmap(file, vma);
+		error = call_mmap(file, vma, do_populate);
 		if (error)
 			goto unmap_and_free_vma;
 
diff --git a/mm/nommu.c b/mm/nommu.c
index 55a9e48a7a02..a3c20b803c27 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -941,7 +941,7 @@ static int do_mmap_shared_file(struct vm_area_struct *vma)
 {
 	int ret;
 
-	ret = call_mmap(vma->vm_file, vma);
+	ret = call_mmap(vma->vm_file, vma, false);
 	if (ret == 0) {
 		vma->vm_region->vm_top = vma->vm_region->vm_end;
 		return 0;
@@ -972,7 +972,7 @@ static int do_mmap_private(struct vm_area_struct *vma,
 	 * - VM_MAYSHARE will be set if it may attempt to share
 	 */
 	if (capabilities & NOMMU_MAP_DIRECT) {
-		ret = call_mmap(vma->vm_file, vma);
+		ret = call_mmap(vma->vm_file, vma, false);
 		if (ret == 0) {
 			/* shouldn't return success if we're not sharing */
 			BUG_ON(!(vma->vm_flags & VM_MAYSHARE));
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [Intel-gfx] [PATCH RFC v2] mm: Add f_ops->populate()
@ 2022-03-06  3:26 ` Jarkko Sakkinen
  0 siblings, 0 replies; 28+ messages in thread
From: Jarkko Sakkinen @ 2022-03-06  3:26 UTC (permalink / raw)
  To: Andrew Morton
  Cc: jaharkes, Nathaniel McCallum, linux-unionfs, intel-gfx,
	Dave Hansen, linux-mips, dri-devel, linux-kernel, linux-mm,
	Jarkko Sakkinen, linux-fsdevel, Reinette Chatre, codalist,
	linux-sgx

Sometimes you might want to use MAP_POPULATE to ask a device driver to
initialize the device memory in some specific manner. SGX driver can use
this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
page in the address range.

Add f_ops->populate() with the same parameters as f_ops->mmap() and make
it conditionally called inside call_mmap(). Update call sites
accodingly.

Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
v2:
-       if (!ret && do_populate)
+       if (!ret && do_populate && file->f_op->populate)
(reported by Jan Harkes)
---
 arch/mips/kernel/vdso.c                    |  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c |  2 +-
 fs/coda/file.c                             |  2 +-
 fs/overlayfs/file.c                        |  2 +-
 include/linux/fs.h                         | 10 ++++++++--
 include/linux/mm.h                         |  2 +-
 ipc/shm.c                                  |  2 +-
 mm/mmap.c                                  | 10 +++++-----
 mm/nommu.c                                 |  4 ++--
 9 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c
index 3d0cf471f2fe..89f3f3da9abd 100644
--- a/arch/mips/kernel/vdso.c
+++ b/arch/mips/kernel/vdso.c
@@ -102,7 +102,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 		base = mmap_region(NULL, STACK_TOP, PAGE_SIZE,
 				VM_READ | VM_EXEC |
 				VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
-				0, NULL);
+				0, NULL, false);
 		if (IS_ERR_VALUE(base)) {
 			ret = base;
 			goto out;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 1b526039a60d..4c71f64d6a79 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -107,7 +107,7 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *
 	if (!obj->base.filp)
 		return -ENODEV;
 
-	ret = call_mmap(obj->base.filp, vma);
+	ret = call_mmap(obj->base.filp, vma, false);
 	if (ret)
 		return ret;
 
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 29dd87be2fb8..e14f312fdbf8 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -173,7 +173,7 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma)
 	spin_unlock(&cii->c_lock);
 
 	vma->vm_file = get_file(host_file);
-	ret = call_mmap(vma->vm_file, vma);
+	ret = call_mmap(vma->vm_file, vma, false);
 
 	if (ret) {
 		/* if call_mmap fails, our caller will put host_file so we
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index fa125feed0ff..b963a9397e80 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -503,7 +503,7 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
 	vma_set_file(vma, realfile);
 
 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
-	ret = call_mmap(vma->vm_file, vma);
+	ret = call_mmap(vma->vm_file, vma, false);
 	revert_creds(old_cred);
 	ovl_file_accessed(file);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e2d892b201b0..4c6a3339373d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1993,6 +1993,7 @@ struct file_operations {
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
 	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
 	int (*mmap) (struct file *, struct vm_area_struct *);
+	int (*populate)(struct file *, struct vm_area_struct *);
 	unsigned long mmap_supported_flags;
 	int (*open) (struct inode *, struct file *);
 	int (*flush) (struct file *, fl_owner_t id);
@@ -2074,9 +2075,14 @@ static inline ssize_t call_write_iter(struct file *file, struct kiocb *kio,
 	return file->f_op->write_iter(kio, iter);
 }
 
-static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
+static inline int call_mmap(struct file *file, struct vm_area_struct *vma, bool do_populate)
 {
-	return file->f_op->mmap(file, vma);
+	int ret = file->f_op->mmap(file, vma);
+
+	if (!ret && do_populate && file->f_op->populate)
+		ret = file->f_op->populate(file, vma);
+
+	return ret;
 }
 
 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 213cc569b192..6c8c036f423b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2683,7 +2683,7 @@ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned lo
 
 extern unsigned long mmap_region(struct file *file, unsigned long addr,
 	unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
-	struct list_head *uf);
+	struct list_head *uf, bool do_populate);
 extern unsigned long do_mmap(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long prot, unsigned long flags,
 	unsigned long pgoff, unsigned long *populate, struct list_head *uf);
diff --git a/ipc/shm.c b/ipc/shm.c
index b3048ebd5c31..89b28f32acf0 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -587,7 +587,7 @@ static int shm_mmap(struct file *file, struct vm_area_struct *vma)
 	if (ret)
 		return ret;
 
-	ret = call_mmap(sfd->file, vma);
+	ret = call_mmap(sfd->file, vma, do_populate);
 	if (ret) {
 		shm_close(vma);
 		return ret;
diff --git a/mm/mmap.c b/mm/mmap.c
index 1e8fdb0b51ed..5eca79957d4c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1413,6 +1413,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 			unsigned long flags, unsigned long pgoff,
 			unsigned long *populate, struct list_head *uf)
 {
+	bool do_populate = (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE;
 	struct mm_struct *mm = current->mm;
 	vm_flags_t vm_flags;
 	int pkey = 0;
@@ -1579,10 +1580,9 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 			vm_flags |= VM_NORESERVE;
 	}
 
-	addr = mmap_region(file, addr, len, vm_flags, pgoff, uf);
+	addr = mmap_region(file, addr, len, vm_flags, pgoff, uf, do_populate);
 	if (!IS_ERR_VALUE(addr) &&
-	    ((vm_flags & VM_LOCKED) ||
-	     (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
+	    ((vm_flags & VM_LOCKED) || do_populate))
 		*populate = len;
 	return addr;
 }
@@ -1721,7 +1721,7 @@ static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
 
 unsigned long mmap_region(struct file *file, unsigned long addr,
 		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
-		struct list_head *uf)
+		struct list_head *uf, bool do_populate)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma, *prev, *merge;
@@ -1790,7 +1790,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		}
 
 		vma->vm_file = get_file(file);
-		error = call_mmap(file, vma);
+		error = call_mmap(file, vma, do_populate);
 		if (error)
 			goto unmap_and_free_vma;
 
diff --git a/mm/nommu.c b/mm/nommu.c
index 55a9e48a7a02..a3c20b803c27 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -941,7 +941,7 @@ static int do_mmap_shared_file(struct vm_area_struct *vma)
 {
 	int ret;
 
-	ret = call_mmap(vma->vm_file, vma);
+	ret = call_mmap(vma->vm_file, vma, false);
 	if (ret == 0) {
 		vma->vm_region->vm_top = vma->vm_region->vm_end;
 		return 0;
@@ -972,7 +972,7 @@ static int do_mmap_private(struct vm_area_struct *vma,
 	 * - VM_MAYSHARE will be set if it may attempt to share
 	 */
 	if (capabilities & NOMMU_MAP_DIRECT) {
-		ret = call_mmap(vma->vm_file, vma);
+		ret = call_mmap(vma->vm_file, vma, false);
 		if (ret == 0) {
 			/* shouldn't return success if we're not sharing */
 			BUG_ON(!(vma->vm_flags & VM_MAYSHARE));
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH RFC v2] mm: Add f_ops->populate()
  2022-03-06  3:26 ` Jarkko Sakkinen
  (?)
@ 2022-03-06 23:24   ` Andrew Morton
  -1 siblings, 0 replies; 28+ messages in thread
From: Andrew Morton @ 2022-03-06 23:24 UTC (permalink / raw)
  To: Jarkko Sakkinen
  Cc: Dave Hansen, Nathaniel McCallum, Reinette Chatre, linux-sgx,
	jaharkes, linux-mips, linux-kernel, intel-gfx, dri-devel,
	codalist, linux-unionfs, linux-fsdevel, linux-mm

On Sun,  6 Mar 2022 05:26:55 +0200 Jarkko Sakkinen <jarkko@kernel.org> wrote:

> Sometimes you might want to use MAP_POPULATE to ask a device driver to
> initialize the device memory in some specific manner. SGX driver can use
> this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
> page in the address range.

Why is this useful?  Please fully describe the benefit to kernel users.
Convince us that the benefit justifies the code churn, maintenance
cost and larger kernel footprint.

Do we know of any other drivers which might use this?

> Add f_ops->populate() with the same parameters as f_ops->mmap() and make
> it conditionally called inside call_mmap(). Update call sites
> accodingly.

spello

> -static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
> +static inline int call_mmap(struct file *file, struct vm_area_struct *vma, bool do_populate)
>  {
> -	return file->f_op->mmap(file, vma);
> +	int ret = file->f_op->mmap(file, vma);
> +
> +	if (!ret && do_populate && file->f_op->populate)
> +		ret = file->f_op->populate(file, vma);
> +
> +	return ret;
>  }

Should this still be inlined?



^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH RFC v2] mm: Add f_ops->populate()
@ 2022-03-06 23:24   ` Andrew Morton
  0 siblings, 0 replies; 28+ messages in thread
From: Andrew Morton @ 2022-03-06 23:24 UTC (permalink / raw)
  To: Jarkko Sakkinen
  Cc: codalist, jaharkes, Nathaniel McCallum, linux-unionfs, intel-gfx,
	Dave Hansen, linux-mips, dri-devel, linux-kernel, linux-mm,
	linux-fsdevel, Reinette Chatre, linux-sgx

On Sun,  6 Mar 2022 05:26:55 +0200 Jarkko Sakkinen <jarkko@kernel.org> wrote:

> Sometimes you might want to use MAP_POPULATE to ask a device driver to
> initialize the device memory in some specific manner. SGX driver can use
> this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
> page in the address range.

Why is this useful?  Please fully describe the benefit to kernel users.
Convince us that the benefit justifies the code churn, maintenance
cost and larger kernel footprint.

Do we know of any other drivers which might use this?

> Add f_ops->populate() with the same parameters as f_ops->mmap() and make
> it conditionally called inside call_mmap(). Update call sites
> accodingly.

spello

> -static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
> +static inline int call_mmap(struct file *file, struct vm_area_struct *vma, bool do_populate)
>  {
> -	return file->f_op->mmap(file, vma);
> +	int ret = file->f_op->mmap(file, vma);
> +
> +	if (!ret && do_populate && file->f_op->populate)
> +		ret = file->f_op->populate(file, vma);
> +
> +	return ret;
>  }

Should this still be inlined?



^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Intel-gfx] [PATCH RFC v2] mm: Add f_ops->populate()
@ 2022-03-06 23:24   ` Andrew Morton
  0 siblings, 0 replies; 28+ messages in thread
From: Andrew Morton @ 2022-03-06 23:24 UTC (permalink / raw)
  To: Jarkko Sakkinen
  Cc: codalist, jaharkes, Nathaniel McCallum, linux-unionfs, intel-gfx,
	Dave Hansen, linux-mips, dri-devel, linux-kernel, linux-mm,
	linux-fsdevel, Reinette Chatre, linux-sgx

On Sun,  6 Mar 2022 05:26:55 +0200 Jarkko Sakkinen <jarkko@kernel.org> wrote:

> Sometimes you might want to use MAP_POPULATE to ask a device driver to
> initialize the device memory in some specific manner. SGX driver can use
> this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
> page in the address range.

Why is this useful?  Please fully describe the benefit to kernel users.
Convince us that the benefit justifies the code churn, maintenance
cost and larger kernel footprint.

Do we know of any other drivers which might use this?

> Add f_ops->populate() with the same parameters as f_ops->mmap() and make
> it conditionally called inside call_mmap(). Update call sites
> accodingly.

spello

> -static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
> +static inline int call_mmap(struct file *file, struct vm_area_struct *vma, bool do_populate)
>  {
> -	return file->f_op->mmap(file, vma);
> +	int ret = file->f_op->mmap(file, vma);
> +
> +	if (!ret && do_populate && file->f_op->populate)
> +		ret = file->f_op->populate(file, vma);
> +
> +	return ret;
>  }

Should this still be inlined?



^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH RFC v2] mm: Add f_ops->populate()
  2022-03-06 23:24   ` Andrew Morton
  (?)
@ 2022-03-06 23:41     ` Dave Hansen
  -1 siblings, 0 replies; 28+ messages in thread
From: Dave Hansen @ 2022-03-06 23:41 UTC (permalink / raw)
  To: Andrew Morton, Jarkko Sakkinen
  Cc: Dave Hansen, Nathaniel McCallum, Reinette Chatre, linux-sgx,
	jaharkes, linux-mips, linux-kernel, intel-gfx, dri-devel,
	codalist, linux-unionfs, linux-fsdevel, linux-mm

On 3/6/22 15:24, Andrew Morton wrote:
> On Sun,  6 Mar 2022 05:26:55 +0200 Jarkko Sakkinen <jarkko@kernel.org> wrote:
> 
>> Sometimes you might want to use MAP_POPULATE to ask a device driver to
>> initialize the device memory in some specific manner. SGX driver can use
>> this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
>> page in the address range.
> Why is this useful?  Please fully describe the benefit to kernel users.
> Convince us that the benefit justifies the code churn, maintenance
> cost and larger kernel footprint.

In short: page faults stink.  The core kernel has lots of ways of
avoiding page faults like madvise(MADV_WILLNEED) or mmap(MAP_POPULATE).
 But, those only work on normal RAM that the core mm manages.

SGX is weird.  SGX memory is managed outside the core mm.  It doesn't
have a 'struct page' and get_user_pages() doesn't work on it.  Its VMAs
are marked with VM_IO.  So, none of the existing methods for avoiding
page faults work on SGX memory.

This essentially helps extend existing "normal RAM" kernel ABIs to work
for avoiding faults for SGX too.  SGX users want to enjoy all of the
benefits of a delayed allocation policy (better resource use,
overcommit, NUMA affinity) but without the cost of millions of faults.

That said, this isn't how I would have implemented it.  I probably would
have hooked in to populate_vma_page_range() or its callers.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH RFC v2] mm: Add f_ops->populate()
@ 2022-03-06 23:41     ` Dave Hansen
  0 siblings, 0 replies; 28+ messages in thread
From: Dave Hansen @ 2022-03-06 23:41 UTC (permalink / raw)
  To: Andrew Morton, Jarkko Sakkinen
  Cc: codalist, jaharkes, Nathaniel McCallum, linux-unionfs, intel-gfx,
	Dave Hansen, linux-mips, dri-devel, linux-kernel, linux-mm,
	linux-fsdevel, Reinette Chatre, linux-sgx

On 3/6/22 15:24, Andrew Morton wrote:
> On Sun,  6 Mar 2022 05:26:55 +0200 Jarkko Sakkinen <jarkko@kernel.org> wrote:
> 
>> Sometimes you might want to use MAP_POPULATE to ask a device driver to
>> initialize the device memory in some specific manner. SGX driver can use
>> this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
>> page in the address range.
> Why is this useful?  Please fully describe the benefit to kernel users.
> Convince us that the benefit justifies the code churn, maintenance
> cost and larger kernel footprint.

In short: page faults stink.  The core kernel has lots of ways of
avoiding page faults like madvise(MADV_WILLNEED) or mmap(MAP_POPULATE).
 But, those only work on normal RAM that the core mm manages.

SGX is weird.  SGX memory is managed outside the core mm.  It doesn't
have a 'struct page' and get_user_pages() doesn't work on it.  Its VMAs
are marked with VM_IO.  So, none of the existing methods for avoiding
page faults work on SGX memory.

This essentially helps extend existing "normal RAM" kernel ABIs to work
for avoiding faults for SGX too.  SGX users want to enjoy all of the
benefits of a delayed allocation policy (better resource use,
overcommit, NUMA affinity) but without the cost of millions of faults.

That said, this isn't how I would have implemented it.  I probably would
have hooked in to populate_vma_page_range() or its callers.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Intel-gfx] [PATCH RFC v2] mm: Add f_ops->populate()
@ 2022-03-06 23:41     ` Dave Hansen
  0 siblings, 0 replies; 28+ messages in thread
From: Dave Hansen @ 2022-03-06 23:41 UTC (permalink / raw)
  To: Andrew Morton, Jarkko Sakkinen
  Cc: codalist, jaharkes, Nathaniel McCallum, linux-unionfs, intel-gfx,
	Dave Hansen, linux-mips, dri-devel, linux-kernel, linux-mm,
	linux-fsdevel, Reinette Chatre, linux-sgx

On 3/6/22 15:24, Andrew Morton wrote:
> On Sun,  6 Mar 2022 05:26:55 +0200 Jarkko Sakkinen <jarkko@kernel.org> wrote:
> 
>> Sometimes you might want to use MAP_POPULATE to ask a device driver to
>> initialize the device memory in some specific manner. SGX driver can use
>> this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
>> page in the address range.
> Why is this useful?  Please fully describe the benefit to kernel users.
> Convince us that the benefit justifies the code churn, maintenance
> cost and larger kernel footprint.

In short: page faults stink.  The core kernel has lots of ways of
avoiding page faults like madvise(MADV_WILLNEED) or mmap(MAP_POPULATE).
 But, those only work on normal RAM that the core mm manages.

SGX is weird.  SGX memory is managed outside the core mm.  It doesn't
have a 'struct page' and get_user_pages() doesn't work on it.  Its VMAs
are marked with VM_IO.  So, none of the existing methods for avoiding
page faults work on SGX memory.

This essentially helps extend existing "normal RAM" kernel ABIs to work
for avoiding faults for SGX too.  SGX users want to enjoy all of the
benefits of a delayed allocation policy (better resource use,
overcommit, NUMA affinity) but without the cost of millions of faults.

That said, this isn't how I would have implemented it.  I probably would
have hooked in to populate_vma_page_range() or its callers.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH RFC v2] mm: Add f_ops->populate()
  2022-03-06 23:41     ` Dave Hansen
  (?)
@ 2022-03-07 11:27       ` Jarkko Sakkinen
  -1 siblings, 0 replies; 28+ messages in thread
From: Jarkko Sakkinen @ 2022-03-07 11:27 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Andrew Morton, Dave Hansen, Nathaniel McCallum, Reinette Chatre,
	linux-sgx, jaharkes, linux-mips, linux-kernel, intel-gfx,
	dri-devel, codalist, linux-unionfs, linux-fsdevel, linux-mm

On Sun, Mar 06, 2022 at 03:41:54PM -0800, Dave Hansen wrote:
> On 3/6/22 15:24, Andrew Morton wrote:
> > On Sun,  6 Mar 2022 05:26:55 +0200 Jarkko Sakkinen <jarkko@kernel.org> wrote:
> > 
> >> Sometimes you might want to use MAP_POPULATE to ask a device driver to
> >> initialize the device memory in some specific manner. SGX driver can use
> >> this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
> >> page in the address range.
> > Why is this useful?  Please fully describe the benefit to kernel users.
> > Convince us that the benefit justifies the code churn, maintenance
> > cost and larger kernel footprint.
> 
> In short: page faults stink.  The core kernel has lots of ways of
> avoiding page faults like madvise(MADV_WILLNEED) or mmap(MAP_POPULATE).
>  But, those only work on normal RAM that the core mm manages.
> 
> SGX is weird.  SGX memory is managed outside the core mm.  It doesn't
> have a 'struct page' and get_user_pages() doesn't work on it.  Its VMAs
> are marked with VM_IO.  So, none of the existing methods for avoiding
> page faults work on SGX memory.
> 
> This essentially helps extend existing "normal RAM" kernel ABIs to work
> for avoiding faults for SGX too.  SGX users want to enjoy all of the
> benefits of a delayed allocation policy (better resource use,
> overcommit, NUMA affinity) but without the cost of millions of faults.
> 
> That said, this isn't how I would have implemented it.  I probably would
> have hooked in to populate_vma_page_range() or its callers.

The exact implementation path is not driver in this. I'm open for
better options. The point of these patches is more to show an issue
rather than solution, and they do carry RFC because of that.

Hooking into populate_vma_page_range() does sound like a better idea,
because then it would be nicely embedded into __mm_populate() and
other functionality that calls that function.

But e.g. in __mm_populate() anything with (VM_IO | VM_PFNMAP) gets
filtered out and never reach that function.

I don't know unorthodox that'd be but could we perhaps have a VM
flag for SGX?

BR, Jarkko

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH RFC v2] mm: Add f_ops->populate()
@ 2022-03-07 11:27       ` Jarkko Sakkinen
  0 siblings, 0 replies; 28+ messages in thread
From: Jarkko Sakkinen @ 2022-03-07 11:27 UTC (permalink / raw)
  To: Dave Hansen
  Cc: codalist, jaharkes, Nathaniel McCallum, linux-unionfs, intel-gfx,
	Dave Hansen, linux-mips, dri-devel, linux-kernel, linux-mm,
	linux-fsdevel, Andrew Morton, Reinette Chatre, linux-sgx

On Sun, Mar 06, 2022 at 03:41:54PM -0800, Dave Hansen wrote:
> On 3/6/22 15:24, Andrew Morton wrote:
> > On Sun,  6 Mar 2022 05:26:55 +0200 Jarkko Sakkinen <jarkko@kernel.org> wrote:
> > 
> >> Sometimes you might want to use MAP_POPULATE to ask a device driver to
> >> initialize the device memory in some specific manner. SGX driver can use
> >> this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
> >> page in the address range.
> > Why is this useful?  Please fully describe the benefit to kernel users.
> > Convince us that the benefit justifies the code churn, maintenance
> > cost and larger kernel footprint.
> 
> In short: page faults stink.  The core kernel has lots of ways of
> avoiding page faults like madvise(MADV_WILLNEED) or mmap(MAP_POPULATE).
>  But, those only work on normal RAM that the core mm manages.
> 
> SGX is weird.  SGX memory is managed outside the core mm.  It doesn't
> have a 'struct page' and get_user_pages() doesn't work on it.  Its VMAs
> are marked with VM_IO.  So, none of the existing methods for avoiding
> page faults work on SGX memory.
> 
> This essentially helps extend existing "normal RAM" kernel ABIs to work
> for avoiding faults for SGX too.  SGX users want to enjoy all of the
> benefits of a delayed allocation policy (better resource use,
> overcommit, NUMA affinity) but without the cost of millions of faults.
> 
> That said, this isn't how I would have implemented it.  I probably would
> have hooked in to populate_vma_page_range() or its callers.

The exact implementation path is not driver in this. I'm open for
better options. The point of these patches is more to show an issue
rather than solution, and they do carry RFC because of that.

Hooking into populate_vma_page_range() does sound like a better idea,
because then it would be nicely embedded into __mm_populate() and
other functionality that calls that function.

But e.g. in __mm_populate() anything with (VM_IO | VM_PFNMAP) gets
filtered out and never reach that function.

I don't know unorthodox that'd be but could we perhaps have a VM
flag for SGX?

BR, Jarkko

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Intel-gfx] [PATCH RFC v2] mm: Add f_ops->populate()
@ 2022-03-07 11:27       ` Jarkko Sakkinen
  0 siblings, 0 replies; 28+ messages in thread
From: Jarkko Sakkinen @ 2022-03-07 11:27 UTC (permalink / raw)
  To: Dave Hansen
  Cc: codalist, jaharkes, Nathaniel McCallum, linux-unionfs, intel-gfx,
	Dave Hansen, linux-mips, dri-devel, linux-kernel, linux-mm,
	linux-fsdevel, Andrew Morton, Reinette Chatre, linux-sgx

On Sun, Mar 06, 2022 at 03:41:54PM -0800, Dave Hansen wrote:
> On 3/6/22 15:24, Andrew Morton wrote:
> > On Sun,  6 Mar 2022 05:26:55 +0200 Jarkko Sakkinen <jarkko@kernel.org> wrote:
> > 
> >> Sometimes you might want to use MAP_POPULATE to ask a device driver to
> >> initialize the device memory in some specific manner. SGX driver can use
> >> this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
> >> page in the address range.
> > Why is this useful?  Please fully describe the benefit to kernel users.
> > Convince us that the benefit justifies the code churn, maintenance
> > cost and larger kernel footprint.
> 
> In short: page faults stink.  The core kernel has lots of ways of
> avoiding page faults like madvise(MADV_WILLNEED) or mmap(MAP_POPULATE).
>  But, those only work on normal RAM that the core mm manages.
> 
> SGX is weird.  SGX memory is managed outside the core mm.  It doesn't
> have a 'struct page' and get_user_pages() doesn't work on it.  Its VMAs
> are marked with VM_IO.  So, none of the existing methods for avoiding
> page faults work on SGX memory.
> 
> This essentially helps extend existing "normal RAM" kernel ABIs to work
> for avoiding faults for SGX too.  SGX users want to enjoy all of the
> benefits of a delayed allocation policy (better resource use,
> overcommit, NUMA affinity) but without the cost of millions of faults.
> 
> That said, this isn't how I would have implemented it.  I probably would
> have hooked in to populate_vma_page_range() or its callers.

The exact implementation path is not driver in this. I'm open for
better options. The point of these patches is more to show an issue
rather than solution, and they do carry RFC because of that.

Hooking into populate_vma_page_range() does sound like a better idea,
because then it would be nicely embedded into __mm_populate() and
other functionality that calls that function.

But e.g. in __mm_populate() anything with (VM_IO | VM_PFNMAP) gets
filtered out and never reach that function.

I don't know unorthodox that'd be but could we perhaps have a VM
flag for SGX?

BR, Jarkko

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH RFC v2] mm: Add f_ops->populate()
  2022-03-06 23:24   ` Andrew Morton
  (?)
@ 2022-03-07 13:00     ` Jarkko Sakkinen
  -1 siblings, 0 replies; 28+ messages in thread
From: Jarkko Sakkinen @ 2022-03-07 13:00 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Dave Hansen, Nathaniel McCallum, Reinette Chatre, linux-sgx,
	jaharkes, linux-mips, linux-kernel, intel-gfx, dri-devel,
	codalist, linux-unionfs, linux-fsdevel, linux-mm

On Sun, Mar 06, 2022 at 03:24:56PM -0800, Andrew Morton wrote:
> On Sun,  6 Mar 2022 05:26:55 +0200 Jarkko Sakkinen <jarkko@kernel.org> wrote:
> 
> > Sometimes you might want to use MAP_POPULATE to ask a device driver to
> > initialize the device memory in some specific manner. SGX driver can use
> > this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
> > page in the address range.
> 
> Why is this useful?  Please fully describe the benefit to kernel users.
> Convince us that the benefit justifies the code churn, maintenance
> cost and larger kernel footprint.
> 
> Do we know of any other drivers which might use this?

Brutal honesty: I don't know if any other drivers would use this but
neither I would not be surprised if they did. The need for this might
very well be "masked" by ioctl API's.  I was first proposing a ioctl
for this but Dave suggested to at least try out this route.

> > Add f_ops->populate() with the same parameters as f_ops->mmap() and make
> > it conditionally called inside call_mmap(). Update call sites
> > accodingly.
> 
> spello

Thanks, I noticed that but did not want to spam with a new version just
because of that :-)

> 
> > -static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
> > +static inline int call_mmap(struct file *file, struct vm_area_struct *vma, bool do_populate)
> >  {
> > -	return file->f_op->mmap(file, vma);
> > +	int ret = file->f_op->mmap(file, vma);
> > +
> > +	if (!ret && do_populate && file->f_op->populate)
> > +		ret = file->f_op->populate(file, vma);
> > +
> > +	return ret;
> >  }
> 
> Should this still be inlined?

I think it might make sense at least to have call_mmap_populate() so and
mmap_region_populate() instead of putting that boolean parameter to every
flow (based on Greg's feedback). But only if this implementation approach
is used in the first place.

As said, I chose to use RFC to pinpoint a bottleneck for us, not claiming
that this would be the best possible way to work around it.

BR, Jarkko

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH RFC v2] mm: Add f_ops->populate()
@ 2022-03-07 13:00     ` Jarkko Sakkinen
  0 siblings, 0 replies; 28+ messages in thread
From: Jarkko Sakkinen @ 2022-03-07 13:00 UTC (permalink / raw)
  To: Andrew Morton
  Cc: codalist, jaharkes, Nathaniel McCallum, linux-unionfs, intel-gfx,
	Dave Hansen, linux-mips, dri-devel, linux-kernel, linux-mm,
	linux-fsdevel, Reinette Chatre, linux-sgx

On Sun, Mar 06, 2022 at 03:24:56PM -0800, Andrew Morton wrote:
> On Sun,  6 Mar 2022 05:26:55 +0200 Jarkko Sakkinen <jarkko@kernel.org> wrote:
> 
> > Sometimes you might want to use MAP_POPULATE to ask a device driver to
> > initialize the device memory in some specific manner. SGX driver can use
> > this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
> > page in the address range.
> 
> Why is this useful?  Please fully describe the benefit to kernel users.
> Convince us that the benefit justifies the code churn, maintenance
> cost and larger kernel footprint.
> 
> Do we know of any other drivers which might use this?

Brutal honesty: I don't know if any other drivers would use this but
neither I would not be surprised if they did. The need for this might
very well be "masked" by ioctl API's.  I was first proposing a ioctl
for this but Dave suggested to at least try out this route.

> > Add f_ops->populate() with the same parameters as f_ops->mmap() and make
> > it conditionally called inside call_mmap(). Update call sites
> > accodingly.
> 
> spello

Thanks, I noticed that but did not want to spam with a new version just
because of that :-)

> 
> > -static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
> > +static inline int call_mmap(struct file *file, struct vm_area_struct *vma, bool do_populate)
> >  {
> > -	return file->f_op->mmap(file, vma);
> > +	int ret = file->f_op->mmap(file, vma);
> > +
> > +	if (!ret && do_populate && file->f_op->populate)
> > +		ret = file->f_op->populate(file, vma);
> > +
> > +	return ret;
> >  }
> 
> Should this still be inlined?

I think it might make sense at least to have call_mmap_populate() so and
mmap_region_populate() instead of putting that boolean parameter to every
flow (based on Greg's feedback). But only if this implementation approach
is used in the first place.

As said, I chose to use RFC to pinpoint a bottleneck for us, not claiming
that this would be the best possible way to work around it.

BR, Jarkko

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Intel-gfx] [PATCH RFC v2] mm: Add f_ops->populate()
@ 2022-03-07 13:00     ` Jarkko Sakkinen
  0 siblings, 0 replies; 28+ messages in thread
From: Jarkko Sakkinen @ 2022-03-07 13:00 UTC (permalink / raw)
  To: Andrew Morton
  Cc: codalist, jaharkes, Nathaniel McCallum, linux-unionfs, intel-gfx,
	Dave Hansen, linux-mips, dri-devel, linux-kernel, linux-mm,
	linux-fsdevel, Reinette Chatre, linux-sgx

On Sun, Mar 06, 2022 at 03:24:56PM -0800, Andrew Morton wrote:
> On Sun,  6 Mar 2022 05:26:55 +0200 Jarkko Sakkinen <jarkko@kernel.org> wrote:
> 
> > Sometimes you might want to use MAP_POPULATE to ask a device driver to
> > initialize the device memory in some specific manner. SGX driver can use
> > this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
> > page in the address range.
> 
> Why is this useful?  Please fully describe the benefit to kernel users.
> Convince us that the benefit justifies the code churn, maintenance
> cost and larger kernel footprint.
> 
> Do we know of any other drivers which might use this?

Brutal honesty: I don't know if any other drivers would use this but
neither I would not be surprised if they did. The need for this might
very well be "masked" by ioctl API's.  I was first proposing a ioctl
for this but Dave suggested to at least try out this route.

> > Add f_ops->populate() with the same parameters as f_ops->mmap() and make
> > it conditionally called inside call_mmap(). Update call sites
> > accodingly.
> 
> spello

Thanks, I noticed that but did not want to spam with a new version just
because of that :-)

> 
> > -static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
> > +static inline int call_mmap(struct file *file, struct vm_area_struct *vma, bool do_populate)
> >  {
> > -	return file->f_op->mmap(file, vma);
> > +	int ret = file->f_op->mmap(file, vma);
> > +
> > +	if (!ret && do_populate && file->f_op->populate)
> > +		ret = file->f_op->populate(file, vma);
> > +
> > +	return ret;
> >  }
> 
> Should this still be inlined?

I think it might make sense at least to have call_mmap_populate() so and
mmap_region_populate() instead of putting that boolean parameter to every
flow (based on Greg's feedback). But only if this implementation approach
is used in the first place.

As said, I chose to use RFC to pinpoint a bottleneck for us, not claiming
that this would be the best possible way to work around it.

BR, Jarkko

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH RFC v2] mm: Add f_ops->populate()
  2022-03-06 23:41     ` Dave Hansen
  (?)
@ 2022-03-07 14:37       ` Matthew Wilcox
  -1 siblings, 0 replies; 28+ messages in thread
From: Matthew Wilcox @ 2022-03-07 14:37 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Andrew Morton, Jarkko Sakkinen, Dave Hansen, Nathaniel McCallum,
	Reinette Chatre, linux-sgx, jaharkes, linux-mips, linux-kernel,
	intel-gfx, dri-devel, codalist, linux-unionfs, linux-fsdevel,
	linux-mm

On Sun, Mar 06, 2022 at 03:41:54PM -0800, Dave Hansen wrote:
> In short: page faults stink.  The core kernel has lots of ways of
> avoiding page faults like madvise(MADV_WILLNEED) or mmap(MAP_POPULATE).
>  But, those only work on normal RAM that the core mm manages.
> 
> SGX is weird.  SGX memory is managed outside the core mm.  It doesn't
> have a 'struct page' and get_user_pages() doesn't work on it.  Its VMAs
> are marked with VM_IO.  So, none of the existing methods for avoiding
> page faults work on SGX memory.
> 
> This essentially helps extend existing "normal RAM" kernel ABIs to work
> for avoiding faults for SGX too.  SGX users want to enjoy all of the
> benefits of a delayed allocation policy (better resource use,
> overcommit, NUMA affinity) but without the cost of millions of faults.

We have a mechanism for dynamically reducing the number of page faults
already; it's just buried in the page cache code.  You have vma->vm_file,
which contains a file_ra_state.  You can use this to track where
recent faults have been and grow the size of the region you fault in
per page fault.  You don't have to (indeed probably don't want to) use
the same algorithm as the page cache, but the _principle_ is the same --
were recent speculative faults actually used; should we grow the number
of pages actually faulted in, or is this a random sparse workload where
we want to allocate individual pages.

Don't rely on the user to ask.  They don't know.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH RFC v2] mm: Add f_ops->populate()
@ 2022-03-07 14:37       ` Matthew Wilcox
  0 siblings, 0 replies; 28+ messages in thread
From: Matthew Wilcox @ 2022-03-07 14:37 UTC (permalink / raw)
  To: Dave Hansen
  Cc: codalist, jaharkes, Nathaniel McCallum, linux-unionfs, intel-gfx,
	Dave Hansen, linux-mips, dri-devel, linux-kernel, linux-mm,
	Jarkko Sakkinen, linux-fsdevel, Andrew Morton, Reinette Chatre,
	linux-sgx

On Sun, Mar 06, 2022 at 03:41:54PM -0800, Dave Hansen wrote:
> In short: page faults stink.  The core kernel has lots of ways of
> avoiding page faults like madvise(MADV_WILLNEED) or mmap(MAP_POPULATE).
>  But, those only work on normal RAM that the core mm manages.
> 
> SGX is weird.  SGX memory is managed outside the core mm.  It doesn't
> have a 'struct page' and get_user_pages() doesn't work on it.  Its VMAs
> are marked with VM_IO.  So, none of the existing methods for avoiding
> page faults work on SGX memory.
> 
> This essentially helps extend existing "normal RAM" kernel ABIs to work
> for avoiding faults for SGX too.  SGX users want to enjoy all of the
> benefits of a delayed allocation policy (better resource use,
> overcommit, NUMA affinity) but without the cost of millions of faults.

We have a mechanism for dynamically reducing the number of page faults
already; it's just buried in the page cache code.  You have vma->vm_file,
which contains a file_ra_state.  You can use this to track where
recent faults have been and grow the size of the region you fault in
per page fault.  You don't have to (indeed probably don't want to) use
the same algorithm as the page cache, but the _principle_ is the same --
were recent speculative faults actually used; should we grow the number
of pages actually faulted in, or is this a random sparse workload where
we want to allocate individual pages.

Don't rely on the user to ask.  They don't know.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Intel-gfx] [PATCH RFC v2] mm: Add f_ops->populate()
@ 2022-03-07 14:37       ` Matthew Wilcox
  0 siblings, 0 replies; 28+ messages in thread
From: Matthew Wilcox @ 2022-03-07 14:37 UTC (permalink / raw)
  To: Dave Hansen
  Cc: codalist, jaharkes, Nathaniel McCallum, linux-unionfs, intel-gfx,
	Dave Hansen, linux-mips, dri-devel, linux-kernel, linux-mm,
	Jarkko Sakkinen, linux-fsdevel, Andrew Morton, Reinette Chatre,
	linux-sgx

On Sun, Mar 06, 2022 at 03:41:54PM -0800, Dave Hansen wrote:
> In short: page faults stink.  The core kernel has lots of ways of
> avoiding page faults like madvise(MADV_WILLNEED) or mmap(MAP_POPULATE).
>  But, those only work on normal RAM that the core mm manages.
> 
> SGX is weird.  SGX memory is managed outside the core mm.  It doesn't
> have a 'struct page' and get_user_pages() doesn't work on it.  Its VMAs
> are marked with VM_IO.  So, none of the existing methods for avoiding
> page faults work on SGX memory.
> 
> This essentially helps extend existing "normal RAM" kernel ABIs to work
> for avoiding faults for SGX too.  SGX users want to enjoy all of the
> benefits of a delayed allocation policy (better resource use,
> overcommit, NUMA affinity) but without the cost of millions of faults.

We have a mechanism for dynamically reducing the number of page faults
already; it's just buried in the page cache code.  You have vma->vm_file,
which contains a file_ra_state.  You can use this to track where
recent faults have been and grow the size of the region you fault in
per page fault.  You don't have to (indeed probably don't want to) use
the same algorithm as the page cache, but the _principle_ is the same --
were recent speculative faults actually used; should we grow the number
of pages actually faulted in, or is this a random sparse workload where
we want to allocate individual pages.

Don't rely on the user to ask.  They don't know.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH RFC v2] mm: Add f_ops->populate()
  2022-03-07 11:27       ` Jarkko Sakkinen
  (?)
@ 2022-03-07 15:29         ` Dave Hansen
  -1 siblings, 0 replies; 28+ messages in thread
From: Dave Hansen @ 2022-03-07 15:29 UTC (permalink / raw)
  To: Jarkko Sakkinen
  Cc: Andrew Morton, Dave Hansen, Nathaniel McCallum, Reinette Chatre,
	linux-sgx, jaharkes, linux-mips, linux-kernel, intel-gfx,
	dri-devel, codalist, linux-unionfs, linux-fsdevel, linux-mm

On 3/7/22 03:27, Jarkko Sakkinen wrote:
> But e.g. in __mm_populate() anything with (VM_IO | VM_PFNMAP) gets
> filtered out and never reach that function.
> 
> I don't know unorthodox that'd be but could we perhaps have a VM
> flag for SGX?

SGX only works on a subset of the chips from one vendor on one
architecture.  That doesn't seem worth burning a VM flag.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH RFC v2] mm: Add f_ops->populate()
@ 2022-03-07 15:29         ` Dave Hansen
  0 siblings, 0 replies; 28+ messages in thread
From: Dave Hansen @ 2022-03-07 15:29 UTC (permalink / raw)
  To: Jarkko Sakkinen
  Cc: codalist, jaharkes, Nathaniel McCallum, linux-unionfs, intel-gfx,
	Dave Hansen, linux-mips, dri-devel, linux-kernel, linux-mm,
	linux-fsdevel, Andrew Morton, Reinette Chatre, linux-sgx

On 3/7/22 03:27, Jarkko Sakkinen wrote:
> But e.g. in __mm_populate() anything with (VM_IO | VM_PFNMAP) gets
> filtered out and never reach that function.
> 
> I don't know unorthodox that'd be but could we perhaps have a VM
> flag for SGX?

SGX only works on a subset of the chips from one vendor on one
architecture.  That doesn't seem worth burning a VM flag.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Intel-gfx] [PATCH RFC v2] mm: Add f_ops->populate()
@ 2022-03-07 15:29         ` Dave Hansen
  0 siblings, 0 replies; 28+ messages in thread
From: Dave Hansen @ 2022-03-07 15:29 UTC (permalink / raw)
  To: Jarkko Sakkinen
  Cc: codalist, jaharkes, Nathaniel McCallum, linux-unionfs, intel-gfx,
	Dave Hansen, linux-mips, dri-devel, linux-kernel, linux-mm,
	linux-fsdevel, Andrew Morton, Reinette Chatre, linux-sgx

On 3/7/22 03:27, Jarkko Sakkinen wrote:
> But e.g. in __mm_populate() anything with (VM_IO | VM_PFNMAP) gets
> filtered out and never reach that function.
> 
> I don't know unorthodox that'd be but could we perhaps have a VM
> flag for SGX?

SGX only works on a subset of the chips from one vendor on one
architecture.  That doesn't seem worth burning a VM flag.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH RFC v2] mm: Add f_ops->populate()
  2022-03-07 14:37       ` Matthew Wilcox
  (?)
@ 2022-03-07 15:43         ` Jarkko Sakkinen
  -1 siblings, 0 replies; 28+ messages in thread
From: Jarkko Sakkinen @ 2022-03-07 15:43 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Dave Hansen, Andrew Morton, Dave Hansen, Nathaniel McCallum,
	Reinette Chatre, linux-sgx, jaharkes, linux-mips, linux-kernel,
	intel-gfx, dri-devel, codalist, linux-unionfs, linux-fsdevel,
	linux-mm

On Mon, Mar 07, 2022 at 02:37:48PM +0000, Matthew Wilcox wrote:
> On Sun, Mar 06, 2022 at 03:41:54PM -0800, Dave Hansen wrote:
> > In short: page faults stink.  The core kernel has lots of ways of
> > avoiding page faults like madvise(MADV_WILLNEED) or mmap(MAP_POPULATE).
> >  But, those only work on normal RAM that the core mm manages.
> > 
> > SGX is weird.  SGX memory is managed outside the core mm.  It doesn't
> > have a 'struct page' and get_user_pages() doesn't work on it.  Its VMAs
> > are marked with VM_IO.  So, none of the existing methods for avoiding
> > page faults work on SGX memory.
> > 
> > This essentially helps extend existing "normal RAM" kernel ABIs to work
> > for avoiding faults for SGX too.  SGX users want to enjoy all of the
> > benefits of a delayed allocation policy (better resource use,
> > overcommit, NUMA affinity) but without the cost of millions of faults.
> 
> We have a mechanism for dynamically reducing the number of page faults
> already; it's just buried in the page cache code.  You have vma->vm_file,
> which contains a file_ra_state.  You can use this to track where
> recent faults have been and grow the size of the region you fault in
> per page fault.  You don't have to (indeed probably don't want to) use
> the same algorithm as the page cache, but the _principle_ is the same --
> were recent speculative faults actually used; should we grow the number
> of pages actually faulted in, or is this a random sparse workload where
> we want to allocate individual pages.
> 
> Don't rely on the user to ask.  They don't know.

This sounds like a possibility. I'll need to study it properly first
though. Thank you for pointing this out.

BR, Jarkko

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH RFC v2] mm: Add f_ops->populate()
@ 2022-03-07 15:43         ` Jarkko Sakkinen
  0 siblings, 0 replies; 28+ messages in thread
From: Jarkko Sakkinen @ 2022-03-07 15:43 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: codalist, jaharkes, Nathaniel McCallum, linux-unionfs, intel-gfx,
	Dave Hansen, linux-mips, dri-devel, linux-kernel, linux-mm,
	Dave Hansen, linux-fsdevel, Andrew Morton, Reinette Chatre,
	linux-sgx

On Mon, Mar 07, 2022 at 02:37:48PM +0000, Matthew Wilcox wrote:
> On Sun, Mar 06, 2022 at 03:41:54PM -0800, Dave Hansen wrote:
> > In short: page faults stink.  The core kernel has lots of ways of
> > avoiding page faults like madvise(MADV_WILLNEED) or mmap(MAP_POPULATE).
> >  But, those only work on normal RAM that the core mm manages.
> > 
> > SGX is weird.  SGX memory is managed outside the core mm.  It doesn't
> > have a 'struct page' and get_user_pages() doesn't work on it.  Its VMAs
> > are marked with VM_IO.  So, none of the existing methods for avoiding
> > page faults work on SGX memory.
> > 
> > This essentially helps extend existing "normal RAM" kernel ABIs to work
> > for avoiding faults for SGX too.  SGX users want to enjoy all of the
> > benefits of a delayed allocation policy (better resource use,
> > overcommit, NUMA affinity) but without the cost of millions of faults.
> 
> We have a mechanism for dynamically reducing the number of page faults
> already; it's just buried in the page cache code.  You have vma->vm_file,
> which contains a file_ra_state.  You can use this to track where
> recent faults have been and grow the size of the region you fault in
> per page fault.  You don't have to (indeed probably don't want to) use
> the same algorithm as the page cache, but the _principle_ is the same --
> were recent speculative faults actually used; should we grow the number
> of pages actually faulted in, or is this a random sparse workload where
> we want to allocate individual pages.
> 
> Don't rely on the user to ask.  They don't know.

This sounds like a possibility. I'll need to study it properly first
though. Thank you for pointing this out.

BR, Jarkko

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Intel-gfx] [PATCH RFC v2] mm: Add f_ops->populate()
@ 2022-03-07 15:43         ` Jarkko Sakkinen
  0 siblings, 0 replies; 28+ messages in thread
From: Jarkko Sakkinen @ 2022-03-07 15:43 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: codalist, jaharkes, Nathaniel McCallum, linux-unionfs, intel-gfx,
	Dave Hansen, linux-mips, dri-devel, linux-kernel, linux-mm,
	Dave Hansen, linux-fsdevel, Andrew Morton, Reinette Chatre,
	linux-sgx

On Mon, Mar 07, 2022 at 02:37:48PM +0000, Matthew Wilcox wrote:
> On Sun, Mar 06, 2022 at 03:41:54PM -0800, Dave Hansen wrote:
> > In short: page faults stink.  The core kernel has lots of ways of
> > avoiding page faults like madvise(MADV_WILLNEED) or mmap(MAP_POPULATE).
> >  But, those only work on normal RAM that the core mm manages.
> > 
> > SGX is weird.  SGX memory is managed outside the core mm.  It doesn't
> > have a 'struct page' and get_user_pages() doesn't work on it.  Its VMAs
> > are marked with VM_IO.  So, none of the existing methods for avoiding
> > page faults work on SGX memory.
> > 
> > This essentially helps extend existing "normal RAM" kernel ABIs to work
> > for avoiding faults for SGX too.  SGX users want to enjoy all of the
> > benefits of a delayed allocation policy (better resource use,
> > overcommit, NUMA affinity) but without the cost of millions of faults.
> 
> We have a mechanism for dynamically reducing the number of page faults
> already; it's just buried in the page cache code.  You have vma->vm_file,
> which contains a file_ra_state.  You can use this to track where
> recent faults have been and grow the size of the region you fault in
> per page fault.  You don't have to (indeed probably don't want to) use
> the same algorithm as the page cache, but the _principle_ is the same --
> were recent speculative faults actually used; should we grow the number
> of pages actually faulted in, or is this a random sparse workload where
> we want to allocate individual pages.
> 
> Don't rely on the user to ask.  They don't know.

This sounds like a possibility. I'll need to study it properly first
though. Thank you for pointing this out.

BR, Jarkko

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH RFC v2] mm: Add f_ops->populate()
  2022-03-07 15:29         ` Dave Hansen
  (?)
@ 2022-03-07 15:44           ` Jarkko Sakkinen
  -1 siblings, 0 replies; 28+ messages in thread
From: Jarkko Sakkinen @ 2022-03-07 15:44 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Andrew Morton, Dave Hansen, Nathaniel McCallum, Reinette Chatre,
	linux-sgx, jaharkes, linux-mips, linux-kernel, intel-gfx,
	dri-devel, codalist, linux-unionfs, linux-fsdevel, linux-mm

On Mon, Mar 07, 2022 at 07:29:22AM -0800, Dave Hansen wrote:
> On 3/7/22 03:27, Jarkko Sakkinen wrote:
> > But e.g. in __mm_populate() anything with (VM_IO | VM_PFNMAP) gets
> > filtered out and never reach that function.
> > 
> > I don't know unorthodox that'd be but could we perhaps have a VM
> > flag for SGX?
> 
> SGX only works on a subset of the chips from one vendor on one
> architecture.  That doesn't seem worth burning a VM flag.

What do you think of Matthew's idea of using ra_state for prediction?

BR, Jarkko

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH RFC v2] mm: Add f_ops->populate()
@ 2022-03-07 15:44           ` Jarkko Sakkinen
  0 siblings, 0 replies; 28+ messages in thread
From: Jarkko Sakkinen @ 2022-03-07 15:44 UTC (permalink / raw)
  To: Dave Hansen
  Cc: codalist, jaharkes, Nathaniel McCallum, linux-unionfs, intel-gfx,
	Dave Hansen, linux-mips, dri-devel, linux-kernel, linux-mm,
	linux-fsdevel, Andrew Morton, Reinette Chatre, linux-sgx

On Mon, Mar 07, 2022 at 07:29:22AM -0800, Dave Hansen wrote:
> On 3/7/22 03:27, Jarkko Sakkinen wrote:
> > But e.g. in __mm_populate() anything with (VM_IO | VM_PFNMAP) gets
> > filtered out and never reach that function.
> > 
> > I don't know unorthodox that'd be but could we perhaps have a VM
> > flag for SGX?
> 
> SGX only works on a subset of the chips from one vendor on one
> architecture.  That doesn't seem worth burning a VM flag.

What do you think of Matthew's idea of using ra_state for prediction?

BR, Jarkko

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Intel-gfx] [PATCH RFC v2] mm: Add f_ops->populate()
@ 2022-03-07 15:44           ` Jarkko Sakkinen
  0 siblings, 0 replies; 28+ messages in thread
From: Jarkko Sakkinen @ 2022-03-07 15:44 UTC (permalink / raw)
  To: Dave Hansen
  Cc: codalist, jaharkes, Nathaniel McCallum, linux-unionfs, intel-gfx,
	Dave Hansen, linux-mips, dri-devel, linux-kernel, linux-mm,
	linux-fsdevel, Andrew Morton, Reinette Chatre, linux-sgx

On Mon, Mar 07, 2022 at 07:29:22AM -0800, Dave Hansen wrote:
> On 3/7/22 03:27, Jarkko Sakkinen wrote:
> > But e.g. in __mm_populate() anything with (VM_IO | VM_PFNMAP) gets
> > filtered out and never reach that function.
> > 
> > I don't know unorthodox that'd be but could we perhaps have a VM
> > flag for SGX?
> 
> SGX only works on a subset of the chips from one vendor on one
> architecture.  That doesn't seem worth burning a VM flag.

What do you think of Matthew's idea of using ra_state for prediction?

BR, Jarkko

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH RFC v2] mm: Add f_ops->populate()
  2022-03-06  3:26 ` Jarkko Sakkinen
                   ` (2 preceding siblings ...)
  (?)
@ 2022-03-08  8:28 ` kernel test robot
  -1 siblings, 0 replies; 28+ messages in thread
From: kernel test robot @ 2022-03-08  8:28 UTC (permalink / raw)
  To: Jarkko Sakkinen; +Cc: llvm, kbuild-all

Hi Jarkko,

[FYI, it's a private test report for your RFC patch.]
[auto build test ERROR on hnaz-mm/master]

url:    https://github.com/0day-ci/linux/commits/Jarkko-Sakkinen/mm-Add-f_ops-populate/20220307-150639
base:   https://github.com/hnaz/linux-mm master
config: riscv-randconfig-r042-20220307 (https://download.01.org/0day-ci/archive/20220308/202203081636.B87sz0BM-lkp@intel.com/config)
compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project d271fc04d5b97b12e6b797c6067d3c96a8d7470e)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install riscv cross compiling tool for clang build
        # apt-get install binutils-riscv64-linux-gnu
        # https://github.com/0day-ci/linux/commit/a051c250930ed53847224b09aaa43927a25e6c63
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Jarkko-Sakkinen/mm-Add-f_ops-populate/20220307-150639
        git checkout a051c250930ed53847224b09aaa43927a25e6c63
        # save the config file to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=riscv SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All error/warnings (new ones prefixed by >>):

>> ipc/shm.c:590:34: error: use of undeclared identifier 'do_populate'; did you mean 'mm_populate'?
           ret = call_mmap(sfd->file, vma, do_populate);
                                           ^~~~~~~~~~~
                                           mm_populate
   include/linux/mm.h:2698:20: note: 'mm_populate' declared here
   static inline void mm_populate(unsigned long addr, unsigned long len)
                      ^
>> ipc/shm.c:590:34: warning: address of function 'mm_populate' will always evaluate to 'true' [-Wpointer-bool-conversion]
           ret = call_mmap(sfd->file, vma, do_populate);
                 ~~~~~~~~~                 ^~~~~~~~~~~
   ipc/shm.c:590:34: note: prefix with the address-of operator to silence this warning
           ret = call_mmap(sfd->file, vma, do_populate);
                                           ^
                                           &
   1 warning and 1 error generated.


vim +590 ipc/shm.c

   575	
   576	static int shm_mmap(struct file *file, struct vm_area_struct *vma)
   577	{
   578		struct shm_file_data *sfd = shm_file_data(file);
   579		int ret;
   580	
   581		/*
   582		 * In case of remap_file_pages() emulation, the file can represent an
   583		 * IPC ID that was removed, and possibly even reused by another shm
   584		 * segment already.  Propagate this case as an error to caller.
   585		 */
   586		ret = __shm_open(vma);
   587		if (ret)
   588			return ret;
   589	
 > 590		ret = call_mmap(sfd->file, vma, do_populate);
   591		if (ret) {
   592			shm_close(vma);
   593			return ret;
   594		}
   595		sfd->vm_ops = vma->vm_ops;
   596	#ifdef CONFIG_MMU
   597		WARN_ON(!sfd->vm_ops->fault);
   598	#endif
   599		vma->vm_ops = &shm_vm_ops;
   600		return 0;
   601	}
   602	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2022-03-08 12:51 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-06  3:26 [PATCH RFC v2] mm: Add f_ops->populate() Jarkko Sakkinen
2022-03-06  3:26 ` [Intel-gfx] " Jarkko Sakkinen
2022-03-06  3:26 ` Jarkko Sakkinen
2022-03-06 23:24 ` Andrew Morton
2022-03-06 23:24   ` [Intel-gfx] " Andrew Morton
2022-03-06 23:24   ` Andrew Morton
2022-03-06 23:41   ` Dave Hansen
2022-03-06 23:41     ` [Intel-gfx] " Dave Hansen
2022-03-06 23:41     ` Dave Hansen
2022-03-07 11:27     ` Jarkko Sakkinen
2022-03-07 11:27       ` [Intel-gfx] " Jarkko Sakkinen
2022-03-07 11:27       ` Jarkko Sakkinen
2022-03-07 15:29       ` Dave Hansen
2022-03-07 15:29         ` [Intel-gfx] " Dave Hansen
2022-03-07 15:29         ` Dave Hansen
2022-03-07 15:44         ` Jarkko Sakkinen
2022-03-07 15:44           ` [Intel-gfx] " Jarkko Sakkinen
2022-03-07 15:44           ` Jarkko Sakkinen
2022-03-07 14:37     ` Matthew Wilcox
2022-03-07 14:37       ` [Intel-gfx] " Matthew Wilcox
2022-03-07 14:37       ` Matthew Wilcox
2022-03-07 15:43       ` Jarkko Sakkinen
2022-03-07 15:43         ` [Intel-gfx] " Jarkko Sakkinen
2022-03-07 15:43         ` Jarkko Sakkinen
2022-03-07 13:00   ` Jarkko Sakkinen
2022-03-07 13:00     ` [Intel-gfx] " Jarkko Sakkinen
2022-03-07 13:00     ` Jarkko Sakkinen
2022-03-08  8:28 ` kernel test robot

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.