All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] Reduce impact of overlayfs fake path files
@ 2023-06-09  7:32 Amir Goldstein
  2023-06-09  7:32 ` [PATCH 1/3] fs: use fake_file container for internal files with fake f_path Amir Goldstein
                   ` (4 more replies)
  0 siblings, 5 replies; 27+ messages in thread
From: Amir Goldstein @ 2023-06-09  7:32 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: Christian Brauner, Al Viro, Jan Kara, Paul Moore, Tetsuo Handa,
	linux-fsdevel, linux-unionfs

Miklos,

This is the solution that we discussed for removing FMODE_NONOTIFY
from overlayfs real files.

My branch [1] has an extra patch for remove FMODE_NONOTIFY, but
I am still testing the ovl-fsnotify interaction, so we can defer
that step to later.

I wanted to post this series earlier to give more time for fsdevel
feedback and if these patches get your blessing and the blessing of
vfs maintainers, it is probably better that they will go through the
vfs tree.

I've tested that overlay "fake" path are still shown in /proc/self/maps
and in the /proc/self/exe and /proc/self/map_files/ symlinks.

The audit and tomoyo use of file_fake_path() is not tested
(CC maintainers), but they both look like user displayed paths,
so I assumed they's want to preserve the existing behavior
(i.e. displaying the fake overlayfs path).

Thanks,
Amir.

[1] https://github.com/amir73il/linux/commits/ovl_fake_path

Amir Goldstein (3):
  fs: use fake_file container for internal files with fake f_path
  fs: use file_fake_path() to get path of mapped files for display
  fs: store fake path in file_fake along with real path

 fs/cachefiles/namei.c  |  2 +-
 fs/file_table.c        | 85 ++++++++++++++++++++++++++++++++++--------
 fs/internal.h          |  5 ++-
 fs/namei.c             |  2 +-
 fs/open.c              |  9 +++--
 fs/overlayfs/file.c    |  2 +-
 fs/proc/base.c         |  8 ++--
 fs/seq_file.c          |  2 +-
 include/linux/fs.h     | 13 ++++---
 kernel/audit.c         |  3 +-
 kernel/fork.c          |  5 ++-
 security/tomoyo/util.c |  3 +-
 12 files changed, 102 insertions(+), 37 deletions(-)

-- 
2.34.1


^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 1/3] fs: use fake_file container for internal files with fake f_path
  2023-06-09  7:32 [PATCH 0/3] Reduce impact of overlayfs fake path files Amir Goldstein
@ 2023-06-09  7:32 ` Amir Goldstein
  2023-06-09 11:32   ` Christian Brauner
  2023-06-09  7:32 ` [PATCH 2/3] fs: use file_fake_path() to get path of mapped files for display Amir Goldstein
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 27+ messages in thread
From: Amir Goldstein @ 2023-06-09  7:32 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: Christian Brauner, Al Viro, Jan Kara, Paul Moore, Tetsuo Handa,
	linux-fsdevel, linux-unionfs

Overlayfs and cachefiles use open_with_fake_path() to allocate internal
files, where overlayfs also puts a "fake" path in f_path - a path which
is not on the same fs as f_inode.

Allocate a container struct file_fake for those internal files, that
will be used to hold the fake path qlong with the real path.

This commit does not populate the extra fake_path field and leaves the
overlayfs internal file's f_path fake.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
 fs/cachefiles/namei.c |  2 +-
 fs/file_table.c       | 85 +++++++++++++++++++++++++++++++++++--------
 fs/internal.h         |  5 ++-
 fs/namei.c            |  2 +-
 fs/open.c             | 11 +++---
 fs/overlayfs/file.c   |  2 +-
 include/linux/fs.h    | 13 ++++---
 7 files changed, 90 insertions(+), 30 deletions(-)

diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 82219a8f6084..a71bdf2d03ba 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -561,7 +561,7 @@ static bool cachefiles_open_file(struct cachefiles_object *object,
 	path.mnt = cache->mnt;
 	path.dentry = dentry;
 	file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
-				   d_backing_inode(dentry), cache->cache_cred);
+				   &path, cache->cache_cred);
 	if (IS_ERR(file)) {
 		trace_cachefiles_vfs_error(object, d_backing_inode(dentry),
 					   PTR_ERR(file),
diff --git a/fs/file_table.c b/fs/file_table.c
index 372653b92617..adc2a92faa52 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -44,18 +44,48 @@ static struct kmem_cache *filp_cachep __read_mostly;
 
 static struct percpu_counter nr_files __cacheline_aligned_in_smp;
 
+/* Container for file with optional fake path to display in /proc files */
+struct file_fake {
+	struct file file;
+	struct path fake_path;
+};
+
+static inline struct file_fake *file_fake(struct file *f)
+{
+	return container_of(f, struct file_fake, file);
+}
+
+/* Returns fake_path if one exists, f_path otherwise */
+const struct path *file_fake_path(struct file *f)
+{
+	struct file_fake *ff = file_fake(f);
+
+	if (!(f->f_mode & FMODE_FAKE_PATH) || !ff->fake_path.dentry)
+		return &f->f_path;
+
+	return &ff->fake_path;
+}
+EXPORT_SYMBOL(file_fake_path);
+
 static void file_free_rcu(struct rcu_head *head)
 {
 	struct file *f = container_of(head, struct file, f_rcuhead);
 
 	put_cred(f->f_cred);
-	kmem_cache_free(filp_cachep, f);
+	if (f->f_mode & FMODE_FAKE_PATH)
+		kfree(file_fake(f));
+	else
+		kmem_cache_free(filp_cachep, f);
 }
 
 static inline void file_free(struct file *f)
 {
+	struct file_fake *ff = file_fake(f);
+
 	security_file_free(f);
-	if (!(f->f_mode & FMODE_NOACCOUNT))
+	if (f->f_mode & FMODE_FAKE_PATH)
+		path_put(&ff->fake_path);
+	else
 		percpu_counter_dec(&nr_files);
 	call_rcu(&f->f_rcuhead, file_free_rcu);
 }
@@ -131,20 +161,15 @@ static int __init init_fs_stat_sysctls(void)
 fs_initcall(init_fs_stat_sysctls);
 #endif
 
-static struct file *__alloc_file(int flags, const struct cred *cred)
+static int init_file(struct file *f, int flags, const struct cred *cred)
 {
-	struct file *f;
 	int error;
 
-	f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
-	if (unlikely(!f))
-		return ERR_PTR(-ENOMEM);
-
 	f->f_cred = get_cred(cred);
 	error = security_file_alloc(f);
 	if (unlikely(error)) {
 		file_free_rcu(&f->f_rcuhead);
-		return ERR_PTR(error);
+		return error;
 	}
 
 	atomic_long_set(&f->f_count, 1);
@@ -155,6 +180,22 @@ static struct file *__alloc_file(int flags, const struct cred *cred)
 	f->f_mode = OPEN_FMODE(flags);
 	/* f->f_version: 0 */
 
+	return 0;
+}
+
+static struct file *__alloc_file(int flags, const struct cred *cred)
+{
+	struct file *f;
+	int error;
+
+	f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
+	if (unlikely(!f))
+		return ERR_PTR(-ENOMEM);
+
+	error = init_file(f, flags, cred);
+	if (unlikely(error))
+		return ERR_PTR(error);
+
 	return f;
 }
 
@@ -201,18 +242,32 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
 }
 
 /*
- * Variant of alloc_empty_file() that doesn't check and modify nr_files.
+ * Variant of alloc_empty_file() that allocates a file_fake container
+ * and doesn't check and modify nr_files.
  *
  * Should not be used unless there's a very good reason to do so.
  */
-struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
+struct file *alloc_empty_file_fake(const struct path *fake_path, int flags,
+				   const struct cred *cred)
 {
-	struct file *f = __alloc_file(flags, cred);
+	struct file_fake *ff;
+	int error;
 
-	if (!IS_ERR(f))
-		f->f_mode |= FMODE_NOACCOUNT;
+	ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL);
+	if (unlikely(!ff))
+		return ERR_PTR(-ENOMEM);
 
-	return f;
+	error = init_file(&ff->file, flags, cred);
+	if (unlikely(error))
+		return ERR_PTR(error);
+
+	ff->file.f_mode |= FMODE_FAKE_PATH;
+	if (fake_path) {
+		path_get(fake_path);
+		ff->fake_path = *fake_path;
+	}
+
+	return &ff->file;
 }
 
 /**
diff --git a/fs/internal.h b/fs/internal.h
index bd3b2810a36b..8890c694745b 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -97,8 +97,9 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
 /*
  * file_table.c
  */
-extern struct file *alloc_empty_file(int, const struct cred *);
-extern struct file *alloc_empty_file_noaccount(int, const struct cred *);
+extern struct file *alloc_empty_file(int flags, const struct cred *cred);
+extern struct file *alloc_empty_file_fake(const struct path *fake_path,
+					  int flags, const struct cred *cred);
 
 static inline void put_file_access(struct file *file)
 {
diff --git a/fs/namei.c b/fs/namei.c
index e4fe0879ae55..5e6de1f29f4e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3721,7 +3721,7 @@ struct file *vfs_tmpfile_open(struct mnt_idmap *idmap,
 	struct file *file;
 	int error;
 
-	file = alloc_empty_file_noaccount(open_flag, cred);
+	file = alloc_empty_file_fake(NULL, open_flag, cred);
 	if (!IS_ERR(file)) {
 		error = vfs_tmpfile(idmap, parentpath, file, mode);
 		if (error) {
diff --git a/fs/open.c b/fs/open.c
index 4478adcc4f3a..c9e2300a037d 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1116,15 +1116,16 @@ struct file *dentry_create(const struct path *path, int flags, umode_t mode,
 }
 EXPORT_SYMBOL(dentry_create);
 
-struct file *open_with_fake_path(const struct path *path, int flags,
-				struct inode *inode, const struct cred *cred)
+struct file *open_with_fake_path(const struct path *fake_path, int flags,
+				 const struct path *path,
+				 const struct cred *cred)
 {
-	struct file *f = alloc_empty_file_noaccount(flags, cred);
+	struct file *f = alloc_empty_file_fake(NULL, flags, cred);
 	if (!IS_ERR(f)) {
 		int error;
 
-		f->f_path = *path;
-		error = do_dentry_open(f, inode, NULL);
+		f->f_path = *fake_path;
+		error = do_dentry_open(f, d_inode(path->dentry), NULL);
 		if (error) {
 			fput(f);
 			f = ERR_PTR(error);
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 7c04f033aadd..f5987377e9eb 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -61,7 +61,7 @@ static struct file *ovl_open_realfile(const struct file *file,
 		if (!inode_owner_or_capable(real_idmap, realinode))
 			flags &= ~O_NOATIME;
 
-		realfile = open_with_fake_path(&file->f_path, flags, realinode,
+		realfile = open_with_fake_path(&file->f_path, flags, realpath,
 					       current_cred());
 	}
 	revert_creds(old_cred);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 21a981680856..b112a3c9b499 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -180,8 +180,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 /* File represents mount that needs unmounting */
 #define FMODE_NEED_UNMOUNT	((__force fmode_t)0x10000000)
 
-/* File does not contribute to nr_files count */
-#define FMODE_NOACCOUNT		((__force fmode_t)0x20000000)
+/* File is embedded in file_fake and doesn't contribute to nr_files */
+#define FMODE_FAKE_PATH		((__force fmode_t)0x20000000)
 
 /* File supports async buffered reads */
 #define FMODE_BUF_RASYNC	((__force fmode_t)0x40000000)
@@ -2349,11 +2349,14 @@ static inline struct file *file_open_root_mnt(struct vfsmount *mnt,
 	return file_open_root(&(struct path){.mnt = mnt, .dentry = mnt->mnt_root},
 			      name, flags, mode);
 }
-extern struct file * dentry_open(const struct path *, int, const struct cred *);
+extern struct file *dentry_open(const struct path *path, int flags,
+				const struct cred *creds);
 extern struct file *dentry_create(const struct path *path, int flags,
 				  umode_t mode, const struct cred *cred);
-extern struct file * open_with_fake_path(const struct path *, int,
-					 struct inode*, const struct cred *);
+extern struct file *open_with_fake_path(const struct path *fake_path, int flags,
+					const struct path *path,
+					const struct cred *cred);
+extern const struct path *file_fake_path(struct file *file);
 static inline struct file *file_clone_open(struct file *file)
 {
 	return dentry_open(&file->f_path, file->f_flags, file->f_cred);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 2/3] fs: use file_fake_path() to get path of mapped files for display
  2023-06-09  7:32 [PATCH 0/3] Reduce impact of overlayfs fake path files Amir Goldstein
  2023-06-09  7:32 ` [PATCH 1/3] fs: use fake_file container for internal files with fake f_path Amir Goldstein
@ 2023-06-09  7:32 ` Amir Goldstein
  2023-06-09  8:19   ` Miklos Szeredi
  2023-06-09  7:32 ` [PATCH 3/3] fs: store fake path in file_fake along with real path Amir Goldstein
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 27+ messages in thread
From: Amir Goldstein @ 2023-06-09  7:32 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: Christian Brauner, Al Viro, Jan Kara, Paul Moore, Tetsuo Handa,
	linux-fsdevel, linux-unionfs

/proc/$pid/maps and /proc/$pid/exe contain display paths of mapped file.
audot and tomoyo also log the display path of the mapped exec file.

When the mapped file comes from overlayfs, we need to use the macro
file_fake_path() to make sure that we get the fake overlayfs path and
not the real internal path.

At the time of this commit, file_fake_path() always returns f_path,
where overlayfs has stored the fake overlayfs path, but soon we are
going to change the location that the fake path is stored.

Cc: Paul Moore <paul@paul-moore.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
 fs/proc/base.c         | 8 +++++---
 fs/seq_file.c          | 2 +-
 kernel/audit.c         | 3 ++-
 kernel/fork.c          | 5 +++--
 security/tomoyo/util.c | 3 ++-
 5 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 05452c3b9872..d6f8c77a3e38 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1730,8 +1730,9 @@ static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
 	exe_file = get_task_exe_file(task);
 	put_task_struct(task);
 	if (exe_file) {
-		*exe_path = exe_file->f_path;
-		path_get(&exe_file->f_path);
+		/* Overlayfs mapped files have fake path */
+		*exe_path = *file_fake_path(exe_file);
+		path_get(exe_path);
 		fput(exe_file);
 		return 0;
 	} else
@@ -2218,7 +2219,8 @@ static int map_files_get_link(struct dentry *dentry, struct path *path)
 	rc = -ENOENT;
 	vma = find_exact_vma(mm, vm_start, vm_end);
 	if (vma && vma->vm_file) {
-		*path = vma->vm_file->f_path;
+		/* Overlayfs mapped files have fake path */
+		*path = *file_fake_path(vma->vm_file);
 		path_get(path);
 		rc = 0;
 	}
diff --git a/fs/seq_file.c b/fs/seq_file.c
index f5fdaf3b1572..7e65fde4336a 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -497,7 +497,7 @@ EXPORT_SYMBOL(seq_path);
  */
 int seq_file_path(struct seq_file *m, struct file *file, const char *esc)
 {
-	return seq_path(m, &file->f_path, esc);
+	return seq_path(m, file_fake_path(file), esc);
 }
 EXPORT_SYMBOL(seq_file_path);
 
diff --git a/kernel/audit.c b/kernel/audit.c
index 9bc0b0301198..91975f139a03 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -2202,7 +2202,8 @@ void audit_log_d_path_exe(struct audit_buffer *ab,
 	if (!exe_file)
 		goto out_null;
 
-	audit_log_d_path(ab, " exe=", &exe_file->f_path);
+	/* Overlayfs mapped files have fake path */
+	audit_log_d_path(ab, " exe=", file_fake_path(exe_file));
 	fput(exe_file);
 	return;
 out_null:
diff --git a/kernel/fork.c b/kernel/fork.c
index ed4e01daccaa..9a3c138a677e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1455,8 +1455,9 @@ int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
 		for_each_vma(vmi, vma) {
 			if (!vma->vm_file)
 				continue;
-			if (path_equal(&vma->vm_file->f_path,
-				       &old_exe_file->f_path)) {
+			/* Overlayfs mapped files have fake path */
+			if (path_equal(file_fake_path(vma->vm_file),
+				       file_fake_path(old_exe_file))) {
 				ret = -EBUSY;
 				break;
 			}
diff --git a/security/tomoyo/util.c b/security/tomoyo/util.c
index 6799b1122c9d..ff0d94fb431c 100644
--- a/security/tomoyo/util.c
+++ b/security/tomoyo/util.c
@@ -975,7 +975,8 @@ const char *tomoyo_get_exe(void)
 	if (!exe_file)
 		return NULL;
 
-	cp = tomoyo_realpath_from_path(&exe_file->f_path);
+	/* Overlayfs mapped files have fake path */
+	cp = tomoyo_realpath_from_path(file_fake_path(exe_file));
 	fput(exe_file);
 	return cp;
 }
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 3/3] fs: store fake path in file_fake along with real path
  2023-06-09  7:32 [PATCH 0/3] Reduce impact of overlayfs fake path files Amir Goldstein
  2023-06-09  7:32 ` [PATCH 1/3] fs: use fake_file container for internal files with fake f_path Amir Goldstein
  2023-06-09  7:32 ` [PATCH 2/3] fs: use file_fake_path() to get path of mapped files for display Amir Goldstein
@ 2023-06-09  7:32 ` Amir Goldstein
  2023-06-09 11:12   ` Christian Brauner
  2023-06-09 13:15 ` [PATCH 0/3] Reduce impact of overlayfs fake path files Miklos Szeredi
  2023-06-09 13:15 ` Tetsuo Handa
  4 siblings, 1 reply; 27+ messages in thread
From: Amir Goldstein @ 2023-06-09  7:32 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: Christian Brauner, Al Viro, Jan Kara, Paul Moore, Tetsuo Handa,
	linux-fsdevel, linux-unionfs

Instead of storing only the fake path in f_path, store the real path
in f_path and the fake path in file_fake container.

Call sites that use the macro file_fake_path() continue to get the fake
path from its new location.

Call sites that access f_path directly will now see the overlayfs real
path instead of the fake overlayfs path, which is the desired bahvior
for most users, because it makes f_path consistent with f_inode.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
 fs/open.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/open.c b/fs/open.c
index c9e2300a037d..4f4e7534f515 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1120,11 +1120,11 @@ struct file *open_with_fake_path(const struct path *fake_path, int flags,
 				 const struct path *path,
 				 const struct cred *cred)
 {
-	struct file *f = alloc_empty_file_fake(NULL, flags, cred);
+	struct file *f = alloc_empty_file_fake(fake_path, flags, cred);
 	if (!IS_ERR(f)) {
 		int error;
 
-		f->f_path = *fake_path;
+		f->f_path = *path;
 		error = do_dentry_open(f, d_inode(path->dentry), NULL);
 		if (error) {
 			fput(f);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/3] fs: use file_fake_path() to get path of mapped files for display
  2023-06-09  7:32 ` [PATCH 2/3] fs: use file_fake_path() to get path of mapped files for display Amir Goldstein
@ 2023-06-09  8:19   ` Miklos Szeredi
  0 siblings, 0 replies; 27+ messages in thread
From: Miklos Szeredi @ 2023-06-09  8:19 UTC (permalink / raw)
  To: Amir Goldstein
  Cc: Christian Brauner, Al Viro, Jan Kara, Paul Moore, Tetsuo Handa,
	linux-fsdevel, linux-unionfs

On Fri, 9 Jun 2023 at 09:32, Amir Goldstein <amir73il@gmail.com> wrote:
>
> /proc/$pid/maps and /proc/$pid/exe contain display paths of mapped file.
> audot and tomoyo also log the display path of the mapped exec file.

/proc/PID/exe is based on task->mm->exe_file.  AFAICS this will be the
overlay file not the realfile, so it shouldn't need any special
treatment.

Same for tomoyo.

Maybe I'm missing something?

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/3] fs: store fake path in file_fake along with real path
  2023-06-09  7:32 ` [PATCH 3/3] fs: store fake path in file_fake along with real path Amir Goldstein
@ 2023-06-09 11:12   ` Christian Brauner
  2023-06-09 11:30     ` Amir Goldstein
  0 siblings, 1 reply; 27+ messages in thread
From: Christian Brauner @ 2023-06-09 11:12 UTC (permalink / raw)
  To: Amir Goldstein
  Cc: Miklos Szeredi, Al Viro, Jan Kara, Paul Moore, Tetsuo Handa,
	linux-fsdevel, linux-unionfs

On Fri, Jun 09, 2023 at 10:32:39AM +0300, Amir Goldstein wrote:
> Instead of storing only the fake path in f_path, store the real path
> in f_path and the fake path in file_fake container.
> 
> Call sites that use the macro file_fake_path() continue to get the fake
> path from its new location.
> 
> Call sites that access f_path directly will now see the overlayfs real
> path instead of the fake overlayfs path, which is the desired bahvior
> for most users, because it makes f_path consistent with f_inode.
> 
> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> ---

If you resend, can you take the chance and refactor this into a slightly more
readable pattern, please? So something like

struct file *open_with_fake_path(const struct path *fake_path, int flags,
                                 const struct path *path,
                                 const struct cred *cred)
{
        int error;
        struct file *f;

        f = alloc_empty_file_fake(fake_path, flags, cred);
        if (IS_ERR(f))
                return f;

        f->f_path = *path;
        error = do_dentry_open(f, d_inode(path->dentry), NULL);
        if (error) {
                fput(f);
                return ERR_PTR(error);
        }

        return f;
}

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/3] fs: store fake path in file_fake along with real path
  2023-06-09 11:12   ` Christian Brauner
@ 2023-06-09 11:30     ` Amir Goldstein
  0 siblings, 0 replies; 27+ messages in thread
From: Amir Goldstein @ 2023-06-09 11:30 UTC (permalink / raw)
  To: Christian Brauner
  Cc: Miklos Szeredi, Al Viro, Jan Kara, Paul Moore, Tetsuo Handa,
	linux-fsdevel, linux-unionfs

On Fri, Jun 9, 2023 at 2:12 PM Christian Brauner <brauner@kernel.org> wrote:
>
> On Fri, Jun 09, 2023 at 10:32:39AM +0300, Amir Goldstein wrote:
> > Instead of storing only the fake path in f_path, store the real path
> > in f_path and the fake path in file_fake container.
> >
> > Call sites that use the macro file_fake_path() continue to get the fake
> > path from its new location.
> >
> > Call sites that access f_path directly will now see the overlayfs real
> > path instead of the fake overlayfs path, which is the desired bahvior
> > for most users, because it makes f_path consistent with f_inode.
> >
> > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > ---
>
> If you resend, can you take the chance and refactor this into a slightly more
> readable pattern, please? So something like
>

Sure!
I like this better myself.

Thanks,
Amir.

> struct file *open_with_fake_path(const struct path *fake_path, int flags,
>                                  const struct path *path,
>                                  const struct cred *cred)
> {
>         int error;
>         struct file *f;
>
>         f = alloc_empty_file_fake(fake_path, flags, cred);
>         if (IS_ERR(f))
>                 return f;
>
>         f->f_path = *path;
>         error = do_dentry_open(f, d_inode(path->dentry), NULL);
>         if (error) {
>                 fput(f);
>                 return ERR_PTR(error);
>         }
>
>         return f;
> }

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/3] fs: use fake_file container for internal files with fake f_path
  2023-06-09  7:32 ` [PATCH 1/3] fs: use fake_file container for internal files with fake f_path Amir Goldstein
@ 2023-06-09 11:32   ` Christian Brauner
  2023-06-09 11:57     ` Amir Goldstein
  0 siblings, 1 reply; 27+ messages in thread
From: Christian Brauner @ 2023-06-09 11:32 UTC (permalink / raw)
  To: Amir Goldstein
  Cc: Miklos Szeredi, Al Viro, Jan Kara, Paul Moore, Tetsuo Handa,
	linux-fsdevel, linux-unionfs

On Fri, Jun 09, 2023 at 10:32:37AM +0300, Amir Goldstein wrote:
> Overlayfs and cachefiles use open_with_fake_path() to allocate internal
> files, where overlayfs also puts a "fake" path in f_path - a path which
> is not on the same fs as f_inode.
> 
> Allocate a container struct file_fake for those internal files, that
> will be used to hold the fake path qlong with the real path.
> 
> This commit does not populate the extra fake_path field and leaves the
> overlayfs internal file's f_path fake.
> 
> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> ---
>  fs/cachefiles/namei.c |  2 +-
>  fs/file_table.c       | 85 +++++++++++++++++++++++++++++++++++--------
>  fs/internal.h         |  5 ++-
>  fs/namei.c            |  2 +-
>  fs/open.c             | 11 +++---
>  fs/overlayfs/file.c   |  2 +-
>  include/linux/fs.h    | 13 ++++---
>  7 files changed, 90 insertions(+), 30 deletions(-)
> 
> diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
> index 82219a8f6084..a71bdf2d03ba 100644
> --- a/fs/cachefiles/namei.c
> +++ b/fs/cachefiles/namei.c
> @@ -561,7 +561,7 @@ static bool cachefiles_open_file(struct cachefiles_object *object,
>  	path.mnt = cache->mnt;
>  	path.dentry = dentry;
>  	file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
> -				   d_backing_inode(dentry), cache->cache_cred);
> +				   &path, cache->cache_cred);
>  	if (IS_ERR(file)) {
>  		trace_cachefiles_vfs_error(object, d_backing_inode(dentry),
>  					   PTR_ERR(file),
> diff --git a/fs/file_table.c b/fs/file_table.c
> index 372653b92617..adc2a92faa52 100644
> --- a/fs/file_table.c
> +++ b/fs/file_table.c
> @@ -44,18 +44,48 @@ static struct kmem_cache *filp_cachep __read_mostly;
>  
>  static struct percpu_counter nr_files __cacheline_aligned_in_smp;
>  
> +/* Container for file with optional fake path to display in /proc files */
> +struct file_fake {
> +	struct file file;
> +	struct path fake_path;
> +};
> +
> +static inline struct file_fake *file_fake(struct file *f)
> +{
> +	return container_of(f, struct file_fake, file);
> +}
> +
> +/* Returns fake_path if one exists, f_path otherwise */
> +const struct path *file_fake_path(struct file *f)
> +{
> +	struct file_fake *ff = file_fake(f);
> +
> +	if (!(f->f_mode & FMODE_FAKE_PATH) || !ff->fake_path.dentry)
> +		return &f->f_path;
> +
> +	return &ff->fake_path;
> +}
> +EXPORT_SYMBOL(file_fake_path);
> +
>  static void file_free_rcu(struct rcu_head *head)
>  {
>  	struct file *f = container_of(head, struct file, f_rcuhead);
>  
>  	put_cred(f->f_cred);
> -	kmem_cache_free(filp_cachep, f);
> +	if (f->f_mode & FMODE_FAKE_PATH)
> +		kfree(file_fake(f));
> +	else
> +		kmem_cache_free(filp_cachep, f);
>  }
>  
>  static inline void file_free(struct file *f)
>  {
> +	struct file_fake *ff = file_fake(f);
> +
>  	security_file_free(f);
> -	if (!(f->f_mode & FMODE_NOACCOUNT))
> +	if (f->f_mode & FMODE_FAKE_PATH)
> +		path_put(&ff->fake_path);
> +	else
>  		percpu_counter_dec(&nr_files);
>  	call_rcu(&f->f_rcuhead, file_free_rcu);
>  }
> @@ -131,20 +161,15 @@ static int __init init_fs_stat_sysctls(void)
>  fs_initcall(init_fs_stat_sysctls);
>  #endif
>  
> -static struct file *__alloc_file(int flags, const struct cred *cred)
> +static int init_file(struct file *f, int flags, const struct cred *cred)
>  {
> -	struct file *f;
>  	int error;
>  
> -	f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> -	if (unlikely(!f))
> -		return ERR_PTR(-ENOMEM);
> -
>  	f->f_cred = get_cred(cred);
>  	error = security_file_alloc(f);
>  	if (unlikely(error)) {
>  		file_free_rcu(&f->f_rcuhead);
> -		return ERR_PTR(error);
> +		return error;
>  	}
>  
>  	atomic_long_set(&f->f_count, 1);
> @@ -155,6 +180,22 @@ static struct file *__alloc_file(int flags, const struct cred *cred)
>  	f->f_mode = OPEN_FMODE(flags);
>  	/* f->f_version: 0 */
>  
> +	return 0;
> +}
> +
> +static struct file *__alloc_file(int flags, const struct cred *cred)
> +{
> +	struct file *f;
> +	int error;
> +
> +	f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> +	if (unlikely(!f))
> +		return ERR_PTR(-ENOMEM);
> +
> +	error = init_file(f, flags, cred);
> +	if (unlikely(error))
> +		return ERR_PTR(error);
> +
>  	return f;
>  }
>  
> @@ -201,18 +242,32 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
>  }
>  
>  /*
> - * Variant of alloc_empty_file() that doesn't check and modify nr_files.
> + * Variant of alloc_empty_file() that allocates a file_fake container
> + * and doesn't check and modify nr_files.
>   *
>   * Should not be used unless there's a very good reason to do so.
>   */
> -struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
> +struct file *alloc_empty_file_fake(const struct path *fake_path, int flags,
> +				   const struct cred *cred)
>  {
> -	struct file *f = __alloc_file(flags, cred);
> +	struct file_fake *ff;
> +	int error;
>  
> -	if (!IS_ERR(f))
> -		f->f_mode |= FMODE_NOACCOUNT;
> +	ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL);
> +	if (unlikely(!ff))
> +		return ERR_PTR(-ENOMEM);
>  
> -	return f;
> +	error = init_file(&ff->file, flags, cred);
> +	if (unlikely(error))
> +		return ERR_PTR(error);
> +
> +	ff->file.f_mode |= FMODE_FAKE_PATH;
> +	if (fake_path) {
> +		path_get(fake_path);
> +		ff->fake_path = *fake_path;
> +	}

Hm, I see that this check is mostly done for vfs_tmpfile_open() which
only fills in file->f_path in vfs_tmpfile() but leaves ff->fake_path
NULL.

So really I think having FMODE_FAKE_PATH set but ff->fake_path be NULL
is an invitation for NULL derefs sooner or later. I would simply
document that it's required to set ff->fake_path. For callers such as
vfs_tmpfile_open() it can just be path itself. IOW, vfs_tmpfile_open()
should set ff->fake_path to file->f_path.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/3] fs: use fake_file container for internal files with fake f_path
  2023-06-09 11:32   ` Christian Brauner
@ 2023-06-09 11:57     ` Amir Goldstein
  2023-06-09 12:12       ` Christian Brauner
  0 siblings, 1 reply; 27+ messages in thread
From: Amir Goldstein @ 2023-06-09 11:57 UTC (permalink / raw)
  To: Christian Brauner
  Cc: Miklos Szeredi, Al Viro, Jan Kara, Paul Moore, Tetsuo Handa,
	linux-fsdevel, linux-unionfs

On Fri, Jun 9, 2023 at 2:32 PM Christian Brauner <brauner@kernel.org> wrote:
>
> On Fri, Jun 09, 2023 at 10:32:37AM +0300, Amir Goldstein wrote:
> > Overlayfs and cachefiles use open_with_fake_path() to allocate internal
> > files, where overlayfs also puts a "fake" path in f_path - a path which
> > is not on the same fs as f_inode.
> >
> > Allocate a container struct file_fake for those internal files, that
> > will be used to hold the fake path qlong with the real path.
> >
> > This commit does not populate the extra fake_path field and leaves the
> > overlayfs internal file's f_path fake.
> >
> > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > ---
> >  fs/cachefiles/namei.c |  2 +-
> >  fs/file_table.c       | 85 +++++++++++++++++++++++++++++++++++--------
> >  fs/internal.h         |  5 ++-
> >  fs/namei.c            |  2 +-
> >  fs/open.c             | 11 +++---
> >  fs/overlayfs/file.c   |  2 +-
> >  include/linux/fs.h    | 13 ++++---
> >  7 files changed, 90 insertions(+), 30 deletions(-)
> >
> > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
> > index 82219a8f6084..a71bdf2d03ba 100644
> > --- a/fs/cachefiles/namei.c
> > +++ b/fs/cachefiles/namei.c
> > @@ -561,7 +561,7 @@ static bool cachefiles_open_file(struct cachefiles_object *object,
> >       path.mnt = cache->mnt;
> >       path.dentry = dentry;
> >       file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
> > -                                d_backing_inode(dentry), cache->cache_cred);
> > +                                &path, cache->cache_cred);
> >       if (IS_ERR(file)) {
> >               trace_cachefiles_vfs_error(object, d_backing_inode(dentry),
> >                                          PTR_ERR(file),
> > diff --git a/fs/file_table.c b/fs/file_table.c
> > index 372653b92617..adc2a92faa52 100644
> > --- a/fs/file_table.c
> > +++ b/fs/file_table.c
> > @@ -44,18 +44,48 @@ static struct kmem_cache *filp_cachep __read_mostly;
> >
> >  static struct percpu_counter nr_files __cacheline_aligned_in_smp;
> >
> > +/* Container for file with optional fake path to display in /proc files */
> > +struct file_fake {
> > +     struct file file;
> > +     struct path fake_path;
> > +};
> > +
> > +static inline struct file_fake *file_fake(struct file *f)
> > +{
> > +     return container_of(f, struct file_fake, file);
> > +}
> > +
> > +/* Returns fake_path if one exists, f_path otherwise */
> > +const struct path *file_fake_path(struct file *f)
> > +{
> > +     struct file_fake *ff = file_fake(f);
> > +
> > +     if (!(f->f_mode & FMODE_FAKE_PATH) || !ff->fake_path.dentry)
> > +             return &f->f_path;
> > +
> > +     return &ff->fake_path;
> > +}
> > +EXPORT_SYMBOL(file_fake_path);
> > +
> >  static void file_free_rcu(struct rcu_head *head)
> >  {
> >       struct file *f = container_of(head, struct file, f_rcuhead);
> >
> >       put_cred(f->f_cred);
> > -     kmem_cache_free(filp_cachep, f);
> > +     if (f->f_mode & FMODE_FAKE_PATH)
> > +             kfree(file_fake(f));
> > +     else
> > +             kmem_cache_free(filp_cachep, f);
> >  }
> >
> >  static inline void file_free(struct file *f)
> >  {
> > +     struct file_fake *ff = file_fake(f);
> > +
> >       security_file_free(f);
> > -     if (!(f->f_mode & FMODE_NOACCOUNT))
> > +     if (f->f_mode & FMODE_FAKE_PATH)
> > +             path_put(&ff->fake_path);
> > +     else
> >               percpu_counter_dec(&nr_files);
> >       call_rcu(&f->f_rcuhead, file_free_rcu);
> >  }
> > @@ -131,20 +161,15 @@ static int __init init_fs_stat_sysctls(void)
> >  fs_initcall(init_fs_stat_sysctls);
> >  #endif
> >
> > -static struct file *__alloc_file(int flags, const struct cred *cred)
> > +static int init_file(struct file *f, int flags, const struct cred *cred)
> >  {
> > -     struct file *f;
> >       int error;
> >
> > -     f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> > -     if (unlikely(!f))
> > -             return ERR_PTR(-ENOMEM);
> > -
> >       f->f_cred = get_cred(cred);
> >       error = security_file_alloc(f);
> >       if (unlikely(error)) {
> >               file_free_rcu(&f->f_rcuhead);
> > -             return ERR_PTR(error);
> > +             return error;
> >       }
> >
> >       atomic_long_set(&f->f_count, 1);
> > @@ -155,6 +180,22 @@ static struct file *__alloc_file(int flags, const struct cred *cred)
> >       f->f_mode = OPEN_FMODE(flags);
> >       /* f->f_version: 0 */
> >
> > +     return 0;
> > +}
> > +
> > +static struct file *__alloc_file(int flags, const struct cred *cred)
> > +{
> > +     struct file *f;
> > +     int error;
> > +
> > +     f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> > +     if (unlikely(!f))
> > +             return ERR_PTR(-ENOMEM);
> > +
> > +     error = init_file(f, flags, cred);
> > +     if (unlikely(error))
> > +             return ERR_PTR(error);
> > +
> >       return f;
> >  }
> >
> > @@ -201,18 +242,32 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
> >  }
> >
> >  /*
> > - * Variant of alloc_empty_file() that doesn't check and modify nr_files.
> > + * Variant of alloc_empty_file() that allocates a file_fake container
> > + * and doesn't check and modify nr_files.
> >   *
> >   * Should not be used unless there's a very good reason to do so.
> >   */
> > -struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
> > +struct file *alloc_empty_file_fake(const struct path *fake_path, int flags,
> > +                                const struct cred *cred)
> >  {
> > -     struct file *f = __alloc_file(flags, cred);
> > +     struct file_fake *ff;
> > +     int error;
> >
> > -     if (!IS_ERR(f))
> > -             f->f_mode |= FMODE_NOACCOUNT;
> > +     ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL);
> > +     if (unlikely(!ff))
> > +             return ERR_PTR(-ENOMEM);
> >
> > -     return f;
> > +     error = init_file(&ff->file, flags, cred);
> > +     if (unlikely(error))
> > +             return ERR_PTR(error);
> > +
> > +     ff->file.f_mode |= FMODE_FAKE_PATH;
> > +     if (fake_path) {
> > +             path_get(fake_path);
> > +             ff->fake_path = *fake_path;
> > +     }
>
> Hm, I see that this check is mostly done for vfs_tmpfile_open() which
> only fills in file->f_path in vfs_tmpfile() but leaves ff->fake_path
> NULL.
>
> So really I think having FMODE_FAKE_PATH set but ff->fake_path be NULL
> is an invitation for NULL derefs sooner or later. I would simply
> document that it's required to set ff->fake_path. For callers such as
> vfs_tmpfile_open() it can just be path itself. IOW, vfs_tmpfile_open()
> should set ff->fake_path to file->f_path.

Makes sense.
I also took the liberty to re-arrange vfs_tmpfile_open() without the
unneeded if (!error) { nesting depth.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/3] fs: use fake_file container for internal files with fake f_path
  2023-06-09 11:57     ` Amir Goldstein
@ 2023-06-09 12:12       ` Christian Brauner
  2023-06-09 12:20         ` Amir Goldstein
  2023-06-11 19:11         ` Amir Goldstein
  0 siblings, 2 replies; 27+ messages in thread
From: Christian Brauner @ 2023-06-09 12:12 UTC (permalink / raw)
  To: Amir Goldstein
  Cc: Miklos Szeredi, Al Viro, Jan Kara, Paul Moore, Tetsuo Handa,
	linux-fsdevel, linux-unionfs

On Fri, Jun 09, 2023 at 02:57:20PM +0300, Amir Goldstein wrote:
> On Fri, Jun 9, 2023 at 2:32 PM Christian Brauner <brauner@kernel.org> wrote:
> >
> > On Fri, Jun 09, 2023 at 10:32:37AM +0300, Amir Goldstein wrote:
> > > Overlayfs and cachefiles use open_with_fake_path() to allocate internal
> > > files, where overlayfs also puts a "fake" path in f_path - a path which
> > > is not on the same fs as f_inode.
> > >
> > > Allocate a container struct file_fake for those internal files, that
> > > will be used to hold the fake path qlong with the real path.
> > >
> > > This commit does not populate the extra fake_path field and leaves the
> > > overlayfs internal file's f_path fake.
> > >
> > > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > > ---
> > >  fs/cachefiles/namei.c |  2 +-
> > >  fs/file_table.c       | 85 +++++++++++++++++++++++++++++++++++--------
> > >  fs/internal.h         |  5 ++-
> > >  fs/namei.c            |  2 +-
> > >  fs/open.c             | 11 +++---
> > >  fs/overlayfs/file.c   |  2 +-
> > >  include/linux/fs.h    | 13 ++++---
> > >  7 files changed, 90 insertions(+), 30 deletions(-)
> > >
> > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
> > > index 82219a8f6084..a71bdf2d03ba 100644
> > > --- a/fs/cachefiles/namei.c
> > > +++ b/fs/cachefiles/namei.c
> > > @@ -561,7 +561,7 @@ static bool cachefiles_open_file(struct cachefiles_object *object,
> > >       path.mnt = cache->mnt;
> > >       path.dentry = dentry;
> > >       file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
> > > -                                d_backing_inode(dentry), cache->cache_cred);
> > > +                                &path, cache->cache_cred);
> > >       if (IS_ERR(file)) {
> > >               trace_cachefiles_vfs_error(object, d_backing_inode(dentry),
> > >                                          PTR_ERR(file),
> > > diff --git a/fs/file_table.c b/fs/file_table.c
> > > index 372653b92617..adc2a92faa52 100644
> > > --- a/fs/file_table.c
> > > +++ b/fs/file_table.c
> > > @@ -44,18 +44,48 @@ static struct kmem_cache *filp_cachep __read_mostly;
> > >
> > >  static struct percpu_counter nr_files __cacheline_aligned_in_smp;
> > >
> > > +/* Container for file with optional fake path to display in /proc files */
> > > +struct file_fake {
> > > +     struct file file;
> > > +     struct path fake_path;
> > > +};
> > > +
> > > +static inline struct file_fake *file_fake(struct file *f)
> > > +{
> > > +     return container_of(f, struct file_fake, file);
> > > +}
> > > +
> > > +/* Returns fake_path if one exists, f_path otherwise */
> > > +const struct path *file_fake_path(struct file *f)
> > > +{
> > > +     struct file_fake *ff = file_fake(f);
> > > +
> > > +     if (!(f->f_mode & FMODE_FAKE_PATH) || !ff->fake_path.dentry)
> > > +             return &f->f_path;
> > > +
> > > +     return &ff->fake_path;
> > > +}
> > > +EXPORT_SYMBOL(file_fake_path);
> > > +
> > >  static void file_free_rcu(struct rcu_head *head)
> > >  {
> > >       struct file *f = container_of(head, struct file, f_rcuhead);
> > >
> > >       put_cred(f->f_cred);
> > > -     kmem_cache_free(filp_cachep, f);
> > > +     if (f->f_mode & FMODE_FAKE_PATH)
> > > +             kfree(file_fake(f));
> > > +     else
> > > +             kmem_cache_free(filp_cachep, f);
> > >  }
> > >
> > >  static inline void file_free(struct file *f)
> > >  {
> > > +     struct file_fake *ff = file_fake(f);
> > > +
> > >       security_file_free(f);
> > > -     if (!(f->f_mode & FMODE_NOACCOUNT))
> > > +     if (f->f_mode & FMODE_FAKE_PATH)
> > > +             path_put(&ff->fake_path);
> > > +     else
> > >               percpu_counter_dec(&nr_files);
> > >       call_rcu(&f->f_rcuhead, file_free_rcu);
> > >  }
> > > @@ -131,20 +161,15 @@ static int __init init_fs_stat_sysctls(void)
> > >  fs_initcall(init_fs_stat_sysctls);
> > >  #endif
> > >
> > > -static struct file *__alloc_file(int flags, const struct cred *cred)
> > > +static int init_file(struct file *f, int flags, const struct cred *cred)
> > >  {
> > > -     struct file *f;
> > >       int error;
> > >
> > > -     f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> > > -     if (unlikely(!f))
> > > -             return ERR_PTR(-ENOMEM);
> > > -
> > >       f->f_cred = get_cred(cred);
> > >       error = security_file_alloc(f);
> > >       if (unlikely(error)) {
> > >               file_free_rcu(&f->f_rcuhead);
> > > -             return ERR_PTR(error);
> > > +             return error;
> > >       }
> > >
> > >       atomic_long_set(&f->f_count, 1);
> > > @@ -155,6 +180,22 @@ static struct file *__alloc_file(int flags, const struct cred *cred)
> > >       f->f_mode = OPEN_FMODE(flags);
> > >       /* f->f_version: 0 */
> > >
> > > +     return 0;
> > > +}
> > > +
> > > +static struct file *__alloc_file(int flags, const struct cred *cred)
> > > +{
> > > +     struct file *f;
> > > +     int error;
> > > +
> > > +     f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> > > +     if (unlikely(!f))
> > > +             return ERR_PTR(-ENOMEM);
> > > +
> > > +     error = init_file(f, flags, cred);
> > > +     if (unlikely(error))
> > > +             return ERR_PTR(error);
> > > +
> > >       return f;
> > >  }
> > >
> > > @@ -201,18 +242,32 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
> > >  }
> > >
> > >  /*
> > > - * Variant of alloc_empty_file() that doesn't check and modify nr_files.
> > > + * Variant of alloc_empty_file() that allocates a file_fake container
> > > + * and doesn't check and modify nr_files.
> > >   *
> > >   * Should not be used unless there's a very good reason to do so.
> > >   */
> > > -struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
> > > +struct file *alloc_empty_file_fake(const struct path *fake_path, int flags,
> > > +                                const struct cred *cred)
> > >  {
> > > -     struct file *f = __alloc_file(flags, cred);
> > > +     struct file_fake *ff;
> > > +     int error;
> > >
> > > -     if (!IS_ERR(f))
> > > -             f->f_mode |= FMODE_NOACCOUNT;
> > > +     ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL);
> > > +     if (unlikely(!ff))
> > > +             return ERR_PTR(-ENOMEM);
> > >
> > > -     return f;
> > > +     error = init_file(&ff->file, flags, cred);
> > > +     if (unlikely(error))
> > > +             return ERR_PTR(error);
> > > +
> > > +     ff->file.f_mode |= FMODE_FAKE_PATH;
> > > +     if (fake_path) {
> > > +             path_get(fake_path);
> > > +             ff->fake_path = *fake_path;
> > > +     }
> >
> > Hm, I see that this check is mostly done for vfs_tmpfile_open() which
> > only fills in file->f_path in vfs_tmpfile() but leaves ff->fake_path
> > NULL.
> >
> > So really I think having FMODE_FAKE_PATH set but ff->fake_path be NULL
> > is an invitation for NULL derefs sooner or later. I would simply
> > document that it's required to set ff->fake_path. For callers such as
> > vfs_tmpfile_open() it can just be path itself. IOW, vfs_tmpfile_open()
> > should set ff->fake_path to file->f_path.
> 
> Makes sense.
> I also took the liberty to re-arrange vfs_tmpfile_open() without the
> unneeded if (!error) { nesting depth.

Yes, please. I had a rough sketch just for my own amusement...

fs/namei.c
  struct file *vfs_tmpfile_open(struct mnt_idmap *idmap,
                                const struct path *parentpath, umode_t mode,
                                int open_flag, const struct cred *cred)
  {
          struct file *file;
          int error;

          file = alloc_empty_file_fake(open_flag, cred);
          if (IS_ERR(file))
                  return file;

          error = vfs_tmpfile(idmap, parentpath, file, mode);
          if (error) {
                  fput(file);
                  return ERR_PTR(error);
          }

          return file_set_fake_path(file, &file->f_path);
  }
  EXPORT_SYMBOL(vfs_tmpfile_open);

fs/internal.h
  struct file *file_set_fake_path(struct file *file, const struct path *fake_path);

fs/open.c
  struct file *open_with_fake_path(const struct path *fake_path, int flags,
                                   const struct path *path,
                                   const struct cred *cred)
  {
          int error;
          struct file *file;

          file = alloc_empty_file_fake(flags, cred);
          if (IS_ERR(file))
                  return file;

          file->f_path = *path;
          error = do_dentry_open(file, d_inode(path->dentry), NULL);
          if (error) {
                  fput(file);
                  return ERR_PTR(error);
          }

          return file_set_fake_path(file, fake_path);
  }

fs/file_table.c
  struct file *alloc_empty_file_fake(int flags, const struct cred *cred)
  {
          struct file_fake *ff;
          int error;

          ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL);
          if (unlikely(!ff))
                  return ERR_PTR(-ENOMEM);

          error = init_file(&ff->file, flags, cred);
          if (unlikely(error))
                  return ERR_PTR(error);

          ff->file.f_mode |= FMODE_FAKE_PATH;
          return &ff->file;
  }

  struct file *file_set_fake_path(struct file *file, const struct path *fake_path)
  {
          if (file->f_mode & FMODE_FAKE_PATH) {
                  struct file_fake *ff = file_fake(file);
                  ff->fake_path = *fake_path;
          }

          return file;
  }


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/3] fs: use fake_file container for internal files with fake f_path
  2023-06-09 12:12       ` Christian Brauner
@ 2023-06-09 12:20         ` Amir Goldstein
  2023-06-09 12:54           ` Christian Brauner
  2023-06-11 19:11         ` Amir Goldstein
  1 sibling, 1 reply; 27+ messages in thread
From: Amir Goldstein @ 2023-06-09 12:20 UTC (permalink / raw)
  To: Christian Brauner
  Cc: Miklos Szeredi, Al Viro, Jan Kara, Paul Moore, Tetsuo Handa,
	linux-fsdevel, linux-unionfs

On Fri, Jun 9, 2023 at 3:12 PM Christian Brauner <brauner@kernel.org> wrote:
>
> On Fri, Jun 09, 2023 at 02:57:20PM +0300, Amir Goldstein wrote:
> > On Fri, Jun 9, 2023 at 2:32 PM Christian Brauner <brauner@kernel.org> wrote:
> > >
> > > On Fri, Jun 09, 2023 at 10:32:37AM +0300, Amir Goldstein wrote:
> > > > Overlayfs and cachefiles use open_with_fake_path() to allocate internal
> > > > files, where overlayfs also puts a "fake" path in f_path - a path which
> > > > is not on the same fs as f_inode.
> > > >
> > > > Allocate a container struct file_fake for those internal files, that
> > > > will be used to hold the fake path qlong with the real path.
> > > >
> > > > This commit does not populate the extra fake_path field and leaves the
> > > > overlayfs internal file's f_path fake.
> > > >
> > > > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > > > ---
> > > >  fs/cachefiles/namei.c |  2 +-
> > > >  fs/file_table.c       | 85 +++++++++++++++++++++++++++++++++++--------
> > > >  fs/internal.h         |  5 ++-
> > > >  fs/namei.c            |  2 +-
> > > >  fs/open.c             | 11 +++---
> > > >  fs/overlayfs/file.c   |  2 +-
> > > >  include/linux/fs.h    | 13 ++++---
> > > >  7 files changed, 90 insertions(+), 30 deletions(-)
> > > >
> > > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
> > > > index 82219a8f6084..a71bdf2d03ba 100644
> > > > --- a/fs/cachefiles/namei.c
> > > > +++ b/fs/cachefiles/namei.c
> > > > @@ -561,7 +561,7 @@ static bool cachefiles_open_file(struct cachefiles_object *object,
> > > >       path.mnt = cache->mnt;
> > > >       path.dentry = dentry;
> > > >       file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
> > > > -                                d_backing_inode(dentry), cache->cache_cred);
> > > > +                                &path, cache->cache_cred);
> > > >       if (IS_ERR(file)) {
> > > >               trace_cachefiles_vfs_error(object, d_backing_inode(dentry),
> > > >                                          PTR_ERR(file),
> > > > diff --git a/fs/file_table.c b/fs/file_table.c
> > > > index 372653b92617..adc2a92faa52 100644
> > > > --- a/fs/file_table.c
> > > > +++ b/fs/file_table.c
> > > > @@ -44,18 +44,48 @@ static struct kmem_cache *filp_cachep __read_mostly;
> > > >
> > > >  static struct percpu_counter nr_files __cacheline_aligned_in_smp;
> > > >
> > > > +/* Container for file with optional fake path to display in /proc files */
> > > > +struct file_fake {
> > > > +     struct file file;
> > > > +     struct path fake_path;
> > > > +};
> > > > +
> > > > +static inline struct file_fake *file_fake(struct file *f)
> > > > +{
> > > > +     return container_of(f, struct file_fake, file);
> > > > +}
> > > > +
> > > > +/* Returns fake_path if one exists, f_path otherwise */
> > > > +const struct path *file_fake_path(struct file *f)
> > > > +{
> > > > +     struct file_fake *ff = file_fake(f);
> > > > +
> > > > +     if (!(f->f_mode & FMODE_FAKE_PATH) || !ff->fake_path.dentry)
> > > > +             return &f->f_path;
> > > > +
> > > > +     return &ff->fake_path;
> > > > +}
> > > > +EXPORT_SYMBOL(file_fake_path);
> > > > +
> > > >  static void file_free_rcu(struct rcu_head *head)
> > > >  {
> > > >       struct file *f = container_of(head, struct file, f_rcuhead);
> > > >
> > > >       put_cred(f->f_cred);
> > > > -     kmem_cache_free(filp_cachep, f);
> > > > +     if (f->f_mode & FMODE_FAKE_PATH)
> > > > +             kfree(file_fake(f));
> > > > +     else
> > > > +             kmem_cache_free(filp_cachep, f);
> > > >  }
> > > >
> > > >  static inline void file_free(struct file *f)
> > > >  {
> > > > +     struct file_fake *ff = file_fake(f);
> > > > +
> > > >       security_file_free(f);
> > > > -     if (!(f->f_mode & FMODE_NOACCOUNT))
> > > > +     if (f->f_mode & FMODE_FAKE_PATH)
> > > > +             path_put(&ff->fake_path);
> > > > +     else
> > > >               percpu_counter_dec(&nr_files);
> > > >       call_rcu(&f->f_rcuhead, file_free_rcu);
> > > >  }
> > > > @@ -131,20 +161,15 @@ static int __init init_fs_stat_sysctls(void)
> > > >  fs_initcall(init_fs_stat_sysctls);
> > > >  #endif
> > > >
> > > > -static struct file *__alloc_file(int flags, const struct cred *cred)
> > > > +static int init_file(struct file *f, int flags, const struct cred *cred)
> > > >  {
> > > > -     struct file *f;
> > > >       int error;
> > > >
> > > > -     f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> > > > -     if (unlikely(!f))
> > > > -             return ERR_PTR(-ENOMEM);
> > > > -
> > > >       f->f_cred = get_cred(cred);
> > > >       error = security_file_alloc(f);
> > > >       if (unlikely(error)) {
> > > >               file_free_rcu(&f->f_rcuhead);
> > > > -             return ERR_PTR(error);
> > > > +             return error;
> > > >       }
> > > >
> > > >       atomic_long_set(&f->f_count, 1);
> > > > @@ -155,6 +180,22 @@ static struct file *__alloc_file(int flags, const struct cred *cred)
> > > >       f->f_mode = OPEN_FMODE(flags);
> > > >       /* f->f_version: 0 */
> > > >
> > > > +     return 0;
> > > > +}
> > > > +
> > > > +static struct file *__alloc_file(int flags, const struct cred *cred)
> > > > +{
> > > > +     struct file *f;
> > > > +     int error;
> > > > +
> > > > +     f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> > > > +     if (unlikely(!f))
> > > > +             return ERR_PTR(-ENOMEM);
> > > > +
> > > > +     error = init_file(f, flags, cred);
> > > > +     if (unlikely(error))
> > > > +             return ERR_PTR(error);
> > > > +
> > > >       return f;
> > > >  }
> > > >
> > > > @@ -201,18 +242,32 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
> > > >  }
> > > >
> > > >  /*
> > > > - * Variant of alloc_empty_file() that doesn't check and modify nr_files.
> > > > + * Variant of alloc_empty_file() that allocates a file_fake container
> > > > + * and doesn't check and modify nr_files.
> > > >   *
> > > >   * Should not be used unless there's a very good reason to do so.
> > > >   */
> > > > -struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
> > > > +struct file *alloc_empty_file_fake(const struct path *fake_path, int flags,
> > > > +                                const struct cred *cred)
> > > >  {
> > > > -     struct file *f = __alloc_file(flags, cred);
> > > > +     struct file_fake *ff;
> > > > +     int error;
> > > >
> > > > -     if (!IS_ERR(f))
> > > > -             f->f_mode |= FMODE_NOACCOUNT;
> > > > +     ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL);
> > > > +     if (unlikely(!ff))
> > > > +             return ERR_PTR(-ENOMEM);
> > > >
> > > > -     return f;
> > > > +     error = init_file(&ff->file, flags, cred);
> > > > +     if (unlikely(error))
> > > > +             return ERR_PTR(error);
> > > > +
> > > > +     ff->file.f_mode |= FMODE_FAKE_PATH;
> > > > +     if (fake_path) {
> > > > +             path_get(fake_path);
> > > > +             ff->fake_path = *fake_path;
> > > > +     }
> > >
> > > Hm, I see that this check is mostly done for vfs_tmpfile_open() which
> > > only fills in file->f_path in vfs_tmpfile() but leaves ff->fake_path
> > > NULL.
> > >
> > > So really I think having FMODE_FAKE_PATH set but ff->fake_path be NULL
> > > is an invitation for NULL derefs sooner or later. I would simply
> > > document that it's required to set ff->fake_path. For callers such as
> > > vfs_tmpfile_open() it can just be path itself. IOW, vfs_tmpfile_open()
> > > should set ff->fake_path to file->f_path.
> >
> > Makes sense.
> > I also took the liberty to re-arrange vfs_tmpfile_open() without the
> > unneeded if (!error) { nesting depth.
>
> Yes, please. I had a rough sketch just for my own amusement...

Happy to make your Friday more amusing :-D

>
> fs/namei.c
>   struct file *vfs_tmpfile_open(struct mnt_idmap *idmap,
>                                 const struct path *parentpath, umode_t mode,
>                                 int open_flag, const struct cred *cred)
>   {
>           struct file *file;
>           int error;
>
>           file = alloc_empty_file_fake(open_flag, cred);
>           if (IS_ERR(file))
>                   return file;
>
>           error = vfs_tmpfile(idmap, parentpath, file, mode);
>           if (error) {
>                   fput(file);
>                   return ERR_PTR(error);
>           }
>
>           return file_set_fake_path(file, &file->f_path);
>   }
>   EXPORT_SYMBOL(vfs_tmpfile_open);
>
> fs/internal.h
>   struct file *file_set_fake_path(struct file *file, const struct path *fake_path);
>
> fs/open.c
>   struct file *open_with_fake_path(const struct path *fake_path, int flags,
>                                    const struct path *path,
>                                    const struct cred *cred)
>   {
>           int error;
>           struct file *file;
>
>           file = alloc_empty_file_fake(flags, cred);
>           if (IS_ERR(file))
>                   return file;
>
>           file->f_path = *path;
>           error = do_dentry_open(file, d_inode(path->dentry), NULL);
>           if (error) {
>                   fput(file);
>                   return ERR_PTR(error);
>           }
>
>           return file_set_fake_path(file, fake_path);
>   }
>
> fs/file_table.c
>   struct file *alloc_empty_file_fake(int flags, const struct cred *cred)
>   {
>           struct file_fake *ff;
>           int error;
>
>           ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL);
>           if (unlikely(!ff))
>                   return ERR_PTR(-ENOMEM);
>
>           error = init_file(&ff->file, flags, cred);
>           if (unlikely(error))
>                   return ERR_PTR(error);
>
>           ff->file.f_mode |= FMODE_FAKE_PATH;
>           return &ff->file;
>   }
>
>   struct file *file_set_fake_path(struct file *file, const struct path *fake_path)
>   {
>           if (file->f_mode & FMODE_FAKE_PATH) {
>                   struct file_fake *ff = file_fake(file);
>                   ff->fake_path = *fake_path;
>           }
>
>           return file;
>   }
>

Heh, I also started with file_set_fake_path() but I decided that it's not
worth it, because no code should be messing with this and I just changed
file_fake_path() to be non-const and used *file_fake_path(file) = fake_path
in these two helpers.

I will add file_set_fake_path *only* if you insist.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/3] fs: use fake_file container for internal files with fake f_path
  2023-06-09 12:20         ` Amir Goldstein
@ 2023-06-09 12:54           ` Christian Brauner
  2023-06-09 13:00             ` Christian Brauner
  0 siblings, 1 reply; 27+ messages in thread
From: Christian Brauner @ 2023-06-09 12:54 UTC (permalink / raw)
  To: Amir Goldstein
  Cc: Miklos Szeredi, Al Viro, Jan Kara, Paul Moore, Tetsuo Handa,
	linux-fsdevel, linux-unionfs

On Fri, Jun 09, 2023 at 03:20:14PM +0300, Amir Goldstein wrote:
> On Fri, Jun 9, 2023 at 3:12 PM Christian Brauner <brauner@kernel.org> wrote:
> >
> > On Fri, Jun 09, 2023 at 02:57:20PM +0300, Amir Goldstein wrote:
> > > On Fri, Jun 9, 2023 at 2:32 PM Christian Brauner <brauner@kernel.org> wrote:
> > > >
> > > > On Fri, Jun 09, 2023 at 10:32:37AM +0300, Amir Goldstein wrote:
> > > > > Overlayfs and cachefiles use open_with_fake_path() to allocate internal
> > > > > files, where overlayfs also puts a "fake" path in f_path - a path which
> > > > > is not on the same fs as f_inode.
> > > > >
> > > > > Allocate a container struct file_fake for those internal files, that
> > > > > will be used to hold the fake path qlong with the real path.
> > > > >
> > > > > This commit does not populate the extra fake_path field and leaves the
> > > > > overlayfs internal file's f_path fake.
> > > > >
> > > > > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > > > > ---
> > > > >  fs/cachefiles/namei.c |  2 +-
> > > > >  fs/file_table.c       | 85 +++++++++++++++++++++++++++++++++++--------
> > > > >  fs/internal.h         |  5 ++-
> > > > >  fs/namei.c            |  2 +-
> > > > >  fs/open.c             | 11 +++---
> > > > >  fs/overlayfs/file.c   |  2 +-
> > > > >  include/linux/fs.h    | 13 ++++---
> > > > >  7 files changed, 90 insertions(+), 30 deletions(-)
> > > > >
> > > > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
> > > > > index 82219a8f6084..a71bdf2d03ba 100644
> > > > > --- a/fs/cachefiles/namei.c
> > > > > +++ b/fs/cachefiles/namei.c
> > > > > @@ -561,7 +561,7 @@ static bool cachefiles_open_file(struct cachefiles_object *object,
> > > > >       path.mnt = cache->mnt;
> > > > >       path.dentry = dentry;
> > > > >       file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
> > > > > -                                d_backing_inode(dentry), cache->cache_cred);
> > > > > +                                &path, cache->cache_cred);
> > > > >       if (IS_ERR(file)) {
> > > > >               trace_cachefiles_vfs_error(object, d_backing_inode(dentry),
> > > > >                                          PTR_ERR(file),
> > > > > diff --git a/fs/file_table.c b/fs/file_table.c
> > > > > index 372653b92617..adc2a92faa52 100644
> > > > > --- a/fs/file_table.c
> > > > > +++ b/fs/file_table.c
> > > > > @@ -44,18 +44,48 @@ static struct kmem_cache *filp_cachep __read_mostly;
> > > > >
> > > > >  static struct percpu_counter nr_files __cacheline_aligned_in_smp;
> > > > >
> > > > > +/* Container for file with optional fake path to display in /proc files */
> > > > > +struct file_fake {
> > > > > +     struct file file;
> > > > > +     struct path fake_path;
> > > > > +};
> > > > > +
> > > > > +static inline struct file_fake *file_fake(struct file *f)
> > > > > +{
> > > > > +     return container_of(f, struct file_fake, file);
> > > > > +}
> > > > > +
> > > > > +/* Returns fake_path if one exists, f_path otherwise */
> > > > > +const struct path *file_fake_path(struct file *f)
> > > > > +{
> > > > > +     struct file_fake *ff = file_fake(f);
> > > > > +
> > > > > +     if (!(f->f_mode & FMODE_FAKE_PATH) || !ff->fake_path.dentry)
> > > > > +             return &f->f_path;
> > > > > +
> > > > > +     return &ff->fake_path;
> > > > > +}
> > > > > +EXPORT_SYMBOL(file_fake_path);
> > > > > +
> > > > >  static void file_free_rcu(struct rcu_head *head)
> > > > >  {
> > > > >       struct file *f = container_of(head, struct file, f_rcuhead);
> > > > >
> > > > >       put_cred(f->f_cred);
> > > > > -     kmem_cache_free(filp_cachep, f);
> > > > > +     if (f->f_mode & FMODE_FAKE_PATH)
> > > > > +             kfree(file_fake(f));
> > > > > +     else
> > > > > +             kmem_cache_free(filp_cachep, f);
> > > > >  }
> > > > >
> > > > >  static inline void file_free(struct file *f)
> > > > >  {
> > > > > +     struct file_fake *ff = file_fake(f);
> > > > > +
> > > > >       security_file_free(f);
> > > > > -     if (!(f->f_mode & FMODE_NOACCOUNT))
> > > > > +     if (f->f_mode & FMODE_FAKE_PATH)
> > > > > +             path_put(&ff->fake_path);
> > > > > +     else
> > > > >               percpu_counter_dec(&nr_files);
> > > > >       call_rcu(&f->f_rcuhead, file_free_rcu);
> > > > >  }
> > > > > @@ -131,20 +161,15 @@ static int __init init_fs_stat_sysctls(void)
> > > > >  fs_initcall(init_fs_stat_sysctls);
> > > > >  #endif
> > > > >
> > > > > -static struct file *__alloc_file(int flags, const struct cred *cred)
> > > > > +static int init_file(struct file *f, int flags, const struct cred *cred)
> > > > >  {
> > > > > -     struct file *f;
> > > > >       int error;
> > > > >
> > > > > -     f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> > > > > -     if (unlikely(!f))
> > > > > -             return ERR_PTR(-ENOMEM);
> > > > > -
> > > > >       f->f_cred = get_cred(cred);
> > > > >       error = security_file_alloc(f);
> > > > >       if (unlikely(error)) {
> > > > >               file_free_rcu(&f->f_rcuhead);
> > > > > -             return ERR_PTR(error);
> > > > > +             return error;
> > > > >       }
> > > > >
> > > > >       atomic_long_set(&f->f_count, 1);
> > > > > @@ -155,6 +180,22 @@ static struct file *__alloc_file(int flags, const struct cred *cred)
> > > > >       f->f_mode = OPEN_FMODE(flags);
> > > > >       /* f->f_version: 0 */
> > > > >
> > > > > +     return 0;
> > > > > +}
> > > > > +
> > > > > +static struct file *__alloc_file(int flags, const struct cred *cred)
> > > > > +{
> > > > > +     struct file *f;
> > > > > +     int error;
> > > > > +
> > > > > +     f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> > > > > +     if (unlikely(!f))
> > > > > +             return ERR_PTR(-ENOMEM);
> > > > > +
> > > > > +     error = init_file(f, flags, cred);
> > > > > +     if (unlikely(error))
> > > > > +             return ERR_PTR(error);
> > > > > +
> > > > >       return f;
> > > > >  }
> > > > >
> > > > > @@ -201,18 +242,32 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
> > > > >  }
> > > > >
> > > > >  /*
> > > > > - * Variant of alloc_empty_file() that doesn't check and modify nr_files.
> > > > > + * Variant of alloc_empty_file() that allocates a file_fake container
> > > > > + * and doesn't check and modify nr_files.
> > > > >   *
> > > > >   * Should not be used unless there's a very good reason to do so.
> > > > >   */
> > > > > -struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
> > > > > +struct file *alloc_empty_file_fake(const struct path *fake_path, int flags,
> > > > > +                                const struct cred *cred)
> > > > >  {
> > > > > -     struct file *f = __alloc_file(flags, cred);
> > > > > +     struct file_fake *ff;
> > > > > +     int error;
> > > > >
> > > > > -     if (!IS_ERR(f))
> > > > > -             f->f_mode |= FMODE_NOACCOUNT;
> > > > > +     ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL);
> > > > > +     if (unlikely(!ff))
> > > > > +             return ERR_PTR(-ENOMEM);
> > > > >
> > > > > -     return f;
> > > > > +     error = init_file(&ff->file, flags, cred);
> > > > > +     if (unlikely(error))
> > > > > +             return ERR_PTR(error);
> > > > > +
> > > > > +     ff->file.f_mode |= FMODE_FAKE_PATH;
> > > > > +     if (fake_path) {
> > > > > +             path_get(fake_path);
> > > > > +             ff->fake_path = *fake_path;
> > > > > +     }
> > > >
> > > > Hm, I see that this check is mostly done for vfs_tmpfile_open() which
> > > > only fills in file->f_path in vfs_tmpfile() but leaves ff->fake_path
> > > > NULL.
> > > >
> > > > So really I think having FMODE_FAKE_PATH set but ff->fake_path be NULL
> > > > is an invitation for NULL derefs sooner or later. I would simply
> > > > document that it's required to set ff->fake_path. For callers such as
> > > > vfs_tmpfile_open() it can just be path itself. IOW, vfs_tmpfile_open()
> > > > should set ff->fake_path to file->f_path.
> > >
> > > Makes sense.
> > > I also took the liberty to re-arrange vfs_tmpfile_open() without the
> > > unneeded if (!error) { nesting depth.
> >
> > Yes, please. I had a rough sketch just for my own amusement...
> 
> Happy to make your Friday more amusing :-D
> 
> >
> > fs/namei.c
> >   struct file *vfs_tmpfile_open(struct mnt_idmap *idmap,
> >                                 const struct path *parentpath, umode_t mode,
> >                                 int open_flag, const struct cred *cred)
> >   {
> >           struct file *file;
> >           int error;
> >
> >           file = alloc_empty_file_fake(open_flag, cred);
> >           if (IS_ERR(file))
> >                   return file;
> >
> >           error = vfs_tmpfile(idmap, parentpath, file, mode);
> >           if (error) {
> >                   fput(file);
> >                   return ERR_PTR(error);
> >           }
> >
> >           return file_set_fake_path(file, &file->f_path);
> >   }
> >   EXPORT_SYMBOL(vfs_tmpfile_open);
> >
> > fs/internal.h
> >   struct file *file_set_fake_path(struct file *file, const struct path *fake_path);
> >
> > fs/open.c
> >   struct file *open_with_fake_path(const struct path *fake_path, int flags,
> >                                    const struct path *path,
> >                                    const struct cred *cred)
> >   {
> >           int error;
> >           struct file *file;
> >
> >           file = alloc_empty_file_fake(flags, cred);
> >           if (IS_ERR(file))
> >                   return file;
> >
> >           file->f_path = *path;
> >           error = do_dentry_open(file, d_inode(path->dentry), NULL);
> >           if (error) {
> >                   fput(file);
> >                   return ERR_PTR(error);
> >           }
> >
> >           return file_set_fake_path(file, fake_path);
> >   }
> >
> > fs/file_table.c
> >   struct file *alloc_empty_file_fake(int flags, const struct cred *cred)
> >   {
> >           struct file_fake *ff;
> >           int error;
> >
> >           ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL);
> >           if (unlikely(!ff))
> >                   return ERR_PTR(-ENOMEM);
> >
> >           error = init_file(&ff->file, flags, cred);
> >           if (unlikely(error))
> >                   return ERR_PTR(error);
> >
> >           ff->file.f_mode |= FMODE_FAKE_PATH;
> >           return &ff->file;
> >   }
> >
> >   struct file *file_set_fake_path(struct file *file, const struct path *fake_path)
> >   {
> >           if (file->f_mode & FMODE_FAKE_PATH) {
> >                   struct file_fake *ff = file_fake(file);
> >                   ff->fake_path = *fake_path;
> >           }
> >
> >           return file;
> >   }
> >
> 
> Heh, I also started with file_set_fake_path() but I decided that it's not
> worth it, because no code should be messing with this and I just changed
> file_fake_path() to be non-const and used *file_fake_path(file) = fake_path
> in these two helpers.

Hm, I don't understand. This is non-exported and only visible in thing
that can use internal.h so only core vfs coe.

The only places that use this are vfs_tmpfile_open() and
open_with_fake_path(). It allows us to remove the additional argument
from alloc_empty_file_fake() and that's what I really like because now
we don't have this pass NULL in vfs_tmpfile_open() and fill in later vs
doing it in one step.

Hm, one thing I realized is that this moves vfs_tmpfile_open() out of
filp_cachep which isn't great, no? That's a pretty heavily used codepath
so it feels that it should probably continue to use the cache?

What about keeping FMODE_NOACCOUNT and adding FMODE_FAKE_PATH. I think
that might be better. Christoph has just removed 3 FMODE_* flags with
his decoupling of block specific flags from file generic flags. So as
far as I'm concerned this wouldn't be a problem.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/3] fs: use fake_file container for internal files with fake f_path
  2023-06-09 12:54           ` Christian Brauner
@ 2023-06-09 13:00             ` Christian Brauner
  2023-06-09 13:09               ` Amir Goldstein
  0 siblings, 1 reply; 27+ messages in thread
From: Christian Brauner @ 2023-06-09 13:00 UTC (permalink / raw)
  To: Amir Goldstein
  Cc: Miklos Szeredi, Al Viro, Jan Kara, Paul Moore, Tetsuo Handa,
	linux-fsdevel, linux-unionfs

On Fri, Jun 09, 2023 at 02:54:32PM +0200, Christian Brauner wrote:
> On Fri, Jun 09, 2023 at 03:20:14PM +0300, Amir Goldstein wrote:
> > On Fri, Jun 9, 2023 at 3:12 PM Christian Brauner <brauner@kernel.org> wrote:
> > >
> > > On Fri, Jun 09, 2023 at 02:57:20PM +0300, Amir Goldstein wrote:
> > > > On Fri, Jun 9, 2023 at 2:32 PM Christian Brauner <brauner@kernel.org> wrote:
> > > > >
> > > > > On Fri, Jun 09, 2023 at 10:32:37AM +0300, Amir Goldstein wrote:
> > > > > > Overlayfs and cachefiles use open_with_fake_path() to allocate internal
> > > > > > files, where overlayfs also puts a "fake" path in f_path - a path which
> > > > > > is not on the same fs as f_inode.
> > > > > >
> > > > > > Allocate a container struct file_fake for those internal files, that
> > > > > > will be used to hold the fake path qlong with the real path.
> > > > > >
> > > > > > This commit does not populate the extra fake_path field and leaves the
> > > > > > overlayfs internal file's f_path fake.
> > > > > >
> > > > > > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > > > > > ---
> > > > > >  fs/cachefiles/namei.c |  2 +-
> > > > > >  fs/file_table.c       | 85 +++++++++++++++++++++++++++++++++++--------
> > > > > >  fs/internal.h         |  5 ++-
> > > > > >  fs/namei.c            |  2 +-
> > > > > >  fs/open.c             | 11 +++---
> > > > > >  fs/overlayfs/file.c   |  2 +-
> > > > > >  include/linux/fs.h    | 13 ++++---
> > > > > >  7 files changed, 90 insertions(+), 30 deletions(-)
> > > > > >
> > > > > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
> > > > > > index 82219a8f6084..a71bdf2d03ba 100644
> > > > > > --- a/fs/cachefiles/namei.c
> > > > > > +++ b/fs/cachefiles/namei.c
> > > > > > @@ -561,7 +561,7 @@ static bool cachefiles_open_file(struct cachefiles_object *object,
> > > > > >       path.mnt = cache->mnt;
> > > > > >       path.dentry = dentry;
> > > > > >       file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
> > > > > > -                                d_backing_inode(dentry), cache->cache_cred);
> > > > > > +                                &path, cache->cache_cred);
> > > > > >       if (IS_ERR(file)) {
> > > > > >               trace_cachefiles_vfs_error(object, d_backing_inode(dentry),
> > > > > >                                          PTR_ERR(file),
> > > > > > diff --git a/fs/file_table.c b/fs/file_table.c
> > > > > > index 372653b92617..adc2a92faa52 100644
> > > > > > --- a/fs/file_table.c
> > > > > > +++ b/fs/file_table.c
> > > > > > @@ -44,18 +44,48 @@ static struct kmem_cache *filp_cachep __read_mostly;
> > > > > >
> > > > > >  static struct percpu_counter nr_files __cacheline_aligned_in_smp;
> > > > > >
> > > > > > +/* Container for file with optional fake path to display in /proc files */
> > > > > > +struct file_fake {
> > > > > > +     struct file file;
> > > > > > +     struct path fake_path;
> > > > > > +};
> > > > > > +
> > > > > > +static inline struct file_fake *file_fake(struct file *f)
> > > > > > +{
> > > > > > +     return container_of(f, struct file_fake, file);
> > > > > > +}
> > > > > > +
> > > > > > +/* Returns fake_path if one exists, f_path otherwise */
> > > > > > +const struct path *file_fake_path(struct file *f)
> > > > > > +{
> > > > > > +     struct file_fake *ff = file_fake(f);
> > > > > > +
> > > > > > +     if (!(f->f_mode & FMODE_FAKE_PATH) || !ff->fake_path.dentry)
> > > > > > +             return &f->f_path;
> > > > > > +
> > > > > > +     return &ff->fake_path;
> > > > > > +}
> > > > > > +EXPORT_SYMBOL(file_fake_path);
> > > > > > +
> > > > > >  static void file_free_rcu(struct rcu_head *head)
> > > > > >  {
> > > > > >       struct file *f = container_of(head, struct file, f_rcuhead);
> > > > > >
> > > > > >       put_cred(f->f_cred);
> > > > > > -     kmem_cache_free(filp_cachep, f);
> > > > > > +     if (f->f_mode & FMODE_FAKE_PATH)
> > > > > > +             kfree(file_fake(f));
> > > > > > +     else
> > > > > > +             kmem_cache_free(filp_cachep, f);
> > > > > >  }
> > > > > >
> > > > > >  static inline void file_free(struct file *f)
> > > > > >  {
> > > > > > +     struct file_fake *ff = file_fake(f);
> > > > > > +
> > > > > >       security_file_free(f);
> > > > > > -     if (!(f->f_mode & FMODE_NOACCOUNT))
> > > > > > +     if (f->f_mode & FMODE_FAKE_PATH)
> > > > > > +             path_put(&ff->fake_path);
> > > > > > +     else
> > > > > >               percpu_counter_dec(&nr_files);
> > > > > >       call_rcu(&f->f_rcuhead, file_free_rcu);
> > > > > >  }
> > > > > > @@ -131,20 +161,15 @@ static int __init init_fs_stat_sysctls(void)
> > > > > >  fs_initcall(init_fs_stat_sysctls);
> > > > > >  #endif
> > > > > >
> > > > > > -static struct file *__alloc_file(int flags, const struct cred *cred)
> > > > > > +static int init_file(struct file *f, int flags, const struct cred *cred)
> > > > > >  {
> > > > > > -     struct file *f;
> > > > > >       int error;
> > > > > >
> > > > > > -     f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> > > > > > -     if (unlikely(!f))
> > > > > > -             return ERR_PTR(-ENOMEM);
> > > > > > -
> > > > > >       f->f_cred = get_cred(cred);
> > > > > >       error = security_file_alloc(f);
> > > > > >       if (unlikely(error)) {
> > > > > >               file_free_rcu(&f->f_rcuhead);
> > > > > > -             return ERR_PTR(error);
> > > > > > +             return error;
> > > > > >       }
> > > > > >
> > > > > >       atomic_long_set(&f->f_count, 1);
> > > > > > @@ -155,6 +180,22 @@ static struct file *__alloc_file(int flags, const struct cred *cred)
> > > > > >       f->f_mode = OPEN_FMODE(flags);
> > > > > >       /* f->f_version: 0 */
> > > > > >
> > > > > > +     return 0;
> > > > > > +}
> > > > > > +
> > > > > > +static struct file *__alloc_file(int flags, const struct cred *cred)
> > > > > > +{
> > > > > > +     struct file *f;
> > > > > > +     int error;
> > > > > > +
> > > > > > +     f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> > > > > > +     if (unlikely(!f))
> > > > > > +             return ERR_PTR(-ENOMEM);
> > > > > > +
> > > > > > +     error = init_file(f, flags, cred);
> > > > > > +     if (unlikely(error))
> > > > > > +             return ERR_PTR(error);
> > > > > > +
> > > > > >       return f;
> > > > > >  }
> > > > > >
> > > > > > @@ -201,18 +242,32 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
> > > > > >  }
> > > > > >
> > > > > >  /*
> > > > > > - * Variant of alloc_empty_file() that doesn't check and modify nr_files.
> > > > > > + * Variant of alloc_empty_file() that allocates a file_fake container
> > > > > > + * and doesn't check and modify nr_files.
> > > > > >   *
> > > > > >   * Should not be used unless there's a very good reason to do so.
> > > > > >   */
> > > > > > -struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
> > > > > > +struct file *alloc_empty_file_fake(const struct path *fake_path, int flags,
> > > > > > +                                const struct cred *cred)
> > > > > >  {
> > > > > > -     struct file *f = __alloc_file(flags, cred);
> > > > > > +     struct file_fake *ff;
> > > > > > +     int error;
> > > > > >
> > > > > > -     if (!IS_ERR(f))
> > > > > > -             f->f_mode |= FMODE_NOACCOUNT;
> > > > > > +     ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL);
> > > > > > +     if (unlikely(!ff))
> > > > > > +             return ERR_PTR(-ENOMEM);
> > > > > >
> > > > > > -     return f;
> > > > > > +     error = init_file(&ff->file, flags, cred);
> > > > > > +     if (unlikely(error))
> > > > > > +             return ERR_PTR(error);
> > > > > > +
> > > > > > +     ff->file.f_mode |= FMODE_FAKE_PATH;
> > > > > > +     if (fake_path) {
> > > > > > +             path_get(fake_path);
> > > > > > +             ff->fake_path = *fake_path;
> > > > > > +     }
> > > > >
> > > > > Hm, I see that this check is mostly done for vfs_tmpfile_open() which
> > > > > only fills in file->f_path in vfs_tmpfile() but leaves ff->fake_path
> > > > > NULL.
> > > > >
> > > > > So really I think having FMODE_FAKE_PATH set but ff->fake_path be NULL
> > > > > is an invitation for NULL derefs sooner or later. I would simply
> > > > > document that it's required to set ff->fake_path. For callers such as
> > > > > vfs_tmpfile_open() it can just be path itself. IOW, vfs_tmpfile_open()
> > > > > should set ff->fake_path to file->f_path.
> > > >
> > > > Makes sense.
> > > > I also took the liberty to re-arrange vfs_tmpfile_open() without the
> > > > unneeded if (!error) { nesting depth.
> > >
> > > Yes, please. I had a rough sketch just for my own amusement...
> > 
> > Happy to make your Friday more amusing :-D
> > 
> > >
> > > fs/namei.c
> > >   struct file *vfs_tmpfile_open(struct mnt_idmap *idmap,
> > >                                 const struct path *parentpath, umode_t mode,
> > >                                 int open_flag, const struct cred *cred)
> > >   {
> > >           struct file *file;
> > >           int error;
> > >
> > >           file = alloc_empty_file_fake(open_flag, cred);
> > >           if (IS_ERR(file))
> > >                   return file;
> > >
> > >           error = vfs_tmpfile(idmap, parentpath, file, mode);
> > >           if (error) {
> > >                   fput(file);
> > >                   return ERR_PTR(error);
> > >           }
> > >
> > >           return file_set_fake_path(file, &file->f_path);
> > >   }
> > >   EXPORT_SYMBOL(vfs_tmpfile_open);
> > >
> > > fs/internal.h
> > >   struct file *file_set_fake_path(struct file *file, const struct path *fake_path);
> > >
> > > fs/open.c
> > >   struct file *open_with_fake_path(const struct path *fake_path, int flags,
> > >                                    const struct path *path,
> > >                                    const struct cred *cred)
> > >   {
> > >           int error;
> > >           struct file *file;
> > >
> > >           file = alloc_empty_file_fake(flags, cred);
> > >           if (IS_ERR(file))
> > >                   return file;
> > >
> > >           file->f_path = *path;
> > >           error = do_dentry_open(file, d_inode(path->dentry), NULL);
> > >           if (error) {
> > >                   fput(file);
> > >                   return ERR_PTR(error);
> > >           }
> > >
> > >           return file_set_fake_path(file, fake_path);
> > >   }
> > >
> > > fs/file_table.c
> > >   struct file *alloc_empty_file_fake(int flags, const struct cred *cred)
> > >   {
> > >           struct file_fake *ff;
> > >           int error;
> > >
> > >           ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL);
> > >           if (unlikely(!ff))
> > >                   return ERR_PTR(-ENOMEM);
> > >
> > >           error = init_file(&ff->file, flags, cred);
> > >           if (unlikely(error))
> > >                   return ERR_PTR(error);
> > >
> > >           ff->file.f_mode |= FMODE_FAKE_PATH;
> > >           return &ff->file;
> > >   }
> > >
> > >   struct file *file_set_fake_path(struct file *file, const struct path *fake_path)
> > >   {
> > >           if (file->f_mode & FMODE_FAKE_PATH) {
> > >                   struct file_fake *ff = file_fake(file);
> > >                   ff->fake_path = *fake_path;
> > >           }
> > >
> > >           return file;
> > >   }
> > >
> > 
> > Heh, I also started with file_set_fake_path() but I decided that it's not
> > worth it, because no code should be messing with this and I just changed
> > file_fake_path() to be non-const and used *file_fake_path(file) = fake_path
> > in these two helpers.
> 
> Hm, I don't understand. This is non-exported and only visible in thing
> that can use internal.h so only core vfs coe.
> 
> The only places that use this are vfs_tmpfile_open() and
> open_with_fake_path(). It allows us to remove the additional argument
> from alloc_empty_file_fake() and that's what I really like because now
> we don't have this pass NULL in vfs_tmpfile_open() and fill in later vs
> doing it in one step.
> 
> Hm, one thing I realized is that this moves vfs_tmpfile_open() out of
> filp_cachep which isn't great, no? That's a pretty heavily used codepath
> so it feels that it should probably continue to use the cache?

Uh, I misread as that's only used in cachefiles and in overlayfs so it's
probably fine. I thought this was the generic version. Though it might
still be preferable to keep FMODE_NOACCOUNT and FMODE_FAKE_PATH distinct
since there's really no reason why tmpfiles should partake in the fake
path stuff...

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/3] fs: use fake_file container for internal files with fake f_path
  2023-06-09 13:00             ` Christian Brauner
@ 2023-06-09 13:09               ` Amir Goldstein
  0 siblings, 0 replies; 27+ messages in thread
From: Amir Goldstein @ 2023-06-09 13:09 UTC (permalink / raw)
  To: Christian Brauner
  Cc: Miklos Szeredi, Al Viro, Jan Kara, Paul Moore, Tetsuo Handa,
	linux-fsdevel, linux-unionfs, David Howells

On Fri, Jun 9, 2023 at 4:00 PM Christian Brauner <brauner@kernel.org> wrote:
>
> On Fri, Jun 09, 2023 at 02:54:32PM +0200, Christian Brauner wrote:
[...]
>
> Uh, I misread as that's only used in cachefiles and in overlayfs so it's
> probably fine. I thought this was the generic version. Though it might
> still be preferable to keep FMODE_NOACCOUNT and FMODE_FAKE_PATH distinct
> since there's really no reason why tmpfiles should partake in the fake
> path stuff...

The reason is (wait for it) no more available bits in f_flags.
Yeh, there is one place left in 0x4000000, but I didn't want to
waste it given that FMODE_NOACCOUNT and FMODE_FAKE_PATH
use cases are pretty close.

BTW, you reminded me that I forgot to CC dhowells (add now).

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 0/3] Reduce impact of overlayfs fake path files
  2023-06-09  7:32 [PATCH 0/3] Reduce impact of overlayfs fake path files Amir Goldstein
                   ` (2 preceding siblings ...)
  2023-06-09  7:32 ` [PATCH 3/3] fs: store fake path in file_fake along with real path Amir Goldstein
@ 2023-06-09 13:15 ` Miklos Szeredi
  2023-06-09 14:28   ` Amir Goldstein
  2023-06-09 13:15 ` Tetsuo Handa
  4 siblings, 1 reply; 27+ messages in thread
From: Miklos Szeredi @ 2023-06-09 13:15 UTC (permalink / raw)
  To: Amir Goldstein
  Cc: Christian Brauner, Al Viro, Jan Kara, Paul Moore, Tetsuo Handa,
	linux-fsdevel, linux-unionfs

On Fri, 9 Jun 2023 at 09:32, Amir Goldstein <amir73il@gmail.com> wrote:
>
> Miklos,
>
> This is the solution that we discussed for removing FMODE_NONOTIFY
> from overlayfs real files.
>
> My branch [1] has an extra patch for remove FMODE_NONOTIFY, but
> I am still testing the ovl-fsnotify interaction, so we can defer
> that step to later.
>
> I wanted to post this series earlier to give more time for fsdevel
> feedback and if these patches get your blessing and the blessing of
> vfs maintainers, it is probably better that they will go through the
> vfs tree.
>
> I've tested that overlay "fake" path are still shown in /proc/self/maps
> and in the /proc/self/exe and /proc/self/map_files/ symlinks.
>
> The audit and tomoyo use of file_fake_path() is not tested
> (CC maintainers), but they both look like user displayed paths,
> so I assumed they's want to preserve the existing behavior
> (i.e. displaying the fake overlayfs path).

I did an audit of all ->vm_file  and found a couple of missing ones:

dump_common_audit_data
ima_file_mprotect
common_file_perm (I don't understand the code enough to know whether
it needs fake dentry or not)
aa_file_perm
__file_path_perm
print_bad_pte
file_path
seq_print_user_ip
__mnt_want_write_file
__mnt_drop_write_file
file_dentry_name

Didn't go into drivers/ and didn't follow indirect calls (e.g.
f_op->fsysnc).  I also may have missed something along the way, but my
guess is that I did catch most cases.

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 0/3] Reduce impact of overlayfs fake path files
  2023-06-09  7:32 [PATCH 0/3] Reduce impact of overlayfs fake path files Amir Goldstein
                   ` (3 preceding siblings ...)
  2023-06-09 13:15 ` [PATCH 0/3] Reduce impact of overlayfs fake path files Miklos Szeredi
@ 2023-06-09 13:15 ` Tetsuo Handa
  2023-06-09 13:54   ` Amir Goldstein
  4 siblings, 1 reply; 27+ messages in thread
From: Tetsuo Handa @ 2023-06-09 13:15 UTC (permalink / raw)
  To: Amir Goldstein, Miklos Szeredi
  Cc: Christian Brauner, Al Viro, Jan Kara, Paul Moore, linux-fsdevel,
	linux-unionfs

On 2023/06/09 16:32, Amir Goldstein wrote:
> The audit and tomoyo use of file_fake_path() is not tested
> (CC maintainers), but they both look like user displayed paths,
> so I assumed they's want to preserve the existing behavior
> (i.e. displaying the fake overlayfs path).

Since I'm not using overlayfs, I don't know the difference between
real path and fake path. Would you explain using command line example?

  mkdir what_path1
  mkdir what_path2
  mkdir what_path3
  mount -t overlayfs ...what_paths_come_here?...

what the pathname returned by wrapping with file_fake_path() is, and
what the pathname returned by not wrapping with file_fake_path() is?


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 0/3] Reduce impact of overlayfs fake path files
  2023-06-09 13:15 ` Tetsuo Handa
@ 2023-06-09 13:54   ` Amir Goldstein
  0 siblings, 0 replies; 27+ messages in thread
From: Amir Goldstein @ 2023-06-09 13:54 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: Miklos Szeredi, Christian Brauner, Al Viro, Jan Kara, Paul Moore,
	linux-fsdevel, linux-unionfs

On Fri, Jun 9, 2023 at 4:16 PM Tetsuo Handa
<penguin-kernel@i-love.sakura.ne.jp> wrote:
>
> On 2023/06/09 16:32, Amir Goldstein wrote:
> > The audit and tomoyo use of file_fake_path() is not tested
> > (CC maintainers), but they both look like user displayed paths,
> > so I assumed they's want to preserve the existing behavior
> > (i.e. displaying the fake overlayfs path).
>
> Since I'm not using overlayfs, I don't know the difference between
> real path and fake path. Would you explain using command line example?
>
>   mkdir what_path1
>   mkdir what_path2
>   mkdir what_path3
>   mount -t overlayfs ...what_paths_come_here?...

For example:
mount -t overlayfs overlay /mnt/ovl \
          -o lowerdir=what_path1:what_path2:what_path3

>
> what the pathname returned by wrapping with file_fake_path() is, and
> what the pathname returned by not wrapping with file_fake_path() is?
>

It depends. if you have an audit rule on /mnt/ovl the path is
always the /mnt/ovl/... path (fake_path as well).

If you have an audit rule on what_path1 the fake path is /mnt/ovl/...
and the real path is /... (the relative path from what_path1).
In both cases, the filename itself will be correct.
If the rule prints something like %pD2 then it does not really
matter which path you use.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 0/3] Reduce impact of overlayfs fake path files
  2023-06-09 13:15 ` [PATCH 0/3] Reduce impact of overlayfs fake path files Miklos Szeredi
@ 2023-06-09 14:28   ` Amir Goldstein
  2023-06-09 14:42     ` Amir Goldstein
  2023-06-09 15:27     ` Mimi Zohar
  0 siblings, 2 replies; 27+ messages in thread
From: Amir Goldstein @ 2023-06-09 14:28 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: Christian Brauner, Al Viro, Jan Kara, Paul Moore, Tetsuo Handa,
	linux-fsdevel, linux-unionfs, Mimi Zohar

On Fri, Jun 9, 2023 at 4:15 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
>
> On Fri, 9 Jun 2023 at 09:32, Amir Goldstein <amir73il@gmail.com> wrote:
> >
> > Miklos,
> >
> > This is the solution that we discussed for removing FMODE_NONOTIFY
> > from overlayfs real files.
> >
> > My branch [1] has an extra patch for remove FMODE_NONOTIFY, but
> > I am still testing the ovl-fsnotify interaction, so we can defer
> > that step to later.
> >
> > I wanted to post this series earlier to give more time for fsdevel
> > feedback and if these patches get your blessing and the blessing of
> > vfs maintainers, it is probably better that they will go through the
> > vfs tree.
> >
> > I've tested that overlay "fake" path are still shown in /proc/self/maps
> > and in the /proc/self/exe and /proc/self/map_files/ symlinks.
> >
> > The audit and tomoyo use of file_fake_path() is not tested
> > (CC maintainers), but they both look like user displayed paths,
> > so I assumed they's want to preserve the existing behavior
> > (i.e. displaying the fake overlayfs path).
>
> I did an audit of all ->vm_file  and found a couple of missing ones:

Wait, but why only ->vm_file?
We were under the assumption the fake path is only needed
for mapped files, but the list below suggests that it matters
to other file operations as well...

>
> dump_common_audit_data
> ima_file_mprotect
> common_file_perm (I don't understand the code enough to know whether
> it needs fake dentry or not)
> aa_file_perm
> __file_path_perm
> print_bad_pte
> file_path
> seq_print_user_ip
> __mnt_want_write_file
> __mnt_drop_write_file
> file_dentry_name
>
> Didn't go into drivers/ and didn't follow indirect calls (e.g.
> f_op->fsysnc).  I also may have missed something along the way, but my
> guess is that I did catch most cases.

Wow. So much for 3-4 special cases...

Confused by some of the above.

Why would we want __mnt_want_write_file on the fake path?
We'd already taken __mnt_want_write on overlay and with
real file we need __mnt_want_write on the real path.

For IMA/LSMs, I'd imagine that like fanotify, they would rather get
the real path where the real policy is stored.
If some log files end with relative path instead of full fake path
it's probably not the worst outcome.

Thoughts?

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 0/3] Reduce impact of overlayfs fake path files
  2023-06-09 14:28   ` Amir Goldstein
@ 2023-06-09 14:42     ` Amir Goldstein
  2023-06-09 15:00       ` Miklos Szeredi
  2023-06-09 15:27     ` Mimi Zohar
  1 sibling, 1 reply; 27+ messages in thread
From: Amir Goldstein @ 2023-06-09 14:42 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: Christian Brauner, Al Viro, Jan Kara, Paul Moore, Tetsuo Handa,
	linux-fsdevel, linux-unionfs, Mimi Zohar

On Fri, Jun 9, 2023 at 5:28 PM Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Fri, Jun 9, 2023 at 4:15 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> >
> > On Fri, 9 Jun 2023 at 09:32, Amir Goldstein <amir73il@gmail.com> wrote:
> > >
> > > Miklos,
> > >
> > > This is the solution that we discussed for removing FMODE_NONOTIFY
> > > from overlayfs real files.
> > >
> > > My branch [1] has an extra patch for remove FMODE_NONOTIFY, but
> > > I am still testing the ovl-fsnotify interaction, so we can defer
> > > that step to later.
> > >
> > > I wanted to post this series earlier to give more time for fsdevel
> > > feedback and if these patches get your blessing and the blessing of
> > > vfs maintainers, it is probably better that they will go through the
> > > vfs tree.
> > >
> > > I've tested that overlay "fake" path are still shown in /proc/self/maps
> > > and in the /proc/self/exe and /proc/self/map_files/ symlinks.
> > >
> > > The audit and tomoyo use of file_fake_path() is not tested
> > > (CC maintainers), but they both look like user displayed paths,
> > > so I assumed they's want to preserve the existing behavior
> > > (i.e. displaying the fake overlayfs path).
> >
> > I did an audit of all ->vm_file  and found a couple of missing ones:
>
> Wait, but why only ->vm_file?
> We were under the assumption the fake path is only needed
> for mapped files, but the list below suggests that it matters
> to other file operations as well...
>
> >
> > dump_common_audit_data
> > ima_file_mprotect
> > common_file_perm (I don't understand the code enough to know whether
> > it needs fake dentry or not)
> > aa_file_perm
> > __file_path_perm
> > print_bad_pte
> > file_path
> > seq_print_user_ip
> > __mnt_want_write_file
> > __mnt_drop_write_file
> > file_dentry_name
> >
> > Didn't go into drivers/ and didn't follow indirect calls (e.g.
> > f_op->fsysnc).  I also may have missed something along the way, but my
> > guess is that I did catch most cases.
>
> Wow. So much for 3-4 special cases...
>
> Confused by some of the above.
>
> Why would we want __mnt_want_write_file on the fake path?
> We'd already taken __mnt_want_write on overlay and with
> real file we need __mnt_want_write on the real path.
>
> For IMA/LSMs, I'd imagine that like fanotify, they would rather get
> the real path where the real policy is stored.
> If some log files end with relative path instead of full fake path
> it's probably not the worst outcome.
>
> Thoughts?

Considering the results of your audit, I think I prefer to keep
f_path fake and store real_path in struct file_fake for code
that wants the real path.

This will keep all logic unchanged, which is better for my health.
and only fsnotify (for now) will start using f_real_path() to
generate events on real fs objects.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 0/3] Reduce impact of overlayfs fake path files
  2023-06-09 14:42     ` Amir Goldstein
@ 2023-06-09 15:00       ` Miklos Szeredi
  2023-06-09 19:17         ` Amir Goldstein
                           ` (2 more replies)
  0 siblings, 3 replies; 27+ messages in thread
From: Miklos Szeredi @ 2023-06-09 15:00 UTC (permalink / raw)
  To: Amir Goldstein
  Cc: Christian Brauner, Al Viro, Jan Kara, Paul Moore, Tetsuo Handa,
	linux-fsdevel, linux-unionfs, Mimi Zohar

On Fri, 9 Jun 2023 at 16:42, Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Fri, Jun 9, 2023 at 5:28 PM Amir Goldstein <amir73il@gmail.com> wrote:
> >
> > On Fri, Jun 9, 2023 at 4:15 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > >
> > > On Fri, 9 Jun 2023 at 09:32, Amir Goldstein <amir73il@gmail.com> wrote:
> > > >
> > > > Miklos,
> > > >
> > > > This is the solution that we discussed for removing FMODE_NONOTIFY
> > > > from overlayfs real files.
> > > >
> > > > My branch [1] has an extra patch for remove FMODE_NONOTIFY, but
> > > > I am still testing the ovl-fsnotify interaction, so we can defer
> > > > that step to later.
> > > >
> > > > I wanted to post this series earlier to give more time for fsdevel
> > > > feedback and if these patches get your blessing and the blessing of
> > > > vfs maintainers, it is probably better that they will go through the
> > > > vfs tree.
> > > >
> > > > I've tested that overlay "fake" path are still shown in /proc/self/maps
> > > > and in the /proc/self/exe and /proc/self/map_files/ symlinks.
> > > >
> > > > The audit and tomoyo use of file_fake_path() is not tested
> > > > (CC maintainers), but they both look like user displayed paths,
> > > > so I assumed they's want to preserve the existing behavior
> > > > (i.e. displaying the fake overlayfs path).
> > >
> > > I did an audit of all ->vm_file  and found a couple of missing ones:
> >
> > Wait, but why only ->vm_file?

Because we don't get to intercept vm_ops, so anything done through
mmaps will not go though overlayfs.   That would result in apparmor
missing these, for example.

> > We were under the assumption the fake path is only needed
> > for mapped files, but the list below suggests that it matters
> > to other file operations as well...
> >
> > >
> > > dump_common_audit_data
> > > ima_file_mprotect
> > > common_file_perm (I don't understand the code enough to know whether
> > > it needs fake dentry or not)
> > > aa_file_perm
> > > __file_path_perm
> > > print_bad_pte
> > > file_path
> > > seq_print_user_ip
> > > __mnt_want_write_file
> > > __mnt_drop_write_file
> > > file_dentry_name
> > >
> > > Didn't go into drivers/ and didn't follow indirect calls (e.g.
> > > f_op->fsysnc).  I also may have missed something along the way, but my
> > > guess is that I did catch most cases.
> >
> > Wow. So much for 3-4 special cases...
> >
> > Confused by some of the above.
> >
> > Why would we want __mnt_want_write_file on the fake path?
> > We'd already taken __mnt_want_write on overlay and with
> > real file we need __mnt_want_write on the real path.

It's for write faults on memory maps.   The code already branches on
file->f_mode, I don't think it would be a big performance hit to check
FMODE_FAKE_PATH.

> >
> > For IMA/LSMs, I'd imagine that like fanotify, they would rather get
> > the real path where the real policy is stored.
> > If some log files end with relative path instead of full fake path
> > it's probably not the worst outcome.
> >
> > Thoughts?
>
> Considering the results of your audit, I think I prefer to keep
> f_path fake and store real_path in struct file_fake for code
> that wants the real path.
>
> This will keep all logic unchanged, which is better for my health.
> and only fsnotify (for now) will start using f_real_path() to
> generate events on real fs objects.

That's also an option.

I think f_fake_path() would still be a move in the right direction.
We have 46 instances of file_dentry() currently and of those special
cases most are cosmetic, while missing file_dentry() ones are
crashable.

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 0/3] Reduce impact of overlayfs fake path files
  2023-06-09 14:28   ` Amir Goldstein
  2023-06-09 14:42     ` Amir Goldstein
@ 2023-06-09 15:27     ` Mimi Zohar
  1 sibling, 0 replies; 27+ messages in thread
From: Mimi Zohar @ 2023-06-09 15:27 UTC (permalink / raw)
  To: Amir Goldstein, Miklos Szeredi
  Cc: Christian Brauner, Al Viro, Jan Kara, Paul Moore, Tetsuo Handa,
	linux-fsdevel, linux-unionfs

On Fri, 2023-06-09 at 17:28 +0300, Amir Goldstein wrote:
> For IMA/LSMs, I'd imagine that like fanotify, they would rather get
> the real path where the real policy is stored.

Definitely!

> If some log files end with relative path instead of full fake path
> it's probably not the worst outcome.

-- 
thanks,

Mimi


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 0/3] Reduce impact of overlayfs fake path files
  2023-06-09 15:00       ` Miklos Szeredi
@ 2023-06-09 19:17         ` Amir Goldstein
  2023-06-12  7:57         ` Christian Brauner
  2023-10-02 15:32         ` Amir Goldstein
  2 siblings, 0 replies; 27+ messages in thread
From: Amir Goldstein @ 2023-06-09 19:17 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: Christian Brauner, Al Viro, Jan Kara, Paul Moore, Tetsuo Handa,
	linux-fsdevel, linux-unionfs, Mimi Zohar

On Fri, Jun 9, 2023 at 6:00 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
>
> On Fri, 9 Jun 2023 at 16:42, Amir Goldstein <amir73il@gmail.com> wrote:
> >
> > On Fri, Jun 9, 2023 at 5:28 PM Amir Goldstein <amir73il@gmail.com> wrote:
> > >
> > > On Fri, Jun 9, 2023 at 4:15 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > > >
> > > > On Fri, 9 Jun 2023 at 09:32, Amir Goldstein <amir73il@gmail.com> wrote:
> > > > >
> > > > > Miklos,
> > > > >
> > > > > This is the solution that we discussed for removing FMODE_NONOTIFY
> > > > > from overlayfs real files.
> > > > >
> > > > > My branch [1] has an extra patch for remove FMODE_NONOTIFY, but
> > > > > I am still testing the ovl-fsnotify interaction, so we can defer
> > > > > that step to later.
> > > > >
> > > > > I wanted to post this series earlier to give more time for fsdevel
> > > > > feedback and if these patches get your blessing and the blessing of
> > > > > vfs maintainers, it is probably better that they will go through the
> > > > > vfs tree.
> > > > >
> > > > > I've tested that overlay "fake" path are still shown in /proc/self/maps
> > > > > and in the /proc/self/exe and /proc/self/map_files/ symlinks.
> > > > >
> > > > > The audit and tomoyo use of file_fake_path() is not tested
> > > > > (CC maintainers), but they both look like user displayed paths,
> > > > > so I assumed they's want to preserve the existing behavior
> > > > > (i.e. displaying the fake overlayfs path).
> > > >
> > > > I did an audit of all ->vm_file  and found a couple of missing ones:
> > >
> > > Wait, but why only ->vm_file?
>
> Because we don't get to intercept vm_ops, so anything done through
> mmaps will not go though overlayfs.   That would result in apparmor
> missing these, for example.
>
> > > We were under the assumption the fake path is only needed
> > > for mapped files, but the list below suggests that it matters
> > > to other file operations as well...
> > >
> > > >
> > > > dump_common_audit_data
> > > > ima_file_mprotect
> > > > common_file_perm (I don't understand the code enough to know whether
> > > > it needs fake dentry or not)
> > > > aa_file_perm
> > > > __file_path_perm
> > > > print_bad_pte
> > > > file_path
> > > > seq_print_user_ip
> > > > __mnt_want_write_file
> > > > __mnt_drop_write_file
> > > > file_dentry_name
> > > >
> > > > Didn't go into drivers/ and didn't follow indirect calls (e.g.
> > > > f_op->fsysnc).  I also may have missed something along the way, but my
> > > > guess is that I did catch most cases.
> > >
> > > Wow. So much for 3-4 special cases...
> > >
> > > Confused by some of the above.
> > >
> > > Why would we want __mnt_want_write_file on the fake path?
> > > We'd already taken __mnt_want_write on overlay and with
> > > real file we need __mnt_want_write on the real path.
>
> It's for write faults on memory maps.   The code already branches on
> file->f_mode, I don't think it would be a big performance hit to check
> FMODE_FAKE_PATH.
>

Yes, but all those lower leven helpers are also called for read/write ops
on the same realfile object, where we do not want them to act on the
fake path. That's the reason I started with this conversion in the first
place. Maybe I am missing something in the big picture, but for now
the next steps are clear to me:

1. Store both real+fake paths in file_fake container
2. f_path remains fake now and maybe will be changed later
3. f_real_path() will be used now in fsnotify
4. Once we have a plan, we can start adding f_fake_path()
     calls for the mapped file code paths and one day, we may
     be able to let f_path be real

I will post v3 with steps 1-3.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/3] fs: use fake_file container for internal files with fake f_path
  2023-06-09 12:12       ` Christian Brauner
  2023-06-09 12:20         ` Amir Goldstein
@ 2023-06-11 19:11         ` Amir Goldstein
  2023-06-12  7:55           ` Christian Brauner
  1 sibling, 1 reply; 27+ messages in thread
From: Amir Goldstein @ 2023-06-11 19:11 UTC (permalink / raw)
  To: Christian Brauner
  Cc: Miklos Szeredi, Al Viro, Jan Kara, Paul Moore, Tetsuo Handa,
	linux-fsdevel, linux-unionfs

On Fri, Jun 9, 2023 at 3:12 PM Christian Brauner <brauner@kernel.org> wrote:
>
> On Fri, Jun 09, 2023 at 02:57:20PM +0300, Amir Goldstein wrote:
> > On Fri, Jun 9, 2023 at 2:32 PM Christian Brauner <brauner@kernel.org> wrote:
> > >
> > > On Fri, Jun 09, 2023 at 10:32:37AM +0300, Amir Goldstein wrote:
> > > > Overlayfs and cachefiles use open_with_fake_path() to allocate internal
> > > > files, where overlayfs also puts a "fake" path in f_path - a path which
> > > > is not on the same fs as f_inode.
> > > >
> > > > Allocate a container struct file_fake for those internal files, that
> > > > will be used to hold the fake path qlong with the real path.
> > > >
> > > > This commit does not populate the extra fake_path field and leaves the
> > > > overlayfs internal file's f_path fake.
> > > >
> > > > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > > > ---
> > > >  fs/cachefiles/namei.c |  2 +-
> > > >  fs/file_table.c       | 85 +++++++++++++++++++++++++++++++++++--------
> > > >  fs/internal.h         |  5 ++-
> > > >  fs/namei.c            |  2 +-
> > > >  fs/open.c             | 11 +++---
> > > >  fs/overlayfs/file.c   |  2 +-
> > > >  include/linux/fs.h    | 13 ++++---
> > > >  7 files changed, 90 insertions(+), 30 deletions(-)
> > > >
> > > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
> > > > index 82219a8f6084..a71bdf2d03ba 100644
> > > > --- a/fs/cachefiles/namei.c
> > > > +++ b/fs/cachefiles/namei.c
> > > > @@ -561,7 +561,7 @@ static bool cachefiles_open_file(struct cachefiles_object *object,
> > > >       path.mnt = cache->mnt;
> > > >       path.dentry = dentry;
> > > >       file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
> > > > -                                d_backing_inode(dentry), cache->cache_cred);
> > > > +                                &path, cache->cache_cred);
> > > >       if (IS_ERR(file)) {
> > > >               trace_cachefiles_vfs_error(object, d_backing_inode(dentry),
> > > >                                          PTR_ERR(file),
> > > > diff --git a/fs/file_table.c b/fs/file_table.c
> > > > index 372653b92617..adc2a92faa52 100644
> > > > --- a/fs/file_table.c
> > > > +++ b/fs/file_table.c
> > > > @@ -44,18 +44,48 @@ static struct kmem_cache *filp_cachep __read_mostly;
> > > >
> > > >  static struct percpu_counter nr_files __cacheline_aligned_in_smp;
> > > >
> > > > +/* Container for file with optional fake path to display in /proc files */
> > > > +struct file_fake {
> > > > +     struct file file;
> > > > +     struct path fake_path;
> > > > +};
> > > > +
> > > > +static inline struct file_fake *file_fake(struct file *f)
> > > > +{
> > > > +     return container_of(f, struct file_fake, file);
> > > > +}
> > > > +
> > > > +/* Returns fake_path if one exists, f_path otherwise */
> > > > +const struct path *file_fake_path(struct file *f)
> > > > +{
> > > > +     struct file_fake *ff = file_fake(f);
> > > > +
> > > > +     if (!(f->f_mode & FMODE_FAKE_PATH) || !ff->fake_path.dentry)
> > > > +             return &f->f_path;
> > > > +
> > > > +     return &ff->fake_path;
> > > > +}
> > > > +EXPORT_SYMBOL(file_fake_path);
> > > > +
> > > >  static void file_free_rcu(struct rcu_head *head)
> > > >  {
> > > >       struct file *f = container_of(head, struct file, f_rcuhead);
> > > >
> > > >       put_cred(f->f_cred);
> > > > -     kmem_cache_free(filp_cachep, f);
> > > > +     if (f->f_mode & FMODE_FAKE_PATH)
> > > > +             kfree(file_fake(f));
> > > > +     else
> > > > +             kmem_cache_free(filp_cachep, f);
> > > >  }
> > > >
> > > >  static inline void file_free(struct file *f)
> > > >  {
> > > > +     struct file_fake *ff = file_fake(f);
> > > > +
> > > >       security_file_free(f);
> > > > -     if (!(f->f_mode & FMODE_NOACCOUNT))
> > > > +     if (f->f_mode & FMODE_FAKE_PATH)
> > > > +             path_put(&ff->fake_path);
> > > > +     else
> > > >               percpu_counter_dec(&nr_files);
> > > >       call_rcu(&f->f_rcuhead, file_free_rcu);
> > > >  }
> > > > @@ -131,20 +161,15 @@ static int __init init_fs_stat_sysctls(void)
> > > >  fs_initcall(init_fs_stat_sysctls);
> > > >  #endif
> > > >
> > > > -static struct file *__alloc_file(int flags, const struct cred *cred)
> > > > +static int init_file(struct file *f, int flags, const struct cred *cred)
> > > >  {
> > > > -     struct file *f;
> > > >       int error;
> > > >
> > > > -     f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> > > > -     if (unlikely(!f))
> > > > -             return ERR_PTR(-ENOMEM);
> > > > -
> > > >       f->f_cred = get_cred(cred);
> > > >       error = security_file_alloc(f);
> > > >       if (unlikely(error)) {
> > > >               file_free_rcu(&f->f_rcuhead);
> > > > -             return ERR_PTR(error);
> > > > +             return error;
> > > >       }
> > > >
> > > >       atomic_long_set(&f->f_count, 1);
> > > > @@ -155,6 +180,22 @@ static struct file *__alloc_file(int flags, const struct cred *cred)
> > > >       f->f_mode = OPEN_FMODE(flags);
> > > >       /* f->f_version: 0 */
> > > >
> > > > +     return 0;
> > > > +}
> > > > +
> > > > +static struct file *__alloc_file(int flags, const struct cred *cred)
> > > > +{
> > > > +     struct file *f;
> > > > +     int error;
> > > > +
> > > > +     f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> > > > +     if (unlikely(!f))
> > > > +             return ERR_PTR(-ENOMEM);
> > > > +
> > > > +     error = init_file(f, flags, cred);
> > > > +     if (unlikely(error))
> > > > +             return ERR_PTR(error);
> > > > +
> > > >       return f;
> > > >  }
> > > >
> > > > @@ -201,18 +242,32 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
> > > >  }
> > > >
> > > >  /*
> > > > - * Variant of alloc_empty_file() that doesn't check and modify nr_files.
> > > > + * Variant of alloc_empty_file() that allocates a file_fake container
> > > > + * and doesn't check and modify nr_files.
> > > >   *
> > > >   * Should not be used unless there's a very good reason to do so.
> > > >   */
> > > > -struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
> > > > +struct file *alloc_empty_file_fake(const struct path *fake_path, int flags,
> > > > +                                const struct cred *cred)
> > > >  {
> > > > -     struct file *f = __alloc_file(flags, cred);
> > > > +     struct file_fake *ff;
> > > > +     int error;
> > > >
> > > > -     if (!IS_ERR(f))
> > > > -             f->f_mode |= FMODE_NOACCOUNT;
> > > > +     ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL);
> > > > +     if (unlikely(!ff))
> > > > +             return ERR_PTR(-ENOMEM);
> > > >
> > > > -     return f;
> > > > +     error = init_file(&ff->file, flags, cred);
> > > > +     if (unlikely(error))
> > > > +             return ERR_PTR(error);
> > > > +
> > > > +     ff->file.f_mode |= FMODE_FAKE_PATH;
> > > > +     if (fake_path) {
> > > > +             path_get(fake_path);
> > > > +             ff->fake_path = *fake_path;
> > > > +     }
> > >
> > > Hm, I see that this check is mostly done for vfs_tmpfile_open() which
> > > only fills in file->f_path in vfs_tmpfile() but leaves ff->fake_path
> > > NULL.
> > >
> > > So really I think having FMODE_FAKE_PATH set but ff->fake_path be NULL
> > > is an invitation for NULL derefs sooner or later. I would simply
> > > document that it's required to set ff->fake_path. For callers such as
> > > vfs_tmpfile_open() it can just be path itself. IOW, vfs_tmpfile_open()
> > > should set ff->fake_path to file->f_path.
> >
> > Makes sense.
> > I also took the liberty to re-arrange vfs_tmpfile_open() without the
> > unneeded if (!error) { nesting depth.
>
> Yes, please. I had a rough sketch just for my own amusement...
>
> fs/namei.c
>   struct file *vfs_tmpfile_open(struct mnt_idmap *idmap,
>                                 const struct path *parentpath, umode_t mode,
>                                 int open_flag, const struct cred *cred)
>   {
>           struct file *file;
>           int error;
>
>           file = alloc_empty_file_fake(open_flag, cred);
>           if (IS_ERR(file))
>                   return file;
>
>           error = vfs_tmpfile(idmap, parentpath, file, mode);
>           if (error) {
>                   fput(file);
>                   return ERR_PTR(error);
>           }
>
>           return file_set_fake_path(file, &file->f_path);

FYI, this is not enough to guarantee that the fake_path cannot
be empty, for example in fput() above.
So I did keep the real_path empty in this case in v3 and
I have an accessor that verifies that real_path is not empty
before returning it.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/3] fs: use fake_file container for internal files with fake f_path
  2023-06-11 19:11         ` Amir Goldstein
@ 2023-06-12  7:55           ` Christian Brauner
  0 siblings, 0 replies; 27+ messages in thread
From: Christian Brauner @ 2023-06-12  7:55 UTC (permalink / raw)
  To: Amir Goldstein
  Cc: Miklos Szeredi, Al Viro, Jan Kara, Paul Moore, Tetsuo Handa,
	linux-fsdevel, linux-unionfs

On Sun, Jun 11, 2023 at 10:11:29PM +0300, Amir Goldstein wrote:
> On Fri, Jun 9, 2023 at 3:12 PM Christian Brauner <brauner@kernel.org> wrote:
> >
> > On Fri, Jun 09, 2023 at 02:57:20PM +0300, Amir Goldstein wrote:
> > > On Fri, Jun 9, 2023 at 2:32 PM Christian Brauner <brauner@kernel.org> wrote:
> > > >
> > > > On Fri, Jun 09, 2023 at 10:32:37AM +0300, Amir Goldstein wrote:
> > > > > Overlayfs and cachefiles use open_with_fake_path() to allocate internal
> > > > > files, where overlayfs also puts a "fake" path in f_path - a path which
> > > > > is not on the same fs as f_inode.
> > > > >
> > > > > Allocate a container struct file_fake for those internal files, that
> > > > > will be used to hold the fake path qlong with the real path.
> > > > >
> > > > > This commit does not populate the extra fake_path field and leaves the
> > > > > overlayfs internal file's f_path fake.
> > > > >
> > > > > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > > > > ---
> > > > >  fs/cachefiles/namei.c |  2 +-
> > > > >  fs/file_table.c       | 85 +++++++++++++++++++++++++++++++++++--------
> > > > >  fs/internal.h         |  5 ++-
> > > > >  fs/namei.c            |  2 +-
> > > > >  fs/open.c             | 11 +++---
> > > > >  fs/overlayfs/file.c   |  2 +-
> > > > >  include/linux/fs.h    | 13 ++++---
> > > > >  7 files changed, 90 insertions(+), 30 deletions(-)
> > > > >
> > > > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
> > > > > index 82219a8f6084..a71bdf2d03ba 100644
> > > > > --- a/fs/cachefiles/namei.c
> > > > > +++ b/fs/cachefiles/namei.c
> > > > > @@ -561,7 +561,7 @@ static bool cachefiles_open_file(struct cachefiles_object *object,
> > > > >       path.mnt = cache->mnt;
> > > > >       path.dentry = dentry;
> > > > >       file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
> > > > > -                                d_backing_inode(dentry), cache->cache_cred);
> > > > > +                                &path, cache->cache_cred);
> > > > >       if (IS_ERR(file)) {
> > > > >               trace_cachefiles_vfs_error(object, d_backing_inode(dentry),
> > > > >                                          PTR_ERR(file),
> > > > > diff --git a/fs/file_table.c b/fs/file_table.c
> > > > > index 372653b92617..adc2a92faa52 100644
> > > > > --- a/fs/file_table.c
> > > > > +++ b/fs/file_table.c
> > > > > @@ -44,18 +44,48 @@ static struct kmem_cache *filp_cachep __read_mostly;
> > > > >
> > > > >  static struct percpu_counter nr_files __cacheline_aligned_in_smp;
> > > > >
> > > > > +/* Container for file with optional fake path to display in /proc files */
> > > > > +struct file_fake {
> > > > > +     struct file file;
> > > > > +     struct path fake_path;
> > > > > +};
> > > > > +
> > > > > +static inline struct file_fake *file_fake(struct file *f)
> > > > > +{
> > > > > +     return container_of(f, struct file_fake, file);
> > > > > +}
> > > > > +
> > > > > +/* Returns fake_path if one exists, f_path otherwise */
> > > > > +const struct path *file_fake_path(struct file *f)
> > > > > +{
> > > > > +     struct file_fake *ff = file_fake(f);
> > > > > +
> > > > > +     if (!(f->f_mode & FMODE_FAKE_PATH) || !ff->fake_path.dentry)
> > > > > +             return &f->f_path;
> > > > > +
> > > > > +     return &ff->fake_path;
> > > > > +}
> > > > > +EXPORT_SYMBOL(file_fake_path);
> > > > > +
> > > > >  static void file_free_rcu(struct rcu_head *head)
> > > > >  {
> > > > >       struct file *f = container_of(head, struct file, f_rcuhead);
> > > > >
> > > > >       put_cred(f->f_cred);
> > > > > -     kmem_cache_free(filp_cachep, f);
> > > > > +     if (f->f_mode & FMODE_FAKE_PATH)
> > > > > +             kfree(file_fake(f));
> > > > > +     else
> > > > > +             kmem_cache_free(filp_cachep, f);
> > > > >  }
> > > > >
> > > > >  static inline void file_free(struct file *f)
> > > > >  {
> > > > > +     struct file_fake *ff = file_fake(f);
> > > > > +
> > > > >       security_file_free(f);
> > > > > -     if (!(f->f_mode & FMODE_NOACCOUNT))
> > > > > +     if (f->f_mode & FMODE_FAKE_PATH)
> > > > > +             path_put(&ff->fake_path);
> > > > > +     else
> > > > >               percpu_counter_dec(&nr_files);
> > > > >       call_rcu(&f->f_rcuhead, file_free_rcu);
> > > > >  }
> > > > > @@ -131,20 +161,15 @@ static int __init init_fs_stat_sysctls(void)
> > > > >  fs_initcall(init_fs_stat_sysctls);
> > > > >  #endif
> > > > >
> > > > > -static struct file *__alloc_file(int flags, const struct cred *cred)
> > > > > +static int init_file(struct file *f, int flags, const struct cred *cred)
> > > > >  {
> > > > > -     struct file *f;
> > > > >       int error;
> > > > >
> > > > > -     f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> > > > > -     if (unlikely(!f))
> > > > > -             return ERR_PTR(-ENOMEM);
> > > > > -
> > > > >       f->f_cred = get_cred(cred);
> > > > >       error = security_file_alloc(f);
> > > > >       if (unlikely(error)) {
> > > > >               file_free_rcu(&f->f_rcuhead);
> > > > > -             return ERR_PTR(error);
> > > > > +             return error;
> > > > >       }
> > > > >
> > > > >       atomic_long_set(&f->f_count, 1);
> > > > > @@ -155,6 +180,22 @@ static struct file *__alloc_file(int flags, const struct cred *cred)
> > > > >       f->f_mode = OPEN_FMODE(flags);
> > > > >       /* f->f_version: 0 */
> > > > >
> > > > > +     return 0;
> > > > > +}
> > > > > +
> > > > > +static struct file *__alloc_file(int flags, const struct cred *cred)
> > > > > +{
> > > > > +     struct file *f;
> > > > > +     int error;
> > > > > +
> > > > > +     f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> > > > > +     if (unlikely(!f))
> > > > > +             return ERR_PTR(-ENOMEM);
> > > > > +
> > > > > +     error = init_file(f, flags, cred);
> > > > > +     if (unlikely(error))
> > > > > +             return ERR_PTR(error);
> > > > > +
> > > > >       return f;
> > > > >  }
> > > > >
> > > > > @@ -201,18 +242,32 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
> > > > >  }
> > > > >
> > > > >  /*
> > > > > - * Variant of alloc_empty_file() that doesn't check and modify nr_files.
> > > > > + * Variant of alloc_empty_file() that allocates a file_fake container
> > > > > + * and doesn't check and modify nr_files.
> > > > >   *
> > > > >   * Should not be used unless there's a very good reason to do so.
> > > > >   */
> > > > > -struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
> > > > > +struct file *alloc_empty_file_fake(const struct path *fake_path, int flags,
> > > > > +                                const struct cred *cred)
> > > > >  {
> > > > > -     struct file *f = __alloc_file(flags, cred);
> > > > > +     struct file_fake *ff;
> > > > > +     int error;
> > > > >
> > > > > -     if (!IS_ERR(f))
> > > > > -             f->f_mode |= FMODE_NOACCOUNT;
> > > > > +     ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL);
> > > > > +     if (unlikely(!ff))
> > > > > +             return ERR_PTR(-ENOMEM);
> > > > >
> > > > > -     return f;
> > > > > +     error = init_file(&ff->file, flags, cred);
> > > > > +     if (unlikely(error))
> > > > > +             return ERR_PTR(error);
> > > > > +
> > > > > +     ff->file.f_mode |= FMODE_FAKE_PATH;
> > > > > +     if (fake_path) {
> > > > > +             path_get(fake_path);
> > > > > +             ff->fake_path = *fake_path;
> > > > > +     }
> > > >
> > > > Hm, I see that this check is mostly done for vfs_tmpfile_open() which
> > > > only fills in file->f_path in vfs_tmpfile() but leaves ff->fake_path
> > > > NULL.
> > > >
> > > > So really I think having FMODE_FAKE_PATH set but ff->fake_path be NULL
> > > > is an invitation for NULL derefs sooner or later. I would simply
> > > > document that it's required to set ff->fake_path. For callers such as
> > > > vfs_tmpfile_open() it can just be path itself. IOW, vfs_tmpfile_open()
> > > > should set ff->fake_path to file->f_path.
> > >
> > > Makes sense.
> > > I also took the liberty to re-arrange vfs_tmpfile_open() without the
> > > unneeded if (!error) { nesting depth.
> >
> > Yes, please. I had a rough sketch just for my own amusement...
> >
> > fs/namei.c
> >   struct file *vfs_tmpfile_open(struct mnt_idmap *idmap,
> >                                 const struct path *parentpath, umode_t mode,
> >                                 int open_flag, const struct cred *cred)
> >   {
> >           struct file *file;
> >           int error;
> >
> >           file = alloc_empty_file_fake(open_flag, cred);
> >           if (IS_ERR(file))
> >                   return file;
> >
> >           error = vfs_tmpfile(idmap, parentpath, file, mode);
> >           if (error) {
> >                   fput(file);
> >                   return ERR_PTR(error);
> >           }
> >
> >           return file_set_fake_path(file, &file->f_path);
> 
> FYI, this is not enough to guarantee that the fake_path cannot
> be empty, for example in fput() above.
> So I did keep the real_path empty in this case in v3 and
> I have an accessor that verifies that real_path is not empty
> before returning it.

Ok, I'm just making it to v3 now.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 0/3] Reduce impact of overlayfs fake path files
  2023-06-09 15:00       ` Miklos Szeredi
  2023-06-09 19:17         ` Amir Goldstein
@ 2023-06-12  7:57         ` Christian Brauner
  2023-10-02 15:32         ` Amir Goldstein
  2 siblings, 0 replies; 27+ messages in thread
From: Christian Brauner @ 2023-06-12  7:57 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: Amir Goldstein, Al Viro, Jan Kara, Paul Moore, Tetsuo Handa,
	linux-fsdevel, linux-unionfs, Mimi Zohar

On Fri, Jun 09, 2023 at 05:00:25PM +0200, Miklos Szeredi wrote:
> On Fri, 9 Jun 2023 at 16:42, Amir Goldstein <amir73il@gmail.com> wrote:
> >
> > On Fri, Jun 9, 2023 at 5:28 PM Amir Goldstein <amir73il@gmail.com> wrote:
> > >
> > > On Fri, Jun 9, 2023 at 4:15 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > > >
> > > > On Fri, 9 Jun 2023 at 09:32, Amir Goldstein <amir73il@gmail.com> wrote:
> > > > >
> > > > > Miklos,
> > > > >
> > > > > This is the solution that we discussed for removing FMODE_NONOTIFY
> > > > > from overlayfs real files.
> > > > >
> > > > > My branch [1] has an extra patch for remove FMODE_NONOTIFY, but
> > > > > I am still testing the ovl-fsnotify interaction, so we can defer
> > > > > that step to later.
> > > > >
> > > > > I wanted to post this series earlier to give more time for fsdevel
> > > > > feedback and if these patches get your blessing and the blessing of
> > > > > vfs maintainers, it is probably better that they will go through the
> > > > > vfs tree.
> > > > >
> > > > > I've tested that overlay "fake" path are still shown in /proc/self/maps
> > > > > and in the /proc/self/exe and /proc/self/map_files/ symlinks.
> > > > >
> > > > > The audit and tomoyo use of file_fake_path() is not tested
> > > > > (CC maintainers), but they both look like user displayed paths,
> > > > > so I assumed they's want to preserve the existing behavior
> > > > > (i.e. displaying the fake overlayfs path).
> > > >
> > > > I did an audit of all ->vm_file  and found a couple of missing ones:
> > >
> > > Wait, but why only ->vm_file?
> 
> Because we don't get to intercept vm_ops, so anything done through
> mmaps will not go though overlayfs.   That would result in apparmor
> missing these, for example.
> 
> > > We were under the assumption the fake path is only needed
> > > for mapped files, but the list below suggests that it matters
> > > to other file operations as well...
> > >
> > > >
> > > > dump_common_audit_data
> > > > ima_file_mprotect
> > > > common_file_perm (I don't understand the code enough to know whether
> > > > it needs fake dentry or not)
> > > > aa_file_perm
> > > > __file_path_perm
> > > > print_bad_pte
> > > > file_path
> > > > seq_print_user_ip
> > > > __mnt_want_write_file
> > > > __mnt_drop_write_file
> > > > file_dentry_name
> > > >
> > > > Didn't go into drivers/ and didn't follow indirect calls (e.g.
> > > > f_op->fsysnc).  I also may have missed something along the way, but my
> > > > guess is that I did catch most cases.
> > >
> > > Wow. So much for 3-4 special cases...
> > >
> > > Confused by some of the above.
> > >
> > > Why would we want __mnt_want_write_file on the fake path?
> > > We'd already taken __mnt_want_write on overlay and with
> > > real file we need __mnt_want_write on the real path.
> 
> It's for write faults on memory maps.   The code already branches on
> file->f_mode, I don't think it would be a big performance hit to check
> FMODE_FAKE_PATH.
> 
> > >
> > > For IMA/LSMs, I'd imagine that like fanotify, they would rather get
> > > the real path where the real policy is stored.
> > > If some log files end with relative path instead of full fake path
> > > it's probably not the worst outcome.
> > >
> > > Thoughts?
> >
> > Considering the results of your audit, I think I prefer to keep
> > f_path fake and store real_path in struct file_fake for code
> > that wants the real path.
> >
> > This will keep all logic unchanged, which is better for my health.
> > and only fsnotify (for now) will start using f_real_path() to
> > generate events on real fs objects.
> 
> That's also an option.

Ideally we keep the generic file infrastructure separate from the
conversion of the various places because the latter operation is the
really sensitive part.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 0/3] Reduce impact of overlayfs fake path files
  2023-06-09 15:00       ` Miklos Szeredi
  2023-06-09 19:17         ` Amir Goldstein
  2023-06-12  7:57         ` Christian Brauner
@ 2023-10-02 15:32         ` Amir Goldstein
  2023-10-04 15:29           ` Amir Goldstein
  2 siblings, 1 reply; 27+ messages in thread
From: Amir Goldstein @ 2023-10-02 15:32 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: Christian Brauner, Al Viro, Jan Kara, Paul Moore, Tetsuo Handa,
	linux-fsdevel, linux-unionfs, Mimi Zohar

On Fri, Jun 9, 2023 at 6:00 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
>
> On Fri, 9 Jun 2023 at 16:42, Amir Goldstein <amir73il@gmail.com> wrote:
> >
> > On Fri, Jun 9, 2023 at 5:28 PM Amir Goldstein <amir73il@gmail.com> wrote:
> > >
> > > On Fri, Jun 9, 2023 at 4:15 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > > >
> > > > On Fri, 9 Jun 2023 at 09:32, Amir Goldstein <amir73il@gmail.com> wrote:
> > > > >
> > > > > Miklos,
> > > > >
> > > > > This is the solution that we discussed for removing FMODE_NONOTIFY
> > > > > from overlayfs real files.
> > > > >
> > > > > My branch [1] has an extra patch for remove FMODE_NONOTIFY, but
> > > > > I am still testing the ovl-fsnotify interaction, so we can defer
> > > > > that step to later.
> > > > >
> > > > > I wanted to post this series earlier to give more time for fsdevel
> > > > > feedback and if these patches get your blessing and the blessing of
> > > > > vfs maintainers, it is probably better that they will go through the
> > > > > vfs tree.
> > > > >
> > > > > I've tested that overlay "fake" path are still shown in /proc/self/maps
> > > > > and in the /proc/self/exe and /proc/self/map_files/ symlinks.
> > > > >
> > > > > The audit and tomoyo use of file_fake_path() is not tested
> > > > > (CC maintainers), but they both look like user displayed paths,
> > > > > so I assumed they's want to preserve the existing behavior
> > > > > (i.e. displaying the fake overlayfs path).
> > > >
> > > > I did an audit of all ->vm_file  and found a couple of missing ones:
> > >

Hi Miklos,

Trying to get back to this.

> > > Wait, but why only ->vm_file?
>
> Because we don't get to intercept vm_ops, so anything done through
> mmaps will not go though overlayfs.   That would result in apparmor
> missing these, for example.
>

As discussed, unless we split ovl realfile to 3 variants: lower/upper/mmap
vm_file will be a backing_file and so will the read/write realfile,
sometimes the low level helpers need the real path and sometimes
then need the fake path.

> > > We were under the assumption the fake path is only needed
> > > for mapped files, but the list below suggests that it matters
> > > to other file operations as well...
> > >
> > > >
> > > > dump_common_audit_data

This is an example of a generic helper.
There is no way of knowing if it really wants the real or fake path.
It depends if the audit rule was set on ovl path or on real path,
but if audit rule was set on real path, we should not let the fake path
avert the audit, same as we should not have let the real file avert
fsnotify events.

It seems to me the audit rules are set on inodes and compare
st_dev/st_ino, so it is more likely that for operations on the realfile,
the real path makes more sense to print.

> > > > ima_file_mprotect

From the little experience I have with overlayfs and IMA,
nothing works much wrt measuring and verifying operations
on the real files.

> > > > common_file_perm (I don't understand the code enough to know whether
> > > > it needs fake dentry or not)
> > > > aa_file_perm
> > > > __file_path_perm

Same rationale as in audit.
We do not want to avert permission hooks for rules that were set
on the real inode/path, so it makes way more sense to use real path
in these common helpers.

Printing the wrong path is not as bad as not printing an audit!

> > > > print_bad_pte

I am not very concerned about printing real path in kmsg errors.
Those errors are more likely cause by underlying fs/block issues anyway?

> > > > file_path

Too low level.
Call sites that need to print the fake path can use d_path()

> > > > seq_print_user_ip

Yes.

> > > > __mnt_want_write_file
> > > > __mnt_drop_write_file
> > > > file_dentry_name

Too low level.

> > > >
> > > > Didn't go into drivers/ and didn't follow indirect calls (e.g.
> > > > f_op->fsysnc).  I also may have missed something along the way, but my
> > > > guess is that I did catch most cases.
> > >
> > > Wow. So much for 3-4 special cases...
> > >
> > > Confused by some of the above.
> > >
> > > Why would we want __mnt_want_write_file on the fake path?
> > > We'd already taken __mnt_want_write on overlay and with
> > > real file we need __mnt_want_write on the real path.
>
> It's for write faults on memory maps.   The code already branches on
> file->f_mode, I don't think it would be a big performance hit to check
> FMODE_FAKE_PATH.
>

Again, FMODE_BACKING is set on the same realfile that is used
for read/write_iter. Unless you will be willing to allocate two realfiles.

I added a patch to take mnt_writers refcount for both fake and real path
for writable file.
Page faults only need to take freeze protection on real sb. no?

> > >
> > > For IMA/LSMs, I'd imagine that like fanotify, they would rather get
> > > the real path where the real policy is stored.
> > > If some log files end with relative path instead of full fake path
> > > it's probably not the worst outcome.
> > >
> > > Thoughts?
> >
> > Considering the results of your audit, I think I prefer to keep
> > f_path fake and store real_path in struct file_fake for code
> > that wants the real path.
> >
> > This will keep all logic unchanged, which is better for my health.
> > and only fsnotify (for now) will start using f_real_path() to
> > generate events on real fs objects.
>
> That's also an option.
>
> I think f_fake_path() would still be a move in the right direction.
> We have 46 instances of file_dentry() currently and of those special
> cases most are cosmetic, while missing file_dentry() ones are
> crashable.
>

Here is what I have so far:

https://github.com/amir73il/linux/commits/ovl_fake_path

I tested it with fstests and nothing popped up.
If this looks like a good start to you, I can throw it at linux-next and
see and anything floats.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 0/3] Reduce impact of overlayfs fake path files
  2023-10-02 15:32         ` Amir Goldstein
@ 2023-10-04 15:29           ` Amir Goldstein
  0 siblings, 0 replies; 27+ messages in thread
From: Amir Goldstein @ 2023-10-04 15:29 UTC (permalink / raw)
  To: Paul Moore
  Cc: Christian Brauner, Al Viro, Jan Kara, Tetsuo Handa,
	linux-fsdevel, linux-unionfs, Mimi Zohar, Miklos Szeredi

On Mon, Oct 2, 2023 at 6:32 PM Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Fri, Jun 9, 2023 at 6:00 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> >
> > On Fri, 9 Jun 2023 at 16:42, Amir Goldstein <amir73il@gmail.com> wrote:
> > >
> > > On Fri, Jun 9, 2023 at 5:28 PM Amir Goldstein <amir73il@gmail.com> wrote:
> > > >
> > > > On Fri, Jun 9, 2023 at 4:15 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > > > >
> > > > > On Fri, 9 Jun 2023 at 09:32, Amir Goldstein <amir73il@gmail.com> wrote:
> > > > > >
> > > > > > Miklos,
> > > > > >
> > > > > > This is the solution that we discussed for removing FMODE_NONOTIFY
> > > > > > from overlayfs real files.
> > > > > >
> > > > > > My branch [1] has an extra patch for remove FMODE_NONOTIFY, but
> > > > > > I am still testing the ovl-fsnotify interaction, so we can defer
> > > > > > that step to later.
> > > > > >
> > > > > > I wanted to post this series earlier to give more time for fsdevel
> > > > > > feedback and if these patches get your blessing and the blessing of
> > > > > > vfs maintainers, it is probably better that they will go through the
> > > > > > vfs tree.
> > > > > >
> > > > > > I've tested that overlay "fake" path are still shown in /proc/self/maps
> > > > > > and in the /proc/self/exe and /proc/self/map_files/ symlinks.
> > > > > >
> > > > > > The audit and tomoyo use of file_fake_path() is not tested
> > > > > > (CC maintainers), but they both look like user displayed paths,
> > > > > > so I assumed they's want to preserve the existing behavior
> > > > > > (i.e. displaying the fake overlayfs path).
> > > > >
> > > > > I did an audit of all ->vm_file  and found a couple of missing ones:
> > > >
>
> Hi Miklos,
>
> Trying to get back to this.
>
> > > > Wait, but why only ->vm_file?
> >
> > Because we don't get to intercept vm_ops, so anything done through
> > mmaps will not go though overlayfs.   That would result in apparmor
> > missing these, for example.
> >
>
> As discussed, unless we split ovl realfile to 3 variants: lower/upper/mmap
> vm_file will be a backing_file and so will the read/write realfile,
> sometimes the low level helpers need the real path and sometimes
> then need the fake path.
>
> > > > We were under the assumption the fake path is only needed
> > > > for mapped files, but the list below suggests that it matters
> > > > to other file operations as well...
> > > >
> > > > >
> > > > > dump_common_audit_data
>
> This is an example of a generic helper.
> There is no way of knowing if it really wants the real or fake path.
> It depends if the audit rule was set on ovl path or on real path,
> but if audit rule was set on real path, we should not let the fake path
> avert the audit, same as we should not have let the real file avert
> fsnotify events.
>
> It seems to me the audit rules are set on inodes and compare
> st_dev/st_ino, so it is more likely that for operations on the realfile,
> the real path makes more sense to print.
>
> > > > > ima_file_mprotect
>
> From the little experience I have with overlayfs and IMA,
> nothing works much wrt measuring and verifying operations
> on the real files.
>
> > > > > common_file_perm (I don't understand the code enough to know whether
> > > > > it needs fake dentry or not)
> > > > > aa_file_perm
> > > > > __file_path_perm
>
> Same rationale as in audit.
> We do not want to avert permission hooks for rules that were set
> on the real inode/path, so it makes way more sense to use real path
> in these common helpers.
>
> Printing the wrong path is not as bad as not printing an audit!
>
> > > > > print_bad_pte
>
> I am not very concerned about printing real path in kmsg errors.
> Those errors are more likely cause by underlying fs/block issues anyway?
>
> > > > > file_path
>
> Too low level.
> Call sites that need to print the fake path can use d_path()
>
> > > > > seq_print_user_ip
>
> Yes.
>
> > > > > __mnt_want_write_file
> > > > > __mnt_drop_write_file
> > > > > file_dentry_name
>
> Too low level.
>
> > > > >
> > > > > Didn't go into drivers/ and didn't follow indirect calls (e.g.
> > > > > f_op->fsysnc).  I also may have missed something along the way, but my
> > > > > guess is that I did catch most cases.
> > > >
> > > > Wow. So much for 3-4 special cases...
> > > >
> > > > Confused by some of the above.
> > > >
> > > > Why would we want __mnt_want_write_file on the fake path?
> > > > We'd already taken __mnt_want_write on overlay and with
> > > > real file we need __mnt_want_write on the real path.
> >
> > It's for write faults on memory maps.   The code already branches on
> > file->f_mode, I don't think it would be a big performance hit to check
> > FMODE_FAKE_PATH.
> >
>
> Again, FMODE_BACKING is set on the same realfile that is used
> for read/write_iter. Unless you will be willing to allocate two realfiles.
>
> I added a patch to take mnt_writers refcount for both fake and real path
> for writable file.
> Page faults only need to take freeze protection on real sb. no?
>
> > > >
> > > > For IMA/LSMs, I'd imagine that like fanotify, they would rather get
> > > > the real path where the real policy is stored.
> > > > If some log files end with relative path instead of full fake path
> > > > it's probably not the worst outcome.
> > > >
> > > > Thoughts?
> > >
> > > Considering the results of your audit, I think I prefer to keep
> > > f_path fake and store real_path in struct file_fake for code
> > > that wants the real path.
> > >
> > > This will keep all logic unchanged, which is better for my health.
> > > and only fsnotify (for now) will start using f_real_path() to
> > > generate events on real fs objects.
> >
> > That's also an option.
> >
> > I think f_fake_path() would still be a move in the right direction.
> > We have 46 instances of file_dentry() currently and of those special
> > cases most are cosmetic, while missing file_dentry() ones are
> > crashable.
> >
>
> Here is what I have so far:
>
> https://github.com/amir73il/linux/commits/ovl_fake_path
>
> I tested it with fstests and nothing popped up.
> If this looks like a good start to you, I can throw it at linux-next and
> see and anything floats.
>

Hi Paul,

I would like to ask for your help to confirm my theory about
"overlayfs backing files" and LSM/IMA/audit hooks.

First, let me start with some background for the audience.

An overlayfs typically has lower+upper underlying layers, let's assume
for the sake of discussion that both lower and upper are on an xfs
mount at /mnt/xfs and that overlayfs is mounted at /mnt/ovl.

When a user opens a file from overlayfs mount path, an additional
internal "backing file" is opened by overlayfs on either the lower or
upper xfs path (e.g. /mnt/xfs/lower/foo).

In this example, security_file_open() will be called for the overlayfs
file at "/mnt/ovl/foo" with user credential and then backing_file_open()
will once again call security_file_open() with the backing file at
"/mnt/xfs/lower/foo" with the stored credentials of the user that mounted
overlayfs (i.e. mounter credentials). I guess you already know this part.

The problem is that the backing file's f_path is not "/mnt/xfs/lower/foo"
but rather "/mnt/ovl/foo". It has always been this way with overlayfs
files, more or less. The reason is so users will not be exposed to the
real path via /proc/<pid>/maps. backing files are not in the process
fd table, so the real path is never exposed via /proc/<pid>/fd/.

This special condition is sometimes referred to as "fake" path -
The helper open_with_fake_path() was renamed to backing_file_open()
in commit 62d53c4a1dfe ("fs: use backing_file container for internal files
with "fake" f_path").

I knew for a long time that fsnotify hooks couldn't handle files with
fake path correctly and that is the reason that I introduced the backing_file
container and file_real_path(), so that fsnotify can get to the real path
of overlayfs backing files (e.g. "/mnt/xfs/lower/foo") and fsnotify watches
on the underlying xfs filesystem could be respected.

My theory is that all the LSM hooks as well as the non-LSM IMA/audit
hooks, always need the real path of overlayfs backing files in order to
enforce their policy and not the fake path.

Mimi has know for some time that IMA does not work well in conjunction
with overlayfs and more specifically, an IMA policy that wants to measure
all files on the underlying xfs does not work well when overlayfs is using
backing files on xfs.

For example, if ima_file_free() would sometimes use the inode file->f_inode
and sometimes use d_inode(file->f_path.dentry), very bad things would
happen because it is not the same object.

For this reason I posted this IMA patch:
https://lore.kernel.org/linux-fsdevel/20230913073755.3489676-1-amir73il@gmail.com/

But I don't like this solution - there are endless places that may
require this fix - I suspect that many of the LSM modules are "broken"
in the same manner when it comes to overlayfs backing files.

I would rather have backing_file f_path always be the real path and
the few places that need the fake path would opt in for it, as I've
implemented in this patch set [1].

To prove my theory that this change is correct for all LSM/audit hooks,
I audited the hooks that accept file or path arguments.

It is important to note that the security_path_* hooks are never called
by overlayfs itself on the underlying filesystem paths.

overlayfs only ever calls the vfs helpers (e.g. vfs_mkdir()) which
call the security_inode_* and security_dentry_* hooks on the underlying
filesystem objects.

Likewise, security_mmap_file() and security_file_mprotect() are also
called at the "syscall level" and overlayfs never calls any vfs helpers
that call those hooks when mmaping the backing file.
Arguably, overlayfs should call security_mmap_file() explicitly
in ovl_mmap() on the real backing file.

Other security_file_* LSM hooks may very well be called by
overlayfs when operating on the backing file, for example
security_file_permission() from ovl_read_iter() which reads from
the backing file.

But in all those security_file_* LSM hooks, just like my examples
with ima_file_free() and fsnotify_open(), there is never going to
be a case where the LSM module would care about the overlayfs
fake path, and the code is always going to be more safe if the
assumption that d_inode(file->f_path.dentry) is the same as
file->f_inode holds for overlayfs backing files.

Was I able to explain the problem?

Was I able to explain why I think that the proposed change [1]
is safe and good for all the LSM modules, IMA and audit?

Do you agree with my theory?

Would you be able to run an audit integration test and other
LSM tests if available on my branch to let me know if it breaks
anything?

I could push this to linux-next via overlayfs tree, but because
it is a very subtle change, I wanted to get some ACK from
Miklos and from you on the concept first.

If possible to get an early audit/LSM test run that would be
awesome, because I don't really know which audit/LSM tests
are regularly run by bots on linux-next.

Thanks,
Amir.

[1] https://github.com/amir73il/linux/commits/ovl_fake_path

^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2023-10-04 15:30 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-09  7:32 [PATCH 0/3] Reduce impact of overlayfs fake path files Amir Goldstein
2023-06-09  7:32 ` [PATCH 1/3] fs: use fake_file container for internal files with fake f_path Amir Goldstein
2023-06-09 11:32   ` Christian Brauner
2023-06-09 11:57     ` Amir Goldstein
2023-06-09 12:12       ` Christian Brauner
2023-06-09 12:20         ` Amir Goldstein
2023-06-09 12:54           ` Christian Brauner
2023-06-09 13:00             ` Christian Brauner
2023-06-09 13:09               ` Amir Goldstein
2023-06-11 19:11         ` Amir Goldstein
2023-06-12  7:55           ` Christian Brauner
2023-06-09  7:32 ` [PATCH 2/3] fs: use file_fake_path() to get path of mapped files for display Amir Goldstein
2023-06-09  8:19   ` Miklos Szeredi
2023-06-09  7:32 ` [PATCH 3/3] fs: store fake path in file_fake along with real path Amir Goldstein
2023-06-09 11:12   ` Christian Brauner
2023-06-09 11:30     ` Amir Goldstein
2023-06-09 13:15 ` [PATCH 0/3] Reduce impact of overlayfs fake path files Miklos Szeredi
2023-06-09 14:28   ` Amir Goldstein
2023-06-09 14:42     ` Amir Goldstein
2023-06-09 15:00       ` Miklos Szeredi
2023-06-09 19:17         ` Amir Goldstein
2023-06-12  7:57         ` Christian Brauner
2023-10-02 15:32         ` Amir Goldstein
2023-10-04 15:29           ` Amir Goldstein
2023-06-09 15:27     ` Mimi Zohar
2023-06-09 13:15 ` Tetsuo Handa
2023-06-09 13:54   ` Amir Goldstein

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.