linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [patch 0/9] extended fdinfo via procfs series, v7
@ 2012-08-23 10:43 Cyrill Gorcunov
  2012-08-23 10:43 ` [patch 1/9] procfs: Move /proc/pid/fd[info] handling code to fd.[ch] Cyrill Gorcunov
                   ` (9 more replies)
  0 siblings, 10 replies; 35+ messages in thread
From: Cyrill Gorcunov @ 2012-08-23 10:43 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel
  Cc: Al Viro, Alexey Dobriyan, Andrew Morton, Pavel Emelyanov,
	James Bottomley, Matthew Helsley, aneesh.kumar, bfields

Hi guys,

here is updated version of the fdinfo via procfs series,
the changes from previous one are the following

 - fhandle is carried inside inotify mark but this feature
   is CONFIG dependent to not bloat the kernel for users
   who don't need it

 - a small fix in exportfs code to prevent nil dereference

the comments would be appreciated.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [patch 1/9] procfs: Move /proc/pid/fd[info] handling code to fd.[ch]
  2012-08-23 10:43 [patch 0/9] extended fdinfo via procfs series, v7 Cyrill Gorcunov
@ 2012-08-23 10:43 ` Cyrill Gorcunov
  2012-08-25 17:16   ` Al Viro
  2012-08-23 10:43 ` [patch 2/9] procfs: Convert /proc/pid/fdinfo/ handling routines to seq-file v2 Cyrill Gorcunov
                   ` (8 subsequent siblings)
  9 siblings, 1 reply; 35+ messages in thread
From: Cyrill Gorcunov @ 2012-08-23 10:43 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel
  Cc: Al Viro, Alexey Dobriyan, Andrew Morton, Pavel Emelyanov,
	James Bottomley, Matthew Helsley, aneesh.kumar, bfields,
	Cyrill Gorcunov, Al Viro

[-- Attachment #1: seq-fdinfo-base-proc-5 --]
[-- Type: text/plain, Size: 23313 bytes --]

This patch prepares the ground for further extension of
/proc/pid/fd[info] handling code by moving fdinfo handling
code into fs/proc/fd.c.

I think such move makes both fs/proc/base.c and fs/proc/fd.c
easier to read.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Pavel Emelyanov <xemul@parallels.com>
CC: Al Viro <viro@ZenIV.linux.org.uk>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: James Bottomley <jbottomley@parallels.com>
CC: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Matthew Helsley <matt.helsley@gmail.com>
CC: "J. Bruce Fields" <bfields@fieldses.org>
CC: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/proc/Makefile   |    2 
 fs/proc/base.c     |  388 -----------------------------------------------------
 fs/proc/fd.c       |  351 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/proc/fd.h       |   14 +
 fs/proc/internal.h |   48 ++++++
 5 files changed, 416 insertions(+), 387 deletions(-)

Index: linux-2.6.git/fs/proc/Makefile
===================================================================
--- linux-2.6.git.orig/fs/proc/Makefile
+++ linux-2.6.git/fs/proc/Makefile
@@ -8,7 +8,7 @@ proc-y			:= nommu.o task_nommu.o
 proc-$(CONFIG_MMU)	:= mmu.o task_mmu.o
 
 proc-y       += inode.o root.o base.o generic.o array.o \
-		proc_tty.o
+		proc_tty.o fd.o
 proc-y	+= cmdline.o
 proc-y	+= consoles.o
 proc-y	+= cpuinfo.o
Index: linux-2.6.git/fs/proc/base.c
===================================================================
--- linux-2.6.git.orig/fs/proc/base.c
+++ linux-2.6.git/fs/proc/base.c
@@ -90,6 +90,7 @@
 #endif
 #include <trace/events/oom.h>
 #include "internal.h"
+#include "fd.h"
 
 /* NOTE:
  *	Implementing inode permission operations in /proc is almost
@@ -136,8 +137,6 @@ struct pid_entry {
 		NULL, &proc_single_file_operations,	\
 		{ .proc_show = show } )
 
-static int proc_fd_permission(struct inode *inode, int mask);
-
 /*
  * Count the number of hardlinks for the pid_entry table, excluding the .
  * and .. links.
@@ -1492,7 +1491,7 @@ out:
 	return error;
 }
 
-static const struct inode_operations proc_pid_link_inode_operations = {
+const struct inode_operations proc_pid_link_inode_operations = {
 	.readlink	= proc_pid_readlink,
 	.follow_link	= proc_pid_follow_link,
 	.setattr	= proc_setattr,
@@ -1501,21 +1500,6 @@ static const struct inode_operations pro
 
 /* building an inode */
 
-static int task_dumpable(struct task_struct *task)
-{
-	int dumpable = 0;
-	struct mm_struct *mm;
-
-	task_lock(task);
-	mm = task->mm;
-	if (mm)
-		dumpable = get_dumpable(mm);
-	task_unlock(task);
-	if(dumpable == 1)
-		return 1;
-	return 0;
-}
-
 struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
 {
 	struct inode * inode;
@@ -1641,15 +1625,6 @@ int pid_revalidate(struct dentry *dentry
 	return 0;
 }
 
-static int pid_delete_dentry(const struct dentry * dentry)
-{
-	/* Is the task we represent dead?
-	 * If so, then don't put the dentry on the lru list,
-	 * kill it immediately.
-	 */
-	return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
-}
-
 const struct dentry_operations pid_dentry_operations =
 {
 	.d_revalidate	= pid_revalidate,
@@ -1712,289 +1687,6 @@ end_instantiate:
 	return filldir(dirent, name, len, filp->f_pos, ino, type);
 }
 
-static unsigned name_to_int(struct dentry *dentry)
-{
-	const char *name = dentry->d_name.name;
-	int len = dentry->d_name.len;
-	unsigned n = 0;
-
-	if (len > 1 && *name == '0')
-		goto out;
-	while (len-- > 0) {
-		unsigned c = *name++ - '0';
-		if (c > 9)
-			goto out;
-		if (n >= (~0U-9)/10)
-			goto out;
-		n *= 10;
-		n += c;
-	}
-	return n;
-out:
-	return ~0U;
-}
-
-#define PROC_FDINFO_MAX 64
-
-static int proc_fd_info(struct inode *inode, struct path *path, char *info)
-{
-	struct task_struct *task = get_proc_task(inode);
-	struct files_struct *files = NULL;
-	struct file *file;
-	int fd = proc_fd(inode);
-
-	if (task) {
-		files = get_files_struct(task);
-		put_task_struct(task);
-	}
-	if (files) {
-		/*
-		 * We are not taking a ref to the file structure, so we must
-		 * hold ->file_lock.
-		 */
-		spin_lock(&files->file_lock);
-		file = fcheck_files(files, fd);
-		if (file) {
-			unsigned int f_flags;
-			struct fdtable *fdt;
-
-			fdt = files_fdtable(files);
-			f_flags = file->f_flags & ~O_CLOEXEC;
-			if (close_on_exec(fd, fdt))
-				f_flags |= O_CLOEXEC;
-
-			if (path) {
-				*path = file->f_path;
-				path_get(&file->f_path);
-			}
-			if (info)
-				snprintf(info, PROC_FDINFO_MAX,
-					 "pos:\t%lli\n"
-					 "flags:\t0%o\n",
-					 (long long) file->f_pos,
-					 f_flags);
-			spin_unlock(&files->file_lock);
-			put_files_struct(files);
-			return 0;
-		}
-		spin_unlock(&files->file_lock);
-		put_files_struct(files);
-	}
-	return -ENOENT;
-}
-
-static int proc_fd_link(struct dentry *dentry, struct path *path)
-{
-	return proc_fd_info(dentry->d_inode, path, NULL);
-}
-
-static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
-{
-	struct inode *inode;
-	struct task_struct *task;
-	int fd;
-	struct files_struct *files;
-	const struct cred *cred;
-
-	if (flags & LOOKUP_RCU)
-		return -ECHILD;
-
-	inode = dentry->d_inode;
-	task = get_proc_task(inode);
-	fd = proc_fd(inode);
-
-	if (task) {
-		files = get_files_struct(task);
-		if (files) {
-			struct file *file;
-			rcu_read_lock();
-			file = fcheck_files(files, fd);
-			if (file) {
-				unsigned f_mode = file->f_mode;
-
-				rcu_read_unlock();
-				put_files_struct(files);
-
-				if (task_dumpable(task)) {
-					rcu_read_lock();
-					cred = __task_cred(task);
-					inode->i_uid = cred->euid;
-					inode->i_gid = cred->egid;
-					rcu_read_unlock();
-				} else {
-					inode->i_uid = GLOBAL_ROOT_UID;
-					inode->i_gid = GLOBAL_ROOT_GID;
-				}
-
-				if (S_ISLNK(inode->i_mode)) {
-					unsigned i_mode = S_IFLNK;
-					if (f_mode & FMODE_READ)
-						i_mode |= S_IRUSR | S_IXUSR;
-					if (f_mode & FMODE_WRITE)
-						i_mode |= S_IWUSR | S_IXUSR;
-					inode->i_mode = i_mode;
-				}
-
-				security_task_to_inode(task, inode);
-				put_task_struct(task);
-				return 1;
-			}
-			rcu_read_unlock();
-			put_files_struct(files);
-		}
-		put_task_struct(task);
-	}
-	d_drop(dentry);
-	return 0;
-}
-
-static const struct dentry_operations tid_fd_dentry_operations =
-{
-	.d_revalidate	= tid_fd_revalidate,
-	.d_delete	= pid_delete_dentry,
-};
-
-static struct dentry *proc_fd_instantiate(struct inode *dir,
-	struct dentry *dentry, struct task_struct *task, const void *ptr)
-{
-	unsigned fd = (unsigned long)ptr;
- 	struct inode *inode;
- 	struct proc_inode *ei;
-	struct dentry *error = ERR_PTR(-ENOENT);
-
-	inode = proc_pid_make_inode(dir->i_sb, task);
-	if (!inode)
-		goto out;
-	ei = PROC_I(inode);
-	ei->fd = fd;
-
-	inode->i_mode = S_IFLNK;
-	inode->i_op = &proc_pid_link_inode_operations;
-	inode->i_size = 64;
-	ei->op.proc_get_link = proc_fd_link;
-	d_set_d_op(dentry, &tid_fd_dentry_operations);
-	d_add(dentry, inode);
-	/* Close the race of the process dying before we return the dentry */
-	if (tid_fd_revalidate(dentry, 0))
-		error = NULL;
-
- out:
-	return error;
-}
-
-static struct dentry *proc_lookupfd_common(struct inode *dir,
-					   struct dentry *dentry,
-					   instantiate_t instantiate)
-{
-	struct task_struct *task = get_proc_task(dir);
-	unsigned fd = name_to_int(dentry);
-	struct dentry *result = ERR_PTR(-ENOENT);
-
-	if (!task)
-		goto out_no_task;
-	if (fd == ~0U)
-		goto out;
-
-	result = instantiate(dir, dentry, task, (void *)(unsigned long)fd);
-out:
-	put_task_struct(task);
-out_no_task:
-	return result;
-}
-
-static int proc_readfd_common(struct file * filp, void * dirent,
-			      filldir_t filldir, instantiate_t instantiate)
-{
-	struct dentry *dentry = filp->f_path.dentry;
-	struct inode *inode = dentry->d_inode;
-	struct task_struct *p = get_proc_task(inode);
-	unsigned int fd, ino;
-	int retval;
-	struct files_struct * files;
-
-	retval = -ENOENT;
-	if (!p)
-		goto out_no_task;
-	retval = 0;
-
-	fd = filp->f_pos;
-	switch (fd) {
-		case 0:
-			if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
-				goto out;
-			filp->f_pos++;
-		case 1:
-			ino = parent_ino(dentry);
-			if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
-				goto out;
-			filp->f_pos++;
-		default:
-			files = get_files_struct(p);
-			if (!files)
-				goto out;
-			rcu_read_lock();
-			for (fd = filp->f_pos-2;
-			     fd < files_fdtable(files)->max_fds;
-			     fd++, filp->f_pos++) {
-				char name[PROC_NUMBUF];
-				int len;
-				int rv;
-
-				if (!fcheck_files(files, fd))
-					continue;
-				rcu_read_unlock();
-
-				len = snprintf(name, sizeof(name), "%d", fd);
-				rv = proc_fill_cache(filp, dirent, filldir,
-						     name, len, instantiate, p,
-						     (void *)(unsigned long)fd);
-				if (rv < 0)
-					goto out_fd_loop;
-				rcu_read_lock();
-			}
-			rcu_read_unlock();
-out_fd_loop:
-			put_files_struct(files);
-	}
-out:
-	put_task_struct(p);
-out_no_task:
-	return retval;
-}
-
-static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry,
-				    unsigned int flags)
-{
-	return proc_lookupfd_common(dir, dentry, proc_fd_instantiate);
-}
-
-static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir)
-{
-	return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate);
-}
-
-static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
-				      size_t len, loff_t *ppos)
-{
-	char tmp[PROC_FDINFO_MAX];
-	int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp);
-	if (!err)
-		err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp));
-	return err;
-}
-
-static const struct file_operations proc_fdinfo_file_operations = {
-	.open           = nonseekable_open,
-	.read		= proc_fdinfo_read,
-	.llseek		= no_llseek,
-};
-
-static const struct file_operations proc_fd_operations = {
-	.read		= generic_read_dir,
-	.readdir	= proc_readfd,
-	.llseek		= default_llseek,
-};
-
 #ifdef CONFIG_CHECKPOINT_RESTORE
 
 /*
@@ -2337,82 +2029,6 @@ static const struct file_operations proc
 
 #endif /* CONFIG_CHECKPOINT_RESTORE */
 
-/*
- * /proc/pid/fd needs a special permission handler so that a process can still
- * access /proc/self/fd after it has executed a setuid().
- */
-static int proc_fd_permission(struct inode *inode, int mask)
-{
-	int rv = generic_permission(inode, mask);
-	if (rv == 0)
-		return 0;
-	if (task_pid(current) == proc_pid(inode))
-		rv = 0;
-	return rv;
-}
-
-/*
- * proc directories can do almost nothing..
- */
-static const struct inode_operations proc_fd_inode_operations = {
-	.lookup		= proc_lookupfd,
-	.permission	= proc_fd_permission,
-	.setattr	= proc_setattr,
-};
-
-static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
-	struct dentry *dentry, struct task_struct *task, const void *ptr)
-{
-	unsigned fd = (unsigned long)ptr;
- 	struct inode *inode;
- 	struct proc_inode *ei;
-	struct dentry *error = ERR_PTR(-ENOENT);
-
-	inode = proc_pid_make_inode(dir->i_sb, task);
-	if (!inode)
-		goto out;
-	ei = PROC_I(inode);
-	ei->fd = fd;
-	inode->i_mode = S_IFREG | S_IRUSR;
-	inode->i_fop = &proc_fdinfo_file_operations;
-	d_set_d_op(dentry, &tid_fd_dentry_operations);
-	d_add(dentry, inode);
-	/* Close the race of the process dying before we return the dentry */
-	if (tid_fd_revalidate(dentry, 0))
-		error = NULL;
-
- out:
-	return error;
-}
-
-static struct dentry *proc_lookupfdinfo(struct inode *dir,
-					struct dentry *dentry,
-					unsigned int flags)
-{
-	return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate);
-}
-
-static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir)
-{
-	return proc_readfd_common(filp, dirent, filldir,
-				  proc_fdinfo_instantiate);
-}
-
-static const struct file_operations proc_fdinfo_operations = {
-	.read		= generic_read_dir,
-	.readdir	= proc_readfdinfo,
-	.llseek		= default_llseek,
-};
-
-/*
- * proc directories can do almost nothing..
- */
-static const struct inode_operations proc_fdinfo_inode_operations = {
-	.lookup		= proc_lookupfdinfo,
-	.setattr	= proc_setattr,
-};
-
-
 static struct dentry *proc_pident_instantiate(struct inode *dir,
 	struct dentry *dentry, struct task_struct *task, const void *ptr)
 {
Index: linux-2.6.git/fs/proc/fd.c
===================================================================
--- /dev/null
+++ linux-2.6.git/fs/proc/fd.c
@@ -0,0 +1,351 @@
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/dcache.h>
+#include <linux/path.h>
+#include <linux/fdtable.h>
+#include <linux/namei.h>
+#include <linux/pid.h>
+#include <linux/security.h>
+
+#include <linux/proc_fs.h>
+
+#include "internal.h"
+#include "fd.h"
+
+#define PROC_FDINFO_MAX 64
+
+static int proc_fd_info(struct inode *inode, struct path *path, char *info)
+{
+	struct task_struct *task = get_proc_task(inode);
+	struct files_struct *files = NULL;
+	int fd = proc_fd(inode);
+	struct file *file;
+
+	if (task) {
+		files = get_files_struct(task);
+		put_task_struct(task);
+	}
+	if (files) {
+		/*
+		 * We are not taking a ref to the file structure, so we must
+		 * hold ->file_lock.
+		 */
+		spin_lock(&files->file_lock);
+		file = fcheck_files(files, fd);
+		if (file) {
+			unsigned int f_flags;
+			struct fdtable *fdt;
+
+			fdt = files_fdtable(files);
+			f_flags = file->f_flags & ~O_CLOEXEC;
+			if (close_on_exec(fd, fdt))
+				f_flags |= O_CLOEXEC;
+
+			if (path) {
+				*path = file->f_path;
+				path_get(&file->f_path);
+			}
+			if (info)
+				snprintf(info, PROC_FDINFO_MAX,
+					 "pos:\t%lli\n"
+					 "flags:\t0%o\n",
+					 (long long) file->f_pos,
+					 f_flags);
+			spin_unlock(&files->file_lock);
+			put_files_struct(files);
+			return 0;
+		}
+		spin_unlock(&files->file_lock);
+		put_files_struct(files);
+	}
+	return -ENOENT;
+}
+
+static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
+{
+	struct files_struct *files;
+	struct task_struct *task;
+	const struct cred *cred;
+	struct inode *inode;
+	int fd;
+
+	if (flags & LOOKUP_RCU)
+		return -ECHILD;
+
+	inode = dentry->d_inode;
+	task = get_proc_task(inode);
+	fd = proc_fd(inode);
+
+	if (task) {
+		files = get_files_struct(task);
+		if (files) {
+			struct file *file;
+
+			rcu_read_lock();
+			file = fcheck_files(files, fd);
+			if (file) {
+				unsigned f_mode = file->f_mode;
+
+				rcu_read_unlock();
+				put_files_struct(files);
+
+				if (task_dumpable(task)) {
+					rcu_read_lock();
+					cred = __task_cred(task);
+					inode->i_uid = cred->euid;
+					inode->i_gid = cred->egid;
+					rcu_read_unlock();
+				} else {
+					inode->i_uid = GLOBAL_ROOT_UID;
+					inode->i_gid = GLOBAL_ROOT_GID;
+				}
+
+				if (S_ISLNK(inode->i_mode)) {
+					unsigned i_mode = S_IFLNK;
+					if (f_mode & FMODE_READ)
+						i_mode |= S_IRUSR | S_IXUSR;
+					if (f_mode & FMODE_WRITE)
+						i_mode |= S_IWUSR | S_IXUSR;
+					inode->i_mode = i_mode;
+				}
+
+				security_task_to_inode(task, inode);
+				put_task_struct(task);
+				return 1;
+			}
+			rcu_read_unlock();
+			put_files_struct(files);
+		}
+		put_task_struct(task);
+	}
+
+	d_drop(dentry);
+	return 0;
+}
+
+static const struct dentry_operations tid_fd_dentry_operations = {
+	.d_revalidate	= tid_fd_revalidate,
+	.d_delete	= pid_delete_dentry,
+};
+
+static int proc_fd_link(struct dentry *dentry, struct path *path)
+{
+	return proc_fd_info(dentry->d_inode, path, NULL);
+}
+
+static struct dentry *
+proc_fd_instantiate(struct inode *dir, struct dentry *dentry,
+		    struct task_struct *task, const void *ptr)
+{
+	struct dentry *error = ERR_PTR(-ENOENT);
+	unsigned fd = (unsigned long)ptr;
+	struct proc_inode *ei;
+	struct inode *inode;
+
+	inode = proc_pid_make_inode(dir->i_sb, task);
+	if (!inode)
+		goto out;
+
+	ei = PROC_I(inode);
+	ei->fd = fd;
+
+	inode->i_mode = S_IFLNK;
+	inode->i_op = &proc_pid_link_inode_operations;
+	inode->i_size = 64;
+
+	ei->op.proc_get_link = proc_fd_link;
+
+	d_set_d_op(dentry, &tid_fd_dentry_operations);
+	d_add(dentry, inode);
+
+	/* Close the race of the process dying before we return the dentry */
+	if (tid_fd_revalidate(dentry, 0))
+		error = NULL;
+ out:
+	return error;
+}
+
+static struct dentry *proc_lookupfd_common(struct inode *dir,
+					   struct dentry *dentry,
+					   instantiate_t instantiate)
+{
+	struct task_struct *task = get_proc_task(dir);
+	struct dentry *result = ERR_PTR(-ENOENT);
+	unsigned fd = name_to_int(dentry);
+
+	if (!task)
+		goto out_no_task;
+	if (fd == ~0U)
+		goto out;
+
+	result = instantiate(dir, dentry, task, (void *)(unsigned long)fd);
+out:
+	put_task_struct(task);
+out_no_task:
+	return result;
+}
+
+static int proc_readfd_common(struct file * filp, void * dirent,
+			      filldir_t filldir, instantiate_t instantiate)
+{
+	struct dentry *dentry = filp->f_path.dentry;
+	struct inode *inode = dentry->d_inode;
+	struct task_struct *p = get_proc_task(inode);
+	struct files_struct *files;
+	unsigned int fd, ino;
+	int retval;
+
+	retval = -ENOENT;
+	if (!p)
+		goto out_no_task;
+	retval = 0;
+
+	fd = filp->f_pos;
+	switch (fd) {
+		case 0:
+			if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
+				goto out;
+			filp->f_pos++;
+		case 1:
+			ino = parent_ino(dentry);
+			if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
+				goto out;
+			filp->f_pos++;
+		default:
+			files = get_files_struct(p);
+			if (!files)
+				goto out;
+			rcu_read_lock();
+			for (fd = filp->f_pos - 2;
+			     fd < files_fdtable(files)->max_fds;
+			     fd++, filp->f_pos++) {
+				char name[PROC_NUMBUF];
+				int len;
+				int rv;
+
+				if (!fcheck_files(files, fd))
+					continue;
+				rcu_read_unlock();
+
+				len = snprintf(name, sizeof(name), "%d", fd);
+				rv = proc_fill_cache(filp, dirent, filldir,
+						     name, len, instantiate, p,
+						     (void *)(unsigned long)fd);
+				if (rv < 0)
+					goto out_fd_loop;
+				rcu_read_lock();
+			}
+			rcu_read_unlock();
+out_fd_loop:
+			put_files_struct(files);
+	}
+out:
+	put_task_struct(p);
+out_no_task:
+	return retval;
+}
+
+static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
+				size_t len, loff_t *ppos)
+{
+	char tmp[PROC_FDINFO_MAX];
+	int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp);
+	if (!err)
+		err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp));
+	return err;
+}
+
+static const struct file_operations proc_fdinfo_file_operations = {
+	.open           = nonseekable_open,
+	.read		= proc_fdinfo_read,
+	.llseek		= no_llseek,
+};
+
+static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir)
+{
+	return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate);
+}
+
+const struct file_operations proc_fd_operations = {
+	.read		= generic_read_dir,
+	.readdir	= proc_readfd,
+	.llseek		= default_llseek,
+};
+
+static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry,
+				    unsigned int flags)
+{
+	return proc_lookupfd_common(dir, dentry, proc_fd_instantiate);
+}
+
+/*
+ * /proc/pid/fd needs a special permission handler so that a process can still
+ * access /proc/self/fd after it has executed a setuid().
+ */
+int proc_fd_permission(struct inode *inode, int mask)
+{
+	int rv = generic_permission(inode, mask);
+	if (rv == 0)
+		return 0;
+	if (task_pid(current) == proc_pid(inode))
+		rv = 0;
+	return rv;
+}
+
+const struct inode_operations proc_fd_inode_operations = {
+	.lookup		= proc_lookupfd,
+	.permission	= proc_fd_permission,
+	.setattr	= proc_setattr,
+};
+
+static struct dentry *
+proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry,
+			struct task_struct *task, const void *ptr)
+{
+	struct dentry *error = ERR_PTR(-ENOENT);
+	unsigned fd = (unsigned long)ptr;
+	struct proc_inode *ei;
+	struct inode *inode;
+
+	inode = proc_pid_make_inode(dir->i_sb, task);
+	if (!inode)
+		goto out;
+
+	ei = PROC_I(inode);
+	ei->fd = fd;
+
+	inode->i_mode = S_IFREG | S_IRUSR;
+	inode->i_fop = &proc_fdinfo_file_operations;
+
+	d_set_d_op(dentry, &tid_fd_dentry_operations);
+	d_add(dentry, inode);
+
+	/* Close the race of the process dying before we return the dentry */
+	if (tid_fd_revalidate(dentry, 0))
+		error = NULL;
+ out:
+	return error;
+}
+
+static struct dentry *
+proc_lookupfdinfo(struct inode *dir, struct dentry *dentry, unsigned int flags)
+{
+	return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate);
+}
+
+static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir)
+{
+	return proc_readfd_common(filp, dirent, filldir,
+				  proc_fdinfo_instantiate);
+}
+
+const struct inode_operations proc_fdinfo_inode_operations = {
+	.lookup		= proc_lookupfdinfo,
+	.setattr	= proc_setattr,
+};
+
+const struct file_operations proc_fdinfo_operations = {
+	.read		= generic_read_dir,
+	.readdir	= proc_readfdinfo,
+	.llseek		= default_llseek,
+};
Index: linux-2.6.git/fs/proc/fd.h
===================================================================
--- /dev/null
+++ linux-2.6.git/fs/proc/fd.h
@@ -0,0 +1,14 @@
+#ifndef __PROCFS_FD_H__
+#define __PROCFS_FD_H__
+
+#include <linux/fs.h>
+
+extern const struct file_operations proc_fd_operations;
+extern const struct inode_operations proc_fd_inode_operations;
+
+extern const struct file_operations proc_fdinfo_operations;
+extern const struct inode_operations proc_fdinfo_inode_operations;
+
+extern int proc_fd_permission(struct inode *inode, int mask);
+
+#endif /* __PROCFS_FD_H__ */
Index: linux-2.6.git/fs/proc/internal.h
===================================================================
--- linux-2.6.git.orig/fs/proc/internal.h
+++ linux-2.6.git/fs/proc/internal.h
@@ -9,6 +9,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/sched.h>
 #include <linux/proc_fs.h>
 struct  ctl_table_header;
 
@@ -65,6 +66,7 @@ extern const struct file_operations proc
 extern const struct file_operations proc_pagemap_operations;
 extern const struct file_operations proc_net_operations;
 extern const struct inode_operations proc_net_inode_operations;
+extern const struct inode_operations proc_pid_link_inode_operations;
 
 struct proc_maps_private {
 	struct pid *pid;
@@ -91,6 +93,52 @@ static inline int proc_fd(struct inode *
 	return PROC_I(inode)->fd;
 }
 
+static inline int task_dumpable(struct task_struct *task)
+{
+	int dumpable = 0;
+	struct mm_struct *mm;
+
+	task_lock(task);
+	mm = task->mm;
+	if (mm)
+		dumpable = get_dumpable(mm);
+	task_unlock(task);
+	if(dumpable == 1)
+		return 1;
+	return 0;
+}
+
+static inline int pid_delete_dentry(const struct dentry * dentry)
+{
+	/* Is the task we represent dead?
+	 * If so, then don't put the dentry on the lru list,
+	 * kill it immediately.
+	 */
+	return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
+}
+
+static inline unsigned name_to_int(struct dentry *dentry)
+{
+	const char *name = dentry->d_name.name;
+	int len = dentry->d_name.len;
+	unsigned n = 0;
+
+	if (len > 1 && *name == '0')
+		goto out;
+	while (len-- > 0) {
+		unsigned c = *name++ - '0';
+		if (c > 9)
+			goto out;
+		if (n >= (~0U-9)/10)
+			goto out;
+		n *= 10;
+		n += c;
+	}
+	return n;
+out:
+	return ~0U;
+}
+
 struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
 		struct dentry *dentry);
 int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,



^ permalink raw reply	[flat|nested] 35+ messages in thread

* [patch 2/9] procfs: Convert /proc/pid/fdinfo/ handling routines to seq-file v2
  2012-08-23 10:43 [patch 0/9] extended fdinfo via procfs series, v7 Cyrill Gorcunov
  2012-08-23 10:43 ` [patch 1/9] procfs: Move /proc/pid/fd[info] handling code to fd.[ch] Cyrill Gorcunov
@ 2012-08-23 10:43 ` Cyrill Gorcunov
  2012-08-26  2:46   ` Al Viro
  2012-08-23 10:43 ` [patch 3/9] procfs: Add ability to plug in auxiliary fdinfo providers Cyrill Gorcunov
                   ` (7 subsequent siblings)
  9 siblings, 1 reply; 35+ messages in thread
From: Cyrill Gorcunov @ 2012-08-23 10:43 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel
  Cc: Al Viro, Alexey Dobriyan, Andrew Morton, Pavel Emelyanov,
	James Bottomley, Matthew Helsley, aneesh.kumar, bfields,
	Cyrill Gorcunov, Al Viro

[-- Attachment #1: seq-fdinfo-seq-ops-7 --]
[-- Type: text/plain, Size: 5901 bytes --]

This patch converts /proc/pid/fdinfo/ handling routines to seq-file which
is needed to extend seq operations and plug in auxiliary fdinfo provides
from subsystems like eventfd/eventpoll/fsnotify.

Note the proc_fd_link no longer call for proc_fd_info, simply because
proc_fd_info is converted to seq_fdinfo_open (which is seq-file open()
prototype).

v2 (by Al Viro):
 - Don't use helper function with optional arguments, thus proc_fd_info get deprecated
 - Use proc_fdinfo structure with seq_file embedded, thus we can use container_of helper
 - Use fput to free reference to the file we've grabbed

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
CC: Pavel Emelyanov <xemul@parallels.com>
CC: Al Viro <viro@ZenIV.linux.org.uk>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: James Bottomley <jbottomley@parallels.com>
CC: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Matthew Helsley <matt.helsley@gmail.com>
CC: "J. Bruce Fields" <bfields@fieldses.org>
CC: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/proc/fd.c |  145 ++++++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 99 insertions(+), 46 deletions(-)

Index: linux-2.6.git/fs/proc/fd.c
===================================================================
--- linux-2.6.git.orig/fs/proc/fd.c
+++ linux-2.6.git/fs/proc/fd.c
@@ -6,61 +6,105 @@
 #include <linux/namei.h>
 #include <linux/pid.h>
 #include <linux/security.h>
+#include <linux/file.h>
+#include <linux/seq_file.h>
 
 #include <linux/proc_fs.h>
 
 #include "internal.h"
 #include "fd.h"
 
-#define PROC_FDINFO_MAX 64
+struct proc_fdinfo {
+	struct seq_file		m;
+	struct file		*fd_file;
+	int			f_flags;
+};
+
+static int seq_show(struct seq_file *m, void *v)
+{
+	struct proc_fdinfo *fdinfo = container_of(m, struct proc_fdinfo, m);
+	seq_printf(m, "pos:\t%lli\nflags:\t0%o\n",
+		   (long long)fdinfo->fd_file->f_pos, fdinfo->f_flags);
+	return 0;
+}
 
-static int proc_fd_info(struct inode *inode, struct path *path, char *info)
+static int seq_fdinfo_open(struct inode *inode, struct file *file)
 {
-	struct task_struct *task = get_proc_task(inode);
+	struct proc_fdinfo *fdinfo = NULL;
 	struct files_struct *files = NULL;
-	int fd = proc_fd(inode);
-	struct file *file;
+	struct task_struct *task;
+	struct file *fd_file;
+	int f_flags, ret;
 
+	ret = -ENOENT;
+
+	task = get_proc_task(inode);
 	if (task) {
 		files = get_files_struct(task);
 		put_task_struct(task);
 	}
+
 	if (files) {
-		/*
-		 * We are not taking a ref to the file structure, so we must
-		 * hold ->file_lock.
-		 */
+		int fd = proc_fd(inode);
+
 		spin_lock(&files->file_lock);
-		file = fcheck_files(files, fd);
-		if (file) {
-			unsigned int f_flags;
-			struct fdtable *fdt;
+		fd_file = fcheck_files(files, fd);
+		if (fd_file) {
+			struct fdtable *fdt = files_fdtable(files);
 
-			fdt = files_fdtable(files);
-			f_flags = file->f_flags & ~O_CLOEXEC;
+			f_flags = fd_file->f_flags & ~O_CLOEXEC;
 			if (close_on_exec(fd, fdt))
 				f_flags |= O_CLOEXEC;
 
-			if (path) {
-				*path = file->f_path;
-				path_get(&file->f_path);
-			}
-			if (info)
-				snprintf(info, PROC_FDINFO_MAX,
-					 "pos:\t%lli\n"
-					 "flags:\t0%o\n",
-					 (long long) file->f_pos,
-					 f_flags);
-			spin_unlock(&files->file_lock);
-			put_files_struct(files);
-			return 0;
+			get_file(fd_file);
+			ret = 0;
 		}
 		spin_unlock(&files->file_lock);
+
 		put_files_struct(files);
 	}
-	return -ENOENT;
+
+	if (ret)
+		return ret;
+
+	ret = -ENOMEM;
+	fdinfo = kmalloc(sizeof(*fdinfo), GFP_KERNEL);
+	if (!fdinfo)
+		goto err_put;
+
+	fdinfo->fd_file = fd_file;
+	fdinfo->f_flags = f_flags;
+	file->private_data = &fdinfo->m;
+
+	ret = single_open(file, seq_show, NULL);
+	if (ret)
+		goto err_free;
+
+	return 0;
+
+err_free:
+	kfree(fdinfo);
+err_put:
+	fput(fd_file);
+	return ret;
 }
 
+static int seq_fdinfo_release(struct inode *inode, struct file *file)
+{
+	struct proc_fdinfo *fdinfo =
+		container_of((struct seq_file *)file->private_data,
+			     struct proc_fdinfo, m);
+	fput(fdinfo->fd_file);
+	return single_release(inode, file);
+}
+
+static const struct file_operations proc_fdinfo_file_operations = {
+	.open		= seq_fdinfo_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_fdinfo_release,
+};
+
 static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
 {
 	struct files_struct *files;
@@ -130,7 +174,32 @@ static const struct dentry_operations ti
 
 static int proc_fd_link(struct dentry *dentry, struct path *path)
 {
-	return proc_fd_info(dentry->d_inode, path, NULL);
+	struct files_struct *files = NULL;
+	struct task_struct *task;
+	int ret = -ENOENT;
+
+	task = get_proc_task(dentry->d_inode);
+	if (task) {
+		files = get_files_struct(task);
+		put_task_struct(task);
+	}
+
+	if (files) {
+		int fd = proc_fd(dentry->d_inode);
+		struct file *fd_file;
+
+		spin_lock(&files->file_lock);
+		fd_file = fcheck_files(files, fd);
+		if (fd_file) {
+			*path = fd_file->f_path;
+			path_get(&fd_file->f_path);
+			ret = 0;
+		}
+		spin_unlock(&files->file_lock);
+		put_files_struct(files);
+	}
+
+	return ret;
 }
 
 static struct dentry *
@@ -245,22 +314,6 @@ out_no_task:
 	return retval;
 }
 
-static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
-				size_t len, loff_t *ppos)
-{
-	char tmp[PROC_FDINFO_MAX];
-	int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp);
-	if (!err)
-		err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp));
-	return err;
-}
-
-static const struct file_operations proc_fdinfo_file_operations = {
-	.open           = nonseekable_open,
-	.read		= proc_fdinfo_read,
-	.llseek		= no_llseek,
-};
-
 static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir)
 {
 	return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate);


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [patch 3/9] procfs: Add ability to plug in auxiliary fdinfo providers
  2012-08-23 10:43 [patch 0/9] extended fdinfo via procfs series, v7 Cyrill Gorcunov
  2012-08-23 10:43 ` [patch 1/9] procfs: Move /proc/pid/fd[info] handling code to fd.[ch] Cyrill Gorcunov
  2012-08-23 10:43 ` [patch 2/9] procfs: Convert /proc/pid/fdinfo/ handling routines to seq-file v2 Cyrill Gorcunov
@ 2012-08-23 10:43 ` Cyrill Gorcunov
  2012-08-23 10:43 ` [patch 4/9] fs, exportfs: Fix nil dereference if no s_export_op present Cyrill Gorcunov
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 35+ messages in thread
From: Cyrill Gorcunov @ 2012-08-23 10:43 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel
  Cc: Al Viro, Alexey Dobriyan, Andrew Morton, Pavel Emelyanov,
	James Bottomley, Matthew Helsley, aneesh.kumar, bfields,
	Cyrill Gorcunov, Al Viro

[-- Attachment #1: seq-fdinfo-seq-ops-helpers-11 --]
[-- Type: text/plain, Size: 2288 bytes --]

This patch brings ability to print out auxiliary data associated
with file in procfs interface /proc/pid/fdinfo/fd.

In particular further patches make eventfd, evenpoll, signalfd
and fsnotify to print additional information complete enough
to restore these objects after checkpoint.

To simplify the code we add show_fdinfo callback inside
struct file_operations (as Al and Pavel are proposing).

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
CC: Pavel Emelyanov <xemul@parallels.com>
CC: Al Viro <viro@ZenIV.linux.org.uk>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: James Bottomley <jbottomley@parallels.com>
CC: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Matthew Helsley <matt.helsley@gmail.com>
CC: "J. Bruce Fields" <bfields@fieldses.org>
CC: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/proc/fd.c       |    5 +++++
 include/linux/fs.h |    3 +++
 2 files changed, 8 insertions(+)

Index: linux-2.6.git/fs/proc/fd.c
===================================================================
--- linux-2.6.git.orig/fs/proc/fd.c
+++ linux-2.6.git/fs/proc/fd.c
@@ -23,8 +23,13 @@ struct proc_fdinfo {
 static int seq_show(struct seq_file *m, void *v)
 {
 	struct proc_fdinfo *fdinfo = container_of(m, struct proc_fdinfo, m);
+
 	seq_printf(m, "pos:\t%lli\nflags:\t0%o\n",
 		   (long long)fdinfo->fd_file->f_pos, fdinfo->f_flags);
+
+	if (fdinfo->fd_file->f_op->show_fdinfo)
+		return fdinfo->fd_file->f_op->show_fdinfo(m, fdinfo->fd_file);
+
 	return 0;
 }
 
Index: linux-2.6.git/include/linux/fs.h
===================================================================
--- linux-2.6.git.orig/include/linux/fs.h
+++ linux-2.6.git/include/linux/fs.h
@@ -1775,6 +1775,8 @@ struct block_device_operations;
 #define HAVE_COMPAT_IOCTL 1
 #define HAVE_UNLOCKED_IOCTL 1
 
+struct seq_file;
+
 struct file_operations {
 	struct module *owner;
 	loff_t (*llseek) (struct file *, loff_t, int);
@@ -1803,6 +1805,7 @@ struct file_operations {
 	int (*setlease)(struct file *, long, struct file_lock **);
 	long (*fallocate)(struct file *file, int mode, loff_t offset,
 			  loff_t len);
+	int (*show_fdinfo)(struct seq_file *m, struct file *f);
 };
 
 struct inode_operations {


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [patch 4/9] fs, exportfs: Fix nil dereference if no s_export_op present
  2012-08-23 10:43 [patch 0/9] extended fdinfo via procfs series, v7 Cyrill Gorcunov
                   ` (2 preceding siblings ...)
  2012-08-23 10:43 ` [patch 3/9] procfs: Add ability to plug in auxiliary fdinfo providers Cyrill Gorcunov
@ 2012-08-23 10:43 ` Cyrill Gorcunov
  2012-08-23 12:12   ` J. Bruce Fields
  2012-08-23 10:43 ` [patch 5/9] fs, notify: Add file handle entry into inotify_inode_mark Cyrill Gorcunov
                   ` (5 subsequent siblings)
  9 siblings, 1 reply; 35+ messages in thread
From: Cyrill Gorcunov @ 2012-08-23 10:43 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel
  Cc: Al Viro, Alexey Dobriyan, Andrew Morton, Pavel Emelyanov,
	James Bottomley, Matthew Helsley, aneesh.kumar, bfields,
	Cyrill Gorcunov, Al Viro

[-- Attachment #1: fs-exportfs-add-null-check --]
[-- Type: text/plain, Size: 1204 bytes --]

If there is no s_export_op present in a target superblock
we might have nil dereference. Fix it with eplicit test
if s_export_op is provided.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
CC: Pavel Emelyanov <xemul@parallels.com>
CC: Al Viro <viro@ZenIV.linux.org.uk>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: James Bottomley <jbottomley@parallels.com>
CC: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Matthew Helsley <matt.helsley@gmail.com>
CC: "J. Bruce Fields" <bfields@fieldses.org>
CC: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/exportfs/expfs.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

Index: linux-2.6.git/fs/exportfs/expfs.c
===================================================================
--- linux-2.6.git.orig/fs/exportfs/expfs.c
+++ linux-2.6.git/fs/exportfs/expfs.c
@@ -357,7 +357,7 @@ int exportfs_encode_fh(struct dentry *de
 		 */
 		parent = p->d_inode;
 	}
-	if (nop->encode_fh)
+	if (nop && nop->encode_fh)
 		error = nop->encode_fh(inode, fid->raw, max_len, parent);
 	else
 		error = export_encode_fh(inode, fid, max_len, parent);


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [patch 5/9] fs, notify: Add file handle entry into inotify_inode_mark
  2012-08-23 10:43 [patch 0/9] extended fdinfo via procfs series, v7 Cyrill Gorcunov
                   ` (3 preceding siblings ...)
  2012-08-23 10:43 ` [patch 4/9] fs, exportfs: Fix nil dereference if no s_export_op present Cyrill Gorcunov
@ 2012-08-23 10:43 ` Cyrill Gorcunov
  2012-08-23 10:43 ` [patch 6/9] fs, notify: Add procfs fdinfo helper v4 Cyrill Gorcunov
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 35+ messages in thread
From: Cyrill Gorcunov @ 2012-08-23 10:43 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel
  Cc: Al Viro, Alexey Dobriyan, Andrew Morton, Pavel Emelyanov,
	James Bottomley, Matthew Helsley, aneesh.kumar, bfields,
	Cyrill Gorcunov, Al Viro

[-- Attachment #1: fsnotify-add-fhandler --]
[-- Type: text/plain, Size: 4722 bytes --]

This file handle will be used in /proc/pid/fdinfo/fd
output, which in turn will allow to restore a watch
target after checkpoint.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
CC: Pavel Emelyanov <xemul@parallels.com>
CC: Al Viro <viro@ZenIV.linux.org.uk>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: James Bottomley <jbottomley@parallels.com>
CC: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Matthew Helsley <matt.helsley@gmail.com>
CC: "J. Bruce Fields" <bfields@fieldses.org>
CC: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/notify/inotify/inotify.h      |    8 +++++++
 fs/notify/inotify/inotify_user.c |   42 ++++++++++++++++++++++++++++++++++-----
 2 files changed, 45 insertions(+), 5 deletions(-)

Index: linux-2.6.git/fs/notify/inotify/inotify.h
===================================================================
--- linux-2.6.git.orig/fs/notify/inotify/inotify.h
+++ linux-2.6.git/fs/notify/inotify/inotify.h
@@ -1,6 +1,7 @@
 #include <linux/fsnotify_backend.h>
 #include <linux/inotify.h>
 #include <linux/slab.h> /* struct kmem_cache */
+#include <linux/exportfs.h>
 
 extern struct kmem_cache *event_priv_cachep;
 
@@ -9,9 +10,16 @@ struct inotify_event_private_data {
 	int wd;
 };
 
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_EXPORTFS) && defined(CONFIG_CHECKPOINT_RESTORE)
+# define INOTIFY_USE_FHANDLE
+#endif
+
 struct inotify_inode_mark {
 	struct fsnotify_mark fsn_mark;
 	int wd;
+#ifdef INOTIFY_USE_FHANDLE
+	__u8 fhandle[sizeof(struct file_handle) + MAX_HANDLE_SZ];
+#endif
 };
 
 extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
Index: linux-2.6.git/fs/notify/inotify/inotify_user.c
===================================================================
--- linux-2.6.git.orig/fs/notify/inotify/inotify_user.c
+++ linux-2.6.git/fs/notify/inotify/inotify_user.c
@@ -566,6 +566,33 @@ static void inotify_free_mark(struct fsn
 	kmem_cache_free(inotify_inode_mark_cachep, i_mark);
 }
 
+#ifdef INOTIFY_USE_FHANDLE
+static int inotify_encode_wd_fhandle(struct inotify_inode_mark *mark, struct dentry *dentry)
+{
+	struct file_handle *fhandle = (struct file_handle *)mark->fhandle;
+	int size, ret;
+
+	BUILD_BUG_ON(sizeof(mark->fhandle) <= sizeof(struct file_handle));
+
+	fhandle->handle_bytes = sizeof(mark->fhandle) - sizeof(struct file_handle);
+	size = fhandle->handle_bytes >> 2;
+
+	ret = exportfs_encode_fh(dentry, (struct fid *)fhandle->f_handle, &size,  0);
+	if ((ret == 255) || (ret == -ENOSPC))
+		return -EOVERFLOW;
+
+	fhandle->handle_type = ret;
+	fhandle->handle_bytes = size * sizeof(u32);
+
+	return 0;
+}
+# else
+static int inotify_encode_wd_fhandle(struct inotify_inode_mark *mark, struct dentry *dentry)
+{
+	return 0;
+}
+#endif
+
 static int inotify_update_existing_watch(struct fsnotify_group *group,
 					 struct inode *inode,
 					 u32 arg)
@@ -621,10 +648,11 @@ static int inotify_update_existing_watch
 }
 
 static int inotify_new_watch(struct fsnotify_group *group,
-			     struct inode *inode,
+			     struct dentry *dentry,
 			     u32 arg)
 {
 	struct inotify_inode_mark *tmp_i_mark;
+	struct inode *inode = dentry->d_inode;
 	__u32 mask;
 	int ret;
 	struct idr *idr = &group->inotify_data.idr;
@@ -647,6 +675,10 @@ static int inotify_new_watch(struct fsno
 	if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches)
 		goto out_err;
 
+	ret = inotify_encode_wd_fhandle(tmp_i_mark, dentry);
+	if (ret)
+		goto out_err;
+
 	ret = inotify_add_to_idr(idr, idr_lock, &group->inotify_data.last_wd,
 				 tmp_i_mark);
 	if (ret)
@@ -673,16 +705,16 @@ out_err:
 	return ret;
 }
 
-static int inotify_update_watch(struct fsnotify_group *group, struct inode *inode, u32 arg)
+static int inotify_update_watch(struct fsnotify_group *group, struct dentry *dentry, u32 arg)
 {
 	int ret = 0;
 
 retry:
 	/* try to update and existing watch with the new arg */
-	ret = inotify_update_existing_watch(group, inode, arg);
+	ret = inotify_update_existing_watch(group, dentry->d_inode, arg);
 	/* no mark present, try to add a new one */
 	if (ret == -ENOENT)
-		ret = inotify_new_watch(group, inode, arg);
+		ret = inotify_new_watch(group, dentry, arg);
 	/*
 	 * inotify_new_watch could race with another thread which did an
 	 * inotify_new_watch between the update_existing and the add watch
@@ -785,7 +817,7 @@ SYSCALL_DEFINE3(inotify_add_watch, int,
 	group = filp->private_data;
 
 	/* create/update an inode mark */
-	ret = inotify_update_watch(group, inode, mask);
+	ret = inotify_update_watch(group, path.dentry, mask);
 	path_put(&path);
 fput_and_out:
 	fput_light(filp, fput_needed);


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [patch 6/9] fs, notify: Add procfs fdinfo helper v4
  2012-08-23 10:43 [patch 0/9] extended fdinfo via procfs series, v7 Cyrill Gorcunov
                   ` (4 preceding siblings ...)
  2012-08-23 10:43 ` [patch 5/9] fs, notify: Add file handle entry into inotify_inode_mark Cyrill Gorcunov
@ 2012-08-23 10:43 ` Cyrill Gorcunov
  2012-08-23 10:43 ` [patch 7/9] fs, eventfd: Add procfs fdinfo helper Cyrill Gorcunov
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 35+ messages in thread
From: Cyrill Gorcunov @ 2012-08-23 10:43 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel
  Cc: Al Viro, Alexey Dobriyan, Andrew Morton, Pavel Emelyanov,
	James Bottomley, Matthew Helsley, aneesh.kumar, bfields,
	Cyrill Gorcunov, Al Viro

[-- Attachment #1: seq-fdinfo-fsnotify-8 --]
[-- Type: text/plain, Size: 8001 bytes --]

This allow us to print out fsnotify details such as
watchee inode, device, mask and optionally a file handle.

For inotify objects if kernel compiled with exportfs support
the output will be

 | pos:	0
 | flags:	02000000
 | inotify wd:        3 ino:             9e7e sdev:   800013 mask:  800afce ignored_mask:        0 fhandle-bytes:        8 fhandle-type:        1 f_handle: 7e9e0000640d1b6d
 | inotify wd:        2 ino:             a111 sdev:   800013 mask:  800afce ignored_mask:        0 fhandle-bytes:        8 fhandle-type:        1 f_handle: 11a1000020542153
 | inotify wd:        1 ino:            6b149 sdev:   800013 mask:  800afce ignored_mask:        0 fhandle-bytes:        8 fhandle-type:        1 f_handle: 49b1060023552153

If kernel compiled without exportfs support, the file handle
won't be provided but inode and device only.

 | pos:	0
 | flags:	02000000
 | inotify wd:        3 ino:             9e7e sdev:   800013 mask:  800afce ignored_mask:        0
 | inotify wd:        2 ino:             a111 sdev:   800013 mask:  800afce ignored_mask:        0
 | inotify wd:        1 ino:            6b149 sdev:   800013 mask:  800afce ignored_mask:        0

For fanotify the output is like

 | pos:	0
 | flags:	02
 | fanotify ino:            68f71 sdev:   800013 mask:        1 ignored_mask: 40000000
 | fanotify mnt_id:       13 mask:        1 ignored_mask: 40000000

To minimize impact on general fsnotify code the new functionality
is gathered in fs/notify/fdinfo.c file.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
CC: Pavel Emelyanov <xemul@parallels.com>
CC: Al Viro <viro@ZenIV.linux.org.uk>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: James Bottomley <jbottomley@parallels.com>
CC: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Matthew Helsley <matt.helsley@gmail.com>
CC: "J. Bruce Fields" <bfields@fieldses.org>
CC: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/notify/Makefile                 |    2 
 fs/notify/fanotify/fanotify_user.c |    4 +
 fs/notify/fdinfo.c                 |  124 +++++++++++++++++++++++++++++++++++++
 fs/notify/fdinfo.h                 |   22 ++++++
 fs/notify/inotify/inotify_user.c   |    4 +
 5 files changed, 155 insertions(+), 1 deletion(-)

Index: linux-2.6.git/fs/notify/Makefile
===================================================================
--- linux-2.6.git.orig/fs/notify/Makefile
+++ linux-2.6.git/fs/notify/Makefile
@@ -1,5 +1,5 @@
 obj-$(CONFIG_FSNOTIFY)		+= fsnotify.o notification.o group.o inode_mark.o \
-				   mark.o vfsmount_mark.o
+				   mark.o vfsmount_mark.o fdinfo.o
 
 obj-y			+= dnotify/
 obj-y			+= inotify/
Index: linux-2.6.git/fs/notify/fanotify/fanotify_user.c
===================================================================
--- linux-2.6.git.orig/fs/notify/fanotify/fanotify_user.c
+++ linux-2.6.git/fs/notify/fanotify/fanotify_user.c
@@ -17,6 +17,7 @@
 #include <asm/ioctls.h>
 
 #include "../../mount.h"
+#include "../fdinfo.h"
 
 #define FANOTIFY_DEFAULT_MAX_EVENTS	16384
 #define FANOTIFY_DEFAULT_MAX_MARKS	8192
@@ -446,6 +447,9 @@ static long fanotify_ioctl(struct file *
 }
 
 static const struct file_operations fanotify_fops = {
+#ifdef CONFIG_PROC_FS
+	.show_fdinfo	= fanotify_show_fdinfo,
+#endif
 	.poll		= fanotify_poll,
 	.read		= fanotify_read,
 	.write		= fanotify_write,
Index: linux-2.6.git/fs/notify/fdinfo.c
===================================================================
--- /dev/null
+++ linux-2.6.git/fs/notify/fdinfo.c
@@ -0,0 +1,124 @@
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/fsnotify_backend.h>
+#include <linux/idr.h>
+#include <linux/init.h>
+#include <linux/inotify.h>
+#include <linux/kernel.h>
+#include <linux/namei.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+
+#include "inotify/inotify.h"
+#include "../fs/mount.h"
+
+#if defined(CONFIG_PROC_FS)
+
+#if defined(CONFIG_INOTIFY_USER) || defined(CONFIG_FANOTIFY)
+
+static int show_fdinfo(struct seq_file *m, struct file *f,
+		       int (*show)(struct seq_file *m, struct fsnotify_mark *mark))
+{
+	struct fsnotify_group *group = f->private_data;
+	struct fsnotify_mark *mark;
+	int ret = 0;
+
+	spin_lock(&group->mark_lock);
+	list_for_each_entry(mark, &group->marks_list, g_list) {
+		ret = show(m, mark);
+		if (ret)
+			break;
+	}
+	spin_unlock(&group->mark_lock);
+	return ret;
+}
+
+#ifdef CONFIG_INOTIFY_USER
+
+static int inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
+{
+	struct inotify_inode_mark *inode_mark;
+	struct inode *inode;
+	int ret = 0;
+
+	if (!(mark->flags & (FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_INODE)))
+		return 0;
+
+	inode_mark = container_of(mark, struct inotify_inode_mark, fsn_mark);
+	inode = igrab(mark->i.inode);
+	if (inode) {
+		ret = seq_printf(m, "inotify wd: %8d ino: %16lx sdev: %8x "
+				 "mask: %8x ignored_mask: %8x ",
+				 inode_mark->wd, inode->i_ino,
+				 inode->i_sb->s_dev,
+				 mark->mask, mark->ignored_mask);
+#ifdef INOTIFY_USE_FHANDLE
+		if (!ret) {
+			int i;
+			struct file_handle *fhandle = (struct file_handle *)inode_mark->fhandle;
+			ret = seq_printf(m, "fhandle-bytes: %8x "
+					 "fhandle-type: %8x f_handle: ",
+					fhandle->handle_bytes,
+					fhandle->handle_type);
+
+			for (i = 0; i < fhandle->handle_bytes; i++) {
+				ret |= seq_printf(m, "%02x",
+						  (int)(unsigned char)fhandle->f_handle[i]);
+			}
+		}
+#endif
+		ret |= seq_putc(m, '\n');
+		iput(inode);
+	}
+
+	return ret;
+}
+
+int inotify_show_fdinfo(struct seq_file *m, struct file *f)
+{
+	return show_fdinfo(m, f, inotify_fdinfo);
+}
+
+#endif /* CONFIG_INOTIFY_USER */
+
+#ifdef CONFIG_FANOTIFY
+
+static int fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
+{
+	struct inode *inode;
+	int ret = 0;
+
+	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE))
+		return 0;
+
+	if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) {
+		inode = igrab(mark->i.inode);
+		if (!inode)
+			goto out;
+		ret = seq_printf(m, "fanotify ino: %16lx sdev: %8x "
+				 "mask: %8x ignored_mask: %8x\n",
+				 inode->i_ino, inode->i_sb->s_dev,
+				 mark->mask, mark->ignored_mask);
+		iput(inode);
+	} else if (mark->flags & FSNOTIFY_MARK_FLAG_VFSMOUNT) {
+		struct mount *mnt = real_mount(mark->m.mnt);
+
+		ret = seq_printf(m, "fanotify mnt_id: %8x mask: %8x ignored_mask: %8x\n",
+				 mnt->mnt_id, mark->mask, mark->ignored_mask);
+	}
+out:
+	return ret;
+}
+
+int fanotify_show_fdinfo(struct seq_file *m, struct file *f)
+{
+	return show_fdinfo(m, f, fanotify_fdinfo);
+}
+
+#endif /* CONFIG_FANOTIFY */
+
+#endif /* CONFIG_INOTIFY_USER || CONFIG_FANOTIFY */
+
+#endif /* CONFIG_PROC_FS */
Index: linux-2.6.git/fs/notify/fdinfo.h
===================================================================
--- /dev/null
+++ linux-2.6.git/fs/notify/fdinfo.h
@@ -0,0 +1,22 @@
+#ifndef __FSNOTIFY_FDINFO_H__
+#define __FSNOTIFY_FDINFO_H__
+
+#include <linux/errno.h>
+#include <linux/proc_fs.h>
+
+struct seq_file;
+struct file;
+
+#ifdef CONFIG_PROC_FS
+
+#ifdef CONFIG_INOTIFY_USER
+extern int inotify_show_fdinfo(struct seq_file *m, struct file *f);
+#endif
+
+#ifdef CONFIG_FANOTIFY
+extern int fanotify_show_fdinfo(struct seq_file *m, struct file *f);
+#endif
+
+#endif /* CONFIG_PROC_FS */
+
+#endif /* __FSNOTIFY_FDINFO_H__ */
Index: linux-2.6.git/fs/notify/inotify/inotify_user.c
===================================================================
--- linux-2.6.git.orig/fs/notify/inotify/inotify_user.c
+++ linux-2.6.git/fs/notify/inotify/inotify_user.c
@@ -40,6 +40,7 @@
 #include <linux/wait.h>
 
 #include "inotify.h"
+#include "../fdinfo.h"
 
 #include <asm/ioctls.h>
 
@@ -335,6 +336,9 @@ static long inotify_ioctl(struct file *f
 }
 
 static const struct file_operations inotify_fops = {
+#ifdef CONFIG_PROC_FS
+	.show_fdinfo	= inotify_show_fdinfo,
+#endif
 	.poll		= inotify_poll,
 	.read		= inotify_read,
 	.fasync		= inotify_fasync,


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [patch 7/9] fs, eventfd: Add procfs fdinfo helper
  2012-08-23 10:43 [patch 0/9] extended fdinfo via procfs series, v7 Cyrill Gorcunov
                   ` (5 preceding siblings ...)
  2012-08-23 10:43 ` [patch 6/9] fs, notify: Add procfs fdinfo helper v4 Cyrill Gorcunov
@ 2012-08-23 10:43 ` Cyrill Gorcunov
  2012-08-23 10:43 ` [patch 8/9] fs, epoll: Add procfs fdinfo helper v2 Cyrill Gorcunov
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 35+ messages in thread
From: Cyrill Gorcunov @ 2012-08-23 10:43 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel
  Cc: Al Viro, Alexey Dobriyan, Andrew Morton, Pavel Emelyanov,
	James Bottomley, Matthew Helsley, aneesh.kumar, bfields,
	Cyrill Gorcunov, Al Viro

[-- Attachment #1: seq-fdinfo-eventfd-7 --]
[-- Type: text/plain, Size: 1727 bytes --]

This allow us to print out raw counter value.
The /proc/pid/fdinfo/fd output is

 | pos:	0
 | flags:	04002
 | eventfd-count:               5a

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
CC: Pavel Emelyanov <xemul@parallels.com>
CC: Al Viro <viro@ZenIV.linux.org.uk>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: James Bottomley <jbottomley@parallels.com>
CC: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Matthew Helsley <matt.helsley@gmail.com>
CC: "J. Bruce Fields" <bfields@fieldses.org>
CC: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/eventfd.c |   20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

Index: linux-2.6.git/fs/eventfd.c
===================================================================
--- linux-2.6.git.orig/fs/eventfd.c
+++ linux-2.6.git/fs/eventfd.c
@@ -19,6 +19,8 @@
 #include <linux/export.h>
 #include <linux/kref.h>
 #include <linux/eventfd.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 
 struct eventfd_ctx {
 	struct kref kref;
@@ -284,7 +286,25 @@ static ssize_t eventfd_write(struct file
 	return res;
 }
 
+#ifdef CONFIG_PROC_FS
+static int eventfd_show_fdinfo(struct seq_file *m, struct file *f)
+{
+	struct eventfd_ctx *ctx = f->private_data;
+	int ret;
+
+	spin_lock_irq(&ctx->wqh.lock);
+	ret = seq_printf(m, "eventfd-count: %16llx\n",
+			 (unsigned long long)ctx->count);
+	spin_unlock_irq(&ctx->wqh.lock);
+
+	return ret;
+}
+#endif
+
 static const struct file_operations eventfd_fops = {
+#ifdef CONFIG_PROC_FS
+	.show_fdinfo	= eventfd_show_fdinfo,
+#endif
 	.release	= eventfd_release,
 	.poll		= eventfd_poll,
 	.read		= eventfd_read,


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [patch 8/9] fs, epoll: Add procfs fdinfo helper v2
  2012-08-23 10:43 [patch 0/9] extended fdinfo via procfs series, v7 Cyrill Gorcunov
                   ` (6 preceding siblings ...)
  2012-08-23 10:43 ` [patch 7/9] fs, eventfd: Add procfs fdinfo helper Cyrill Gorcunov
@ 2012-08-23 10:43 ` Cyrill Gorcunov
  2012-08-23 10:43 ` [patch 9/9] fdinfo: Show sigmask for signalfd fd v2 Cyrill Gorcunov
  2012-08-23 12:23 ` [patch 0/9] extended fdinfo via procfs series, v7 J. Bruce Fields
  9 siblings, 0 replies; 35+ messages in thread
From: Cyrill Gorcunov @ 2012-08-23 10:43 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel
  Cc: Al Viro, Alexey Dobriyan, Andrew Morton, Pavel Emelyanov,
	James Bottomley, Matthew Helsley, aneesh.kumar, bfields,
	Cyrill Gorcunov, Al Viro

[-- Attachment #1: seq-fdinfo-eventpoll-7 --]
[-- Type: text/plain, Size: 2106 bytes --]

This allow us to print out eventpoll target file descriptor,
events and data, the /proc/pid/fdinfo/fd consists of

 | pos:	0
 | flags:	02
 | tfd:        5 events:       1d data: ffffffffffffffff

This feature is CONFIG_CHECKPOINT_RESTORE only.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
CC: Pavel Emelyanov <xemul@parallels.com>
CC: Al Viro <viro@ZenIV.linux.org.uk>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: James Bottomley <jbottomley@parallels.com>
CC: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Matthew Helsley <matt.helsley@gmail.com>
CC: "J. Bruce Fields" <bfields@fieldses.org>
CC: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/eventpoll.c |   28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

Index: linux-2.6.git/fs/eventpoll.c
===================================================================
--- linux-2.6.git.orig/fs/eventpoll.c
+++ linux-2.6.git/fs/eventpoll.c
@@ -38,6 +38,8 @@
 #include <asm/io.h>
 #include <asm/mman.h>
 #include <linux/atomic.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 
 /*
  * LOCKING:
@@ -783,8 +785,34 @@ static unsigned int ep_eventpoll_poll(st
 	return pollflags != -1 ? pollflags : 0;
 }
 
+#ifdef CONFIG_PROC_FS
+static int ep_show_fdinfo(struct seq_file *m, struct file *f)
+{
+	struct eventpoll *ep = f->private_data;
+	struct rb_node *rbp;
+	int ret;
+
+	mutex_lock(&ep->mtx);
+	for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+		struct epitem *epi = rb_entry(rbp, struct epitem, rbn);
+
+		ret = seq_printf(m, "tfd: %8d events: %8x data: %16llx\n",
+				 epi->ffd.fd, epi->event.events,
+				 (long long)epi->event.data);
+		if (ret)
+			break;
+	}
+	mutex_unlock(&ep->mtx);
+
+	return ret;
+}
+#endif
+
 /* File callbacks that implement the eventpoll file behaviour */
 static const struct file_operations eventpoll_fops = {
+#ifdef CONFIG_PROC_FS
+	.show_fdinfo	= ep_show_fdinfo,
+#endif
 	.release	= ep_eventpoll_release,
 	.poll		= ep_eventpoll_poll,
 	.llseek		= noop_llseek,


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [patch 9/9] fdinfo: Show sigmask for signalfd fd v2
  2012-08-23 10:43 [patch 0/9] extended fdinfo via procfs series, v7 Cyrill Gorcunov
                   ` (7 preceding siblings ...)
  2012-08-23 10:43 ` [patch 8/9] fs, epoll: Add procfs fdinfo helper v2 Cyrill Gorcunov
@ 2012-08-23 10:43 ` Cyrill Gorcunov
  2012-08-23 12:23 ` [patch 0/9] extended fdinfo via procfs series, v7 J. Bruce Fields
  9 siblings, 0 replies; 35+ messages in thread
From: Cyrill Gorcunov @ 2012-08-23 10:43 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel
  Cc: Al Viro, Alexey Dobriyan, Andrew Morton, Pavel Emelyanov,
	James Bottomley, Matthew Helsley, aneesh.kumar, bfields,
	Cyrill Gorcunov, Al Viro

[-- Attachment #1: seq-fdinfo-signalfd-3 --]
[-- Type: text/plain, Size: 3312 bytes --]

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
CC: Al Viro <viro@ZenIV.linux.org.uk>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: James Bottomley <jbottomley@parallels.com>
CC: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Matthew Helsley <matt.helsley@gmail.com>
CC: "J. Bruce Fields" <bfields@fieldses.org>
CC: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/proc/array.c         |    2 +-
 fs/signalfd.c           |   26 ++++++++++++++++++++++++++
 include/linux/proc_fs.h |    3 +++
 3 files changed, 30 insertions(+), 1 deletion(-)

Index: linux-2.6.git/fs/proc/array.c
===================================================================
--- linux-2.6.git.orig/fs/proc/array.c
+++ linux-2.6.git/fs/proc/array.c
@@ -220,7 +220,7 @@ static inline void task_state(struct seq
 	seq_putc(m, '\n');
 }
 
-static void render_sigset_t(struct seq_file *m, const char *header,
+void render_sigset_t(struct seq_file *m, const char *header,
 				sigset_t *set)
 {
 	int i;
Index: linux-2.6.git/fs/signalfd.c
===================================================================
--- linux-2.6.git.orig/fs/signalfd.c
+++ linux-2.6.git/fs/signalfd.c
@@ -29,6 +29,7 @@
 #include <linux/anon_inodes.h>
 #include <linux/signalfd.h>
 #include <linux/syscalls.h>
+#include <linux/proc_fs.h>
 
 void signalfd_cleanup(struct sighand_struct *sighand)
 {
@@ -46,6 +47,7 @@ void signalfd_cleanup(struct sighand_str
 }
 
 struct signalfd_ctx {
+	seqcount_t cnt;
 	sigset_t sigmask;
 };
 
@@ -227,7 +229,28 @@ static ssize_t signalfd_read(struct file
 	return total ? total: ret;
 }
 
+#ifdef CONFIG_PROC_FS
+static int signalfd_show_fdinfo(struct seq_file *m, struct file *f)
+{
+	struct signalfd_ctx *ctx = f->private_data;
+	sigset_t sigmask;
+	unsigned seq;
+
+	do {
+		seq = read_seqcount_begin(&ctx->cnt);
+		sigmask = ctx->sigmask;
+	} while (read_seqcount_retry(&ctx->cnt, seq));
+
+	signotset(&sigmask);
+	render_sigset_t(m, "sigmask:\t", &sigmask);
+	return 0;
+}
+#endif
+
 static const struct file_operations signalfd_fops = {
+#ifdef CONFIG_PROC_FS
+	.show_fdinfo	= signalfd_show_fdinfo,
+#endif
 	.release	= signalfd_release,
 	.poll		= signalfd_poll,
 	.read		= signalfd_read,
@@ -259,6 +282,7 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sig
 			return -ENOMEM;
 
 		ctx->sigmask = sigmask;
+		seqcount_init(&ctx->cnt);
 
 		/*
 		 * When we call this, the initialization must be complete, since
@@ -279,7 +303,9 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sig
 			return -EINVAL;
 		}
 		spin_lock_irq(&current->sighand->siglock);
+		write_seqcount_begin(&ctx->cnt);
 		ctx->sigmask = sigmask;
+		write_seqcount_end(&ctx->cnt);
 		spin_unlock_irq(&current->sighand->siglock);
 
 		wake_up(&current->sighand->signalfd_wqh);
Index: linux-2.6.git/include/linux/proc_fs.h
===================================================================
--- linux-2.6.git.orig/include/linux/proc_fs.h
+++ linux-2.6.git/include/linux/proc_fs.h
@@ -290,4 +290,7 @@ static inline struct net *PDE_NET(struct
 	return pde->parent->data;
 }
 
+#include <asm/signal.h>
+
+void render_sigset_t(struct seq_file *m, const char *header, sigset_t *set);
 #endif /* _LINUX_PROC_FS_H */


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch 4/9] fs, exportfs: Fix nil dereference if no s_export_op present
  2012-08-23 10:43 ` [patch 4/9] fs, exportfs: Fix nil dereference if no s_export_op present Cyrill Gorcunov
@ 2012-08-23 12:12   ` J. Bruce Fields
  2012-08-23 12:34     ` Cyrill Gorcunov
  0 siblings, 1 reply; 35+ messages in thread
From: J. Bruce Fields @ 2012-08-23 12:12 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: linux-kernel, linux-fsdevel, Al Viro, Alexey Dobriyan,
	Andrew Morton, Pavel Emelyanov, James Bottomley, Matthew Helsley,
	aneesh.kumar

On Thu, Aug 23, 2012 at 02:43:27PM +0400, Cyrill Gorcunov wrote:
> If there is no s_export_op present in a target superblock
> we might have nil dereference.

Is that NULL dereference possible with current code, or is it a check
you're adding to account for a new caller that you're about to add?

I believe it's the latter, but this would be a good thing to make clear
in the changelog.

--b.

> Fix it with eplicit test
> if s_export_op is provided.
> 
> Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
> CC: Pavel Emelyanov <xemul@parallels.com>
> CC: Al Viro <viro@ZenIV.linux.org.uk>
> CC: Alexey Dobriyan <adobriyan@gmail.com>
> CC: Andrew Morton <akpm@linux-foundation.org>
> CC: James Bottomley <jbottomley@parallels.com>
> CC: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
> CC: Alexey Dobriyan <adobriyan@gmail.com>
> CC: Matthew Helsley <matt.helsley@gmail.com>
> CC: "J. Bruce Fields" <bfields@fieldses.org>
> CC: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
> ---
>  fs/exportfs/expfs.c |    2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> Index: linux-2.6.git/fs/exportfs/expfs.c
> ===================================================================
> --- linux-2.6.git.orig/fs/exportfs/expfs.c
> +++ linux-2.6.git/fs/exportfs/expfs.c
> @@ -357,7 +357,7 @@ int exportfs_encode_fh(struct dentry *de
>  		 */
>  		parent = p->d_inode;
>  	}
> -	if (nop->encode_fh)
> +	if (nop && nop->encode_fh)
>  		error = nop->encode_fh(inode, fid->raw, max_len, parent);
>  	else
>  		error = export_encode_fh(inode, fid, max_len, parent);
> 

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch 0/9] extended fdinfo via procfs series, v7
  2012-08-23 10:43 [patch 0/9] extended fdinfo via procfs series, v7 Cyrill Gorcunov
                   ` (8 preceding siblings ...)
  2012-08-23 10:43 ` [patch 9/9] fdinfo: Show sigmask for signalfd fd v2 Cyrill Gorcunov
@ 2012-08-23 12:23 ` J. Bruce Fields
  2012-08-23 12:44   ` Cyrill Gorcunov
  9 siblings, 1 reply; 35+ messages in thread
From: J. Bruce Fields @ 2012-08-23 12:23 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: linux-kernel, linux-fsdevel, Al Viro, Alexey Dobriyan,
	Andrew Morton, Pavel Emelyanov, James Bottomley, Matthew Helsley,
	aneesh.kumar

On Thu, Aug 23, 2012 at 02:43:23PM +0400, Cyrill Gorcunov wrote:
> Hi guys,
> 
> here is updated version of the fdinfo via procfs series,
> the changes from previous one are the following
> 
>  - fhandle is carried inside inotify mark but this feature
>    is CONFIG dependent to not bloat the kernel for users
>    who don't need it

As Al points out, this doesn't help much: if this feature is something a
distro will want to provide, then in practice all their users are
eventually going to end up with it turned on.

Could you quantify the cost somehow?

I wonder if you could get away with something less than MAX_HANDLE_SIZE?
128 bytes is the maximum allowable by NFSv4.  In practice I don't think
any of our filesystems need more than 40 or so right now.

--b.

> 
>  - a small fix in exportfs code to prevent nil dereference
> 
> the comments would be appreciated.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch 4/9] fs, exportfs: Fix nil dereference if no s_export_op present
  2012-08-23 12:12   ` J. Bruce Fields
@ 2012-08-23 12:34     ` Cyrill Gorcunov
  2012-08-23 15:22       ` J. Bruce Fields
  0 siblings, 1 reply; 35+ messages in thread
From: Cyrill Gorcunov @ 2012-08-23 12:34 UTC (permalink / raw)
  To: J. Bruce Fields
  Cc: linux-kernel, linux-fsdevel, Al Viro, Alexey Dobriyan,
	Andrew Morton, Pavel Emelyanov, James Bottomley, Matthew Helsley,
	aneesh.kumar

On Thu, Aug 23, 2012 at 08:12:30AM -0400, J. Bruce Fields wrote:
> On Thu, Aug 23, 2012 at 02:43:27PM +0400, Cyrill Gorcunov wrote:
> > If there is no s_export_op present in a target superblock
> > we might have nil dereference.
> 
> Is that NULL dereference possible with current code, or is it a check
> you're adding to account for a new caller that you're about to add?
> 
> I believe it's the latter, but this would be a good thing to make clear
> in the changelog.

With the current code it seems to be impossible (well, i can't be sure
about nfs caller) because do_sys_name_to_handle does check for s_export_op
to exist. Updated changelog below. After all I think not checking
s_export_op was a mistake in general -- this routine is exported to
other modules but has no a single line of comment about possibility
of nil dereference.
---
From: Cyrill Gorcunov <gorcunov@openvz.org>
Subject: fs, exportfs: Escape nil dereference if no s_export_op present

This routine will be used to generate a file handle in fdinfo
output for inotify subsystem, where if no s_export_op present
the general export_encode_fh should be used. Thus add
a test if s_export_op present inside exportfs_encode_fh itself.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
CC: Pavel Emelyanov <xemul@parallels.com>
CC: Al Viro <viro@ZenIV.linux.org.uk>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: James Bottomley <jbottomley@parallels.com>
CC: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Matthew Helsley <matt.helsley@gmail.com>
CC: "J. Bruce Fields" <bfields@fieldses.org>
CC: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/exportfs/expfs.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

Index: linux-2.6.git/fs/exportfs/expfs.c
===================================================================
--- linux-2.6.git.orig/fs/exportfs/expfs.c
+++ linux-2.6.git/fs/exportfs/expfs.c
@@ -357,7 +357,7 @@ int exportfs_encode_fh(struct dentry *de
 		 */
 		parent = p->d_inode;
 	}
-	if (nop->encode_fh)
+	if (nop && nop->encode_fh)
 		error = nop->encode_fh(inode, fid->raw, max_len, parent);
 	else
 		error = export_encode_fh(inode, fid, max_len, parent);

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch 0/9] extended fdinfo via procfs series, v7
  2012-08-23 12:23 ` [patch 0/9] extended fdinfo via procfs series, v7 J. Bruce Fields
@ 2012-08-23 12:44   ` Cyrill Gorcunov
  2012-08-23 13:52     ` J. Bruce Fields
  2012-08-23 17:28     ` Cyrill Gorcunov
  0 siblings, 2 replies; 35+ messages in thread
From: Cyrill Gorcunov @ 2012-08-23 12:44 UTC (permalink / raw)
  To: J. Bruce Fields
  Cc: linux-kernel, linux-fsdevel, Al Viro, Alexey Dobriyan,
	Andrew Morton, Pavel Emelyanov, James Bottomley, Matthew Helsley,
	aneesh.kumar

On Thu, Aug 23, 2012 at 08:23:18AM -0400, J. Bruce Fields wrote:
> On Thu, Aug 23, 2012 at 02:43:23PM +0400, Cyrill Gorcunov wrote:
> > Hi guys,
> > 
> > here is updated version of the fdinfo via procfs series,
> > the changes from previous one are the following
> > 
> >  - fhandle is carried inside inotify mark but this feature
> >    is CONFIG dependent to not bloat the kernel for users
> >    who don't need it
> 
> As Al points out, this doesn't help much: if this feature is something a
> distro will want to provide, then in practice all their users are
> eventually going to end up with it turned on.
> 

Yes, I remember what Al has said, the problem is that this data attached
to inotify mark is not just a couple of bytes but rather about 136 bytes
per mark, and encoding this fhandle will take some cycles on mark creation
as well. Thus when in a sake of c/r we simply have no other way and are
to pay some trade off cost for c/r functionality, i don't think the
regular users (and note that CONFIG_CHECKPOINT_RESTORE is off by default)
should pay same cost for nothing. That's why I made it config dependant.
Again if you still think that making it config-option is a bad idea I'll
rip this symbols off, it's not a problem.

> Could you quantify the cost somehow?
> 

About 136 bytes per inotify mark.

> I wonder if you could get away with something less than MAX_HANDLE_SIZE?
> 128 bytes is the maximum allowable by NFSv4.  In practice I don't think
> any of our filesystems need more than 40 or so right now.

Look, Bruce, I would like to follow the limits we have #define'd in kernel,
because it makes code easier to support. I can #define some limit for
inotify fhandle but what should I print in fdinfo if say there is no
space left in buffer?

	Cyrill

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch 0/9] extended fdinfo via procfs series, v7
  2012-08-23 12:44   ` Cyrill Gorcunov
@ 2012-08-23 13:52     ` J. Bruce Fields
  2012-08-23 13:56       ` Cyrill Gorcunov
  2012-08-23 17:28     ` Cyrill Gorcunov
  1 sibling, 1 reply; 35+ messages in thread
From: J. Bruce Fields @ 2012-08-23 13:52 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: linux-kernel, linux-fsdevel, Al Viro, Alexey Dobriyan,
	Andrew Morton, Pavel Emelyanov, James Bottomley, Matthew Helsley,
	aneesh.kumar

On Thu, Aug 23, 2012 at 04:44:27PM +0400, Cyrill Gorcunov wrote:
> On Thu, Aug 23, 2012 at 08:23:18AM -0400, J. Bruce Fields wrote:
> > On Thu, Aug 23, 2012 at 02:43:23PM +0400, Cyrill Gorcunov wrote:
> > > Hi guys,
> > > 
> > > here is updated version of the fdinfo via procfs series,
> > > the changes from previous one are the following
> > > 
> > >  - fhandle is carried inside inotify mark but this feature
> > >    is CONFIG dependent to not bloat the kernel for users
> > >    who don't need it
> > 
> > As Al points out, this doesn't help much: if this feature is something a
> > distro will want to provide, then in practice all their users are
> > eventually going to end up with it turned on.
> > 
> 
> Yes, I remember what Al has said, the problem is that this data attached
> to inotify mark is not just a couple of bytes but rather about 136 bytes
> per mark, and encoding this fhandle will take some cycles on mark creation
> as well. Thus when in a sake of c/r we simply have no other way and are
> to pay some trade off cost for c/r functionality, i don't think the
> regular users (and note that CONFIG_CHECKPOINT_RESTORE is off by default)
> should pay same cost for nothing. That's why I made it config dependant.
> Again if you still think that making it config-option is a bad idea I'll
> rip this symbols off, it's not a problem.

I don't have any opinion on whether there should be a configuration
option.  Just want to make sure the cost when it's turned on is still
taken seriously.

--b.

> 
> > Could you quantify the cost somehow?
> > 
> 
> About 136 bytes per inotify mark.
> 
> > I wonder if you could get away with something less than MAX_HANDLE_SIZE?
> > 128 bytes is the maximum allowable by NFSv4.  In practice I don't think
> > any of our filesystems need more than 40 or so right now.
> 
> Look, Bruce, I would like to follow the limits we have #define'd in kernel,
> because it makes code easier to support. I can #define some limit for
> inotify fhandle but what should I print in fdinfo if say there is no
> space left in buffer?
> 
> 	Cyrill

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch 0/9] extended fdinfo via procfs series, v7
  2012-08-23 13:52     ` J. Bruce Fields
@ 2012-08-23 13:56       ` Cyrill Gorcunov
  2012-08-23 15:25         ` J. Bruce Fields
  0 siblings, 1 reply; 35+ messages in thread
From: Cyrill Gorcunov @ 2012-08-23 13:56 UTC (permalink / raw)
  To: J. Bruce Fields
  Cc: linux-kernel, linux-fsdevel, Al Viro, Alexey Dobriyan,
	Andrew Morton, Pavel Emelyanov, James Bottomley, Matthew Helsley,
	aneesh.kumar

On Thu, Aug 23, 2012 at 09:52:34AM -0400, J. Bruce Fields wrote:
> 
> I don't have any opinion on whether there should be a configuration
> option.  Just want to make sure the cost when it's turned on is still
> taken seriously.

I see. So, does the rest of series looks fine for you (except this config
moment)?

	Cyrill

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch 4/9] fs, exportfs: Fix nil dereference if no s_export_op present
  2012-08-23 12:34     ` Cyrill Gorcunov
@ 2012-08-23 15:22       ` J. Bruce Fields
  0 siblings, 0 replies; 35+ messages in thread
From: J. Bruce Fields @ 2012-08-23 15:22 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: linux-kernel, linux-fsdevel, Al Viro, Alexey Dobriyan,
	Andrew Morton, Pavel Emelyanov, James Bottomley, Matthew Helsley,
	aneesh.kumar

On Thu, Aug 23, 2012 at 04:34:22PM +0400, Cyrill Gorcunov wrote:
> On Thu, Aug 23, 2012 at 08:12:30AM -0400, J. Bruce Fields wrote:
> > On Thu, Aug 23, 2012 at 02:43:27PM +0400, Cyrill Gorcunov wrote:
> > > If there is no s_export_op present in a target superblock
> > > we might have nil dereference.
> > 
> > Is that NULL dereference possible with current code, or is it a check
> > you're adding to account for a new caller that you're about to add?
> > 
> > I believe it's the latter, but this would be a good thing to make clear
> > in the changelog.
> 
> With the current code it seems to be impossible (well, i can't be sure
> about nfs caller) because do_sys_name_to_handle does check for s_export_op
> to exist. Updated changelog below. After all I think not checking
> s_export_op was a mistake in general -- this routine is exported to
> other modules but has no a single line of comment about possibility
> of nil dereference.

Fine, just make sure that's explained in the changelog.

For distributors looking for patches to backport, "fix nil dereference"
may set off alarm bells unnecessarily.

--b.

> ---
> From: Cyrill Gorcunov <gorcunov@openvz.org>
> Subject: fs, exportfs: Escape nil dereference if no s_export_op present
> 
> This routine will be used to generate a file handle in fdinfo
> output for inotify subsystem, where if no s_export_op present
> the general export_encode_fh should be used. Thus add
> a test if s_export_op present inside exportfs_encode_fh itself.
> 
> Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
> CC: Pavel Emelyanov <xemul@parallels.com>
> CC: Al Viro <viro@ZenIV.linux.org.uk>
> CC: Alexey Dobriyan <adobriyan@gmail.com>
> CC: Andrew Morton <akpm@linux-foundation.org>
> CC: James Bottomley <jbottomley@parallels.com>
> CC: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
> CC: Alexey Dobriyan <adobriyan@gmail.com>
> CC: Matthew Helsley <matt.helsley@gmail.com>
> CC: "J. Bruce Fields" <bfields@fieldses.org>
> CC: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
> ---
>  fs/exportfs/expfs.c |    2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> Index: linux-2.6.git/fs/exportfs/expfs.c
> ===================================================================
> --- linux-2.6.git.orig/fs/exportfs/expfs.c
> +++ linux-2.6.git/fs/exportfs/expfs.c
> @@ -357,7 +357,7 @@ int exportfs_encode_fh(struct dentry *de
>  		 */
>  		parent = p->d_inode;
>  	}
> -	if (nop->encode_fh)
> +	if (nop && nop->encode_fh)
>  		error = nop->encode_fh(inode, fid->raw, max_len, parent);
>  	else
>  		error = export_encode_fh(inode, fid, max_len, parent);
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch 0/9] extended fdinfo via procfs series, v7
  2012-08-23 13:56       ` Cyrill Gorcunov
@ 2012-08-23 15:25         ` J. Bruce Fields
  2012-08-23 17:02           ` Cyrill Gorcunov
  0 siblings, 1 reply; 35+ messages in thread
From: J. Bruce Fields @ 2012-08-23 15:25 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: linux-kernel, linux-fsdevel, Al Viro, Alexey Dobriyan,
	Andrew Morton, Pavel Emelyanov, James Bottomley, Matthew Helsley,
	aneesh.kumar

On Thu, Aug 23, 2012 at 05:56:06PM +0400, Cyrill Gorcunov wrote:
> On Thu, Aug 23, 2012 at 09:52:34AM -0400, J. Bruce Fields wrote:
> > 
> > I don't have any opinion on whether there should be a configuration
> > option.  Just want to make sure the cost when it's turned on is still
> > taken seriously.
> 
> I see. So, does the rest of series looks fine for you (except this config
> moment)?

You're using the common filehandle code instead of encoding your own
from scratch, which was my only serious objection.

I haven't reviewed the rest of it carefully.

--b.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch 0/9] extended fdinfo via procfs series, v7
  2012-08-23 15:25         ` J. Bruce Fields
@ 2012-08-23 17:02           ` Cyrill Gorcunov
  2012-08-23 17:59             ` J. Bruce Fields
  0 siblings, 1 reply; 35+ messages in thread
From: Cyrill Gorcunov @ 2012-08-23 17:02 UTC (permalink / raw)
  To: J. Bruce Fields
  Cc: linux-kernel, linux-fsdevel, Al Viro, Alexey Dobriyan,
	Andrew Morton, Pavel Emelyanov, James Bottomley, Matthew Helsley,
	aneesh.kumar

On Thu, Aug 23, 2012 at 11:25:45AM -0400, J. Bruce Fields wrote:
> On Thu, Aug 23, 2012 at 05:56:06PM +0400, Cyrill Gorcunov wrote:
> > On Thu, Aug 23, 2012 at 09:52:34AM -0400, J. Bruce Fields wrote:
> > > 
> > > I don't have any opinion on whether there should be a configuration
> > > option.  Just want to make sure the cost when it's turned on is still
> > > taken seriously.
> > 
> > I see. So, does the rest of series looks fine for you (except this config
> > moment)?
> 
> You're using the common filehandle code instead of encoding your own
> from scratch, which was my only serious objection.
> 

Wait, Bruce, now I'm confused, if I understand all correctly
using common filehandle code from exportfs is preferred, or
not? What you mean by "encoding your own from scratch"? Could
you please clarify?

> I haven't reviewed the rest of it carefully.

ok

	Cyrill

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch 0/9] extended fdinfo via procfs series, v7
  2012-08-23 12:44   ` Cyrill Gorcunov
  2012-08-23 13:52     ` J. Bruce Fields
@ 2012-08-23 17:28     ` Cyrill Gorcunov
  1 sibling, 0 replies; 35+ messages in thread
From: Cyrill Gorcunov @ 2012-08-23 17:28 UTC (permalink / raw)
  To: J. Bruce Fields
  Cc: linux-kernel, linux-fsdevel, Al Viro, Alexey Dobriyan,
	Andrew Morton, Pavel Emelyanov, James Bottomley, Matthew Helsley,
	aneesh.kumar

On Thu, Aug 23, 2012 at 04:44:27PM +0400, Cyrill Gorcunov wrote:
> On Thu, Aug 23, 2012 at 08:23:18AM -0400, J. Bruce Fields wrote:
> > On Thu, Aug 23, 2012 at 02:43:23PM +0400, Cyrill Gorcunov wrote:
> > > Hi guys,
> > > 
> > > here is updated version of the fdinfo via procfs series,
> > > the changes from previous one are the following
> > > 
> > >  - fhandle is carried inside inotify mark but this feature
> > >    is CONFIG dependent to not bloat the kernel for users
> > >    who don't need it
> > 
> > As Al points out, this doesn't help much: if this feature is something a
> > distro will want to provide, then in practice all their users are
> > eventually going to end up with it turned on.
> > 
> 
> Yes, I remember what Al has said, the problem is that this data attached
> to inotify mark is not just a couple of bytes but rather about 136 bytes
> per mark, and encoding this fhandle will take some cycles on mark creation
> as well. Thus when in a sake of c/r we simply have no other way and are
> to pay some trade off cost for c/r functionality, i don't think the
> regular users (and note that CONFIG_CHECKPOINT_RESTORE is off by default)
> should pay same cost for nothing. That's why I made it config dependant.
> Again if you still think that making it config-option is a bad idea I'll
> rip this symbols off, it's not a problem.

Btw, Bruce, I forgot to mention that Linus and Andrew asked us to wrap
all code related to c/r we bring in with CONFIG_CHECKPOINT_RESTORE symbol.
So while wrapping show_fdinfo member in file_operations with CONFIG might
be an overhead (as Al and Pavel pointed me) the fhandle in mark is reverse,
a good candidate for CONFIG wrapping I think.

	Cyrill

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch 0/9] extended fdinfo via procfs series, v7
  2012-08-23 17:02           ` Cyrill Gorcunov
@ 2012-08-23 17:59             ` J. Bruce Fields
  2012-08-23 18:03               ` Cyrill Gorcunov
  0 siblings, 1 reply; 35+ messages in thread
From: J. Bruce Fields @ 2012-08-23 17:59 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: linux-kernel, linux-fsdevel, Al Viro, Alexey Dobriyan,
	Andrew Morton, Pavel Emelyanov, James Bottomley, Matthew Helsley,
	aneesh.kumar

On Thu, Aug 23, 2012 at 09:02:39PM +0400, Cyrill Gorcunov wrote:
> On Thu, Aug 23, 2012 at 11:25:45AM -0400, J. Bruce Fields wrote:
> > On Thu, Aug 23, 2012 at 05:56:06PM +0400, Cyrill Gorcunov wrote:
> > > On Thu, Aug 23, 2012 at 09:52:34AM -0400, J. Bruce Fields wrote:
> > > > 
> > > > I don't have any opinion on whether there should be a configuration
> > > > option.  Just want to make sure the cost when it's turned on is still
> > > > taken seriously.
> > > 
> > > I see. So, does the rest of series looks fine for you (except this config
> > > moment)?
> > 
> > You're using the common filehandle code instead of encoding your own
> > from scratch, which was my only serious objection.
> > 
> 
> Wait, Bruce, now I'm confused, if I understand all correctly
> using common filehandle code from exportfs is preferred,

Right, that's what I meant, sorry if I was unclear.

--b.

> or
> not? What you mean by "encoding your own from scratch"? Could
> you please clarify?
> 
> > I haven't reviewed the rest of it carefully.
> 
> ok
> 
> 	Cyrill

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch 0/9] extended fdinfo via procfs series, v7
  2012-08-23 17:59             ` J. Bruce Fields
@ 2012-08-23 18:03               ` Cyrill Gorcunov
  0 siblings, 0 replies; 35+ messages in thread
From: Cyrill Gorcunov @ 2012-08-23 18:03 UTC (permalink / raw)
  To: J. Bruce Fields
  Cc: linux-kernel, linux-fsdevel, Al Viro, Alexey Dobriyan,
	Andrew Morton, Pavel Emelyanov, James Bottomley, Matthew Helsley,
	aneesh.kumar

On Thu, Aug 23, 2012 at 01:59:01PM -0400, J. Bruce Fields wrote:
> > > You're using the common filehandle code instead of encoding your own
> > > from scratch, which was my only serious objection.
> > > 
> > 
> > Wait, Bruce, now I'm confused, if I understand all correctly
> > using common filehandle code from exportfs is preferred,
> 
> Right, that's what I meant, sorry if I was unclear.

I might be simply translating it wrong ;) Thanks Bruce!

	Cyrill

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch 1/9] procfs: Move /proc/pid/fd[info] handling code to fd.[ch]
  2012-08-23 10:43 ` [patch 1/9] procfs: Move /proc/pid/fd[info] handling code to fd.[ch] Cyrill Gorcunov
@ 2012-08-25 17:16   ` Al Viro
  2012-08-25 17:39     ` Cyrill Gorcunov
  2012-08-25 23:19     ` Al Viro
  0 siblings, 2 replies; 35+ messages in thread
From: Al Viro @ 2012-08-25 17:16 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: linux-kernel, linux-fsdevel, Alexey Dobriyan, Andrew Morton,
	Pavel Emelyanov, James Bottomley, Matthew Helsley, aneesh.kumar,
	bfields

On Thu, Aug 23, 2012 at 02:43:24PM +0400, Cyrill Gorcunov wrote:
> This patch prepares the ground for further extension of
> /proc/pid/fd[info] handling code by moving fdinfo handling
> code into fs/proc/fd.c.
> 
> I think such move makes both fs/proc/base.c and fs/proc/fd.c
> easier to read.

BTW, looking at the other stuff in fs/proc/base.c, why the hell is
struct file * grabbed in proc_map_files_readdir()?  All we do with
it is passing it to proc_fill_cache(), which passes it to
proc_map_files_instantiate(), which looks at two sodding bits
in file->f_mode.  Then we go and fput() all those struct file
references we'd been putting into the array...  What for?

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch 1/9] procfs: Move /proc/pid/fd[info] handling code to fd.[ch]
  2012-08-25 17:16   ` Al Viro
@ 2012-08-25 17:39     ` Cyrill Gorcunov
  2012-08-25 17:55       ` Al Viro
  2012-08-25 23:19     ` Al Viro
  1 sibling, 1 reply; 35+ messages in thread
From: Cyrill Gorcunov @ 2012-08-25 17:39 UTC (permalink / raw)
  To: Al Viro
  Cc: linux-kernel, linux-fsdevel, Alexey Dobriyan, Andrew Morton,
	Pavel Emelyanov, James Bottomley, Matthew Helsley, aneesh.kumar,
	bfields

On Sat, Aug 25, 2012 at 06:16:05PM +0100, Al Viro wrote:
> On Thu, Aug 23, 2012 at 02:43:24PM +0400, Cyrill Gorcunov wrote:
> > This patch prepares the ground for further extension of
> > /proc/pid/fd[info] handling code by moving fdinfo handling
> > code into fs/proc/fd.c.
> > 
> > I think such move makes both fs/proc/base.c and fs/proc/fd.c
> > easier to read.
> 
> BTW, looking at the other stuff in fs/proc/base.c, why the hell is
> struct file * grabbed in proc_map_files_readdir()?  All we do with
> it is passing it to proc_fill_cache(), which passes it to
> proc_map_files_instantiate(), which looks at two sodding bits
> in file->f_mode.  Then we go and fput() all those struct file
> references we'd been putting into the array...  What for?

Well, this could be simplified indeed, if I understand you correctly
you propose just save f_mode in flexible array and use it instead
of struct file, right? (which will require to rewrite code a bit)

	Cyrill

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch 1/9] procfs: Move /proc/pid/fd[info] handling code to fd.[ch]
  2012-08-25 17:39     ` Cyrill Gorcunov
@ 2012-08-25 17:55       ` Al Viro
  2012-08-25 18:58         ` Cyrill Gorcunov
  0 siblings, 1 reply; 35+ messages in thread
From: Al Viro @ 2012-08-25 17:55 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: linux-kernel, linux-fsdevel, Alexey Dobriyan, Andrew Morton,
	Pavel Emelyanov, James Bottomley, Matthew Helsley, aneesh.kumar,
	bfields

On Sat, Aug 25, 2012 at 09:39:58PM +0400, Cyrill Gorcunov wrote:
> On Sat, Aug 25, 2012 at 06:16:05PM +0100, Al Viro wrote:
> > On Thu, Aug 23, 2012 at 02:43:24PM +0400, Cyrill Gorcunov wrote:
> > > This patch prepares the ground for further extension of
> > > /proc/pid/fd[info] handling code by moving fdinfo handling
> > > code into fs/proc/fd.c.
> > > 
> > > I think such move makes both fs/proc/base.c and fs/proc/fd.c
> > > easier to read.
> > 
> > BTW, looking at the other stuff in fs/proc/base.c, why the hell is
> > struct file * grabbed in proc_map_files_readdir()?  All we do with
> > it is passing it to proc_fill_cache(), which passes it to
> > proc_map_files_instantiate(), which looks at two sodding bits
> > in file->f_mode.  Then we go and fput() all those struct file
> > references we'd been putting into the array...  What for?
> 
> Well, this could be simplified indeed, if I understand you correctly
> you propose just save f_mode in flexible array and use it instead
> of struct file, right? (which will require to rewrite code a bit)

Yes.  FWIW, proc_fill_cache() is really atrocious ;-/  Not to mention
anything else, if we ever get a negative dentry there, we have a dentry
leak.  I don't think it's possible in practice, but...  Furthermore,
        if (!child || IS_ERR(child) || !child->d_inode)
                goto end_instantiate;
        inode = child->d_inode;
        if (inode) {
                ino = inode->i_ino;
                type = inode->i_mode >> 12;
        }
        dput(child);
looks really weird - how can we possibly get !inode when we'd just
checked that child->inode is non-NULL?  Moreover, that find_inode_number()
a bit below is also as weird as it gets - in effect, we repeat
d_lookup() we'd just done earlier.  How *can* it get us anything?

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch 1/9] procfs: Move /proc/pid/fd[info] handling code to fd.[ch]
  2012-08-25 17:55       ` Al Viro
@ 2012-08-25 18:58         ` Cyrill Gorcunov
  2012-08-25 19:12           ` Al Viro
  0 siblings, 1 reply; 35+ messages in thread
From: Cyrill Gorcunov @ 2012-08-25 18:58 UTC (permalink / raw)
  To: Al Viro
  Cc: linux-kernel, linux-fsdevel, Alexey Dobriyan, Andrew Morton,
	Pavel Emelyanov, James Bottomley, Matthew Helsley, aneesh.kumar,
	bfields

On Sat, Aug 25, 2012 at 06:55:04PM +0100, Al Viro wrote:
> > Well, this could be simplified indeed, if I understand you correctly
> > you propose just save f_mode in flexible array and use it instead
> > of struct file, right? (which will require to rewrite code a bit)
> 
> Yes.  FWIW, proc_fill_cache() is really atrocious ;-/  Not to mention

OK, thanks. I'm putting this cleanup task in my big todo list. Hope I'll
manage on the next week with it.

> anything else, if we ever get a negative dentry there, we have a dentry
> leak.  I don't think it's possible in practice, but...  Furthermore,

could you please elaborate, you mean this string?

	struct dentry *child, *dir = filp->f_path.dentry;

>         if (!child || IS_ERR(child) || !child->d_inode)
>                 goto end_instantiate;

this could be IS_ERR_OR_NULL i guess

>         inode = child->d_inode;
>         if (inode) {
>                 ino = inode->i_ino;
>                 type = inode->i_mode >> 12;
>         }
>         dput(child);
> looks really weird - how can we possibly get !inode when we'd just
> checked that child->inode is non-NULL?  Moreover, that find_inode_number()
> a bit below is also as weird as it gets - in effect, we repeat
> d_lookup() we'd just done earlier.  How *can* it get us anything?

to be fair -- I don't know ;) I mean I didn't invent this function
but it definitely could be cleaned up. That was partly a reason
why I've moved fd related code to fd.c|h (base.c is really big
in content already and it's always a problem at least for me to
follow big "c" files).

I can try to clean this code up, but not in this patch series,
just to not mess the series even more.

	Cyrill

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch 1/9] procfs: Move /proc/pid/fd[info] handling code to fd.[ch]
  2012-08-25 18:58         ` Cyrill Gorcunov
@ 2012-08-25 19:12           ` Al Viro
  2012-08-25 19:43             ` Cyrill Gorcunov
  0 siblings, 1 reply; 35+ messages in thread
From: Al Viro @ 2012-08-25 19:12 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: linux-kernel, linux-fsdevel, Alexey Dobriyan, Andrew Morton,
	Pavel Emelyanov, James Bottomley, Matthew Helsley, aneesh.kumar,
	bfields

On Sat, Aug 25, 2012 at 10:58:29PM +0400, Cyrill Gorcunov wrote:
> On Sat, Aug 25, 2012 at 06:55:04PM +0100, Al Viro wrote:
> > > Well, this could be simplified indeed, if I understand you correctly
> > > you propose just save f_mode in flexible array and use it instead
> > > of struct file, right? (which will require to rewrite code a bit)
> > 
> > Yes.  FWIW, proc_fill_cache() is really atrocious ;-/  Not to mention
> 
> OK, thanks. I'm putting this cleanup task in my big todo list. Hope I'll
> manage on the next week with it.
> 
> > anything else, if we ever get a negative dentry there, we have a dentry
> > leak.  I don't think it's possible in practice, but...  Furthermore,
> 
> could you please elaborate, you mean this string?

I mean that if we get to that if (... || !child->d_inode) and end up
evaluating the last part at all, we have acquired a reference to that
struct dentry.  And if that last part ends up being true (i.e. if it's
a negative dentry), we'll return from function without having dropped
the reference we'd acquired.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch 1/9] procfs: Move /proc/pid/fd[info] handling code to fd.[ch]
  2012-08-25 19:12           ` Al Viro
@ 2012-08-25 19:43             ` Cyrill Gorcunov
  2012-08-25 21:52               ` Al Viro
  0 siblings, 1 reply; 35+ messages in thread
From: Cyrill Gorcunov @ 2012-08-25 19:43 UTC (permalink / raw)
  To: Al Viro
  Cc: linux-kernel, linux-fsdevel, Alexey Dobriyan, Andrew Morton,
	Pavel Emelyanov, James Bottomley, Matthew Helsley, aneesh.kumar,
	bfields

On Sat, Aug 25, 2012 at 08:12:18PM +0100, Al Viro wrote:
> On Sat, Aug 25, 2012 at 10:58:29PM +0400, Cyrill Gorcunov wrote:
> > On Sat, Aug 25, 2012 at 06:55:04PM +0100, Al Viro wrote:
> > > > Well, this could be simplified indeed, if I understand you correctly
> > > > you propose just save f_mode in flexible array and use it instead
> > > > of struct file, right? (which will require to rewrite code a bit)
> > > 
> > > Yes.  FWIW, proc_fill_cache() is really atrocious ;-/  Not to mention
> > 
> > OK, thanks. I'm putting this cleanup task in my big todo list. Hope I'll
> > manage on the next week with it.
> > 
> > > anything else, if we ever get a negative dentry there, we have a dentry
> > > leak.  I don't think it's possible in practice, but...  Furthermore,
> > 
> > could you please elaborate, you mean this string?
> 
> I mean that if we get to that if (... || !child->d_inode) and end up
> evaluating the last part at all, we have acquired a reference to that
> struct dentry.  And if that last part ends up being true (i.e. if it's
> a negative dentry), we'll return from function without having dropped
> the reference we'd acquired.

Would the patch below improve the code? Look, I've not dropped
find_inode_number call since it's a bit unclear for me what
would happen if !child case hit

	child = d_lookup(dir, &qname);
	if (!child) {
		struct dentry *new = d_alloc(dir, &qname);
		if (new) {
			child = instantiate(dir->d_inode, new, task, ptr);
			if (child)
				dput(new);
			else
				child = new;
		}
	}

can we be sure that i_ino won't be zero here?
---
 fs/proc/base.c |   27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

Index: linux-2.6.git/fs/proc/base.c
===================================================================
--- linux-2.6.git.orig/fs/proc/base.c
+++ linux-2.6.git/fs/proc/base.c
@@ -1650,7 +1650,6 @@ int proc_fill_cache(struct file *filp, v
 	instantiate_t instantiate, struct task_struct *task, const void *ptr)
 {
 	struct dentry *child, *dir = filp->f_path.dentry;
-	struct inode *inode;
 	struct qstr qname;
 	ino_t ino = 0;
 	unsigned type = DT_UNKNOWN;
@@ -1661,8 +1660,7 @@ int proc_fill_cache(struct file *filp, v
 
 	child = d_lookup(dir, &qname);
 	if (!child) {
-		struct dentry *new;
-		new = d_alloc(dir, &qname);
+		struct dentry *new = d_alloc(dir, &qname);
 		if (new) {
 			child = instantiate(dir->d_inode, new, task, ptr);
 			if (child)
@@ -1671,19 +1669,20 @@ int proc_fill_cache(struct file *filp, v
 				child = new;
 		}
 	}
-	if (!child || IS_ERR(child) || !child->d_inode)
-		goto end_instantiate;
-	inode = child->d_inode;
-	if (inode) {
-		ino = inode->i_ino;
-		type = inode->i_mode >> 12;
-	}
+	if (IS_ERR_OR_NULL(child))
+		goto err;
+	if (!child->d_inode)
+		goto err_put;
+	ino = child->d_inode->i_ino;
+	type = child->d_inode->i_mode >> 12;
+err_put:
 	dput(child);
-end_instantiate:
-	if (!ino)
+err:
+	if (!ino) {
 		ino = find_inode_number(dir, &qname);
-	if (!ino)
-		ino = 1;
+		if (!ino)
+			ino = 1;
+	}
 	return filldir(dirent, name, len, filp->f_pos, ino, type);
 }
 

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch 1/9] procfs: Move /proc/pid/fd[info] handling code to fd.[ch]
  2012-08-25 19:43             ` Cyrill Gorcunov
@ 2012-08-25 21:52               ` Al Viro
  0 siblings, 0 replies; 35+ messages in thread
From: Al Viro @ 2012-08-25 21:52 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: linux-kernel, linux-fsdevel, Alexey Dobriyan, Andrew Morton,
	Pavel Emelyanov, James Bottomley, Matthew Helsley, aneesh.kumar,
	bfields

On Sat, Aug 25, 2012 at 11:43:25PM +0400, Cyrill Gorcunov wrote:

> Would the patch below improve the code? Look, I've not dropped
> find_inode_number call since it's a bit unclear for me what
> would happen if !child case hit
> 
> 	child = d_lookup(dir, &qname);
> 	if (!child) {
> 		struct dentry *new = d_alloc(dir, &qname);
> 		if (new) {
> 			child = instantiate(dir->d_inode, new, task, ptr);
> 			if (child)
> 				dput(new);
> 			else
> 				child = new;
> 		}
> 	}
> 
> can we be sure that i_ino won't be zero here?

First of all, ->i_ino is not going to be zero for any procfs inode.
As for !child case, that's possible only if d_lookup() returns NULL
*and* d_alloc() fails.  In that case find_inode_number() will call
d_hash_and_lookup(), which will call d_lookup(), get NULL from it and
return NULL to find_inode_number().  Which will return 0 to the
caller.

AFAICS, if find_inode_number() is called there at all, it will return 0.
IOW, this if (!ino) ino = find_inode_number(...); is a no-op.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch 1/9] procfs: Move /proc/pid/fd[info] handling code to fd.[ch]
  2012-08-25 17:16   ` Al Viro
  2012-08-25 17:39     ` Cyrill Gorcunov
@ 2012-08-25 23:19     ` Al Viro
  1 sibling, 0 replies; 35+ messages in thread
From: Al Viro @ 2012-08-25 23:19 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: linux-kernel, linux-fsdevel, Alexey Dobriyan, Andrew Morton,
	Pavel Emelyanov, James Bottomley, Matthew Helsley, aneesh.kumar,
	bfields

On Sat, Aug 25, 2012 at 06:16:05PM +0100, Al Viro wrote:
> On Thu, Aug 23, 2012 at 02:43:24PM +0400, Cyrill Gorcunov wrote:
> > This patch prepares the ground for further extension of
> > /proc/pid/fd[info] handling code by moving fdinfo handling
> > code into fs/proc/fd.c.
> > 
> > I think such move makes both fs/proc/base.c and fs/proc/fd.c
> > easier to read.
> 
> BTW, looking at the other stuff in fs/proc/base.c, why the hell is
> struct file * grabbed in proc_map_files_readdir()?  All we do with
> it is passing it to proc_fill_cache(), which passes it to
> proc_map_files_instantiate(), which looks at two sodding bits
> in file->f_mode.  Then we go and fput() all those struct file
> references we'd been putting into the array...  What for?

Another thing:
			f_flags = fd_file->f_flags & ~O_CLOEXEC;
in there is really wrong; we shouldn't leak that bit into ->f_flags
at all (which is properly fixed in fs/open.c).  Close-on-exec is
a property of descriptor, not of file...

tid_fd_revalidate(): these dances with get_files_struct()/put_files_struct()
are pointless.  We really only need "is it opened" + f_mode if it is.
Extracting that information is cheap enough to have it done right under
task_lock() (see what get_files_struct() is doing); no need to mess with
files_struct refcount, etc.

proc_fd_link() and seq_fd_open(): ditto.

As the matter of fact, the only place in there where get_files_struct() is
warranted is proc_readfd_common().

intantiate callback: sodding atrocity; as the absolute minimum it should
lose its "dir" argument.  dentry->d_parent->d_inode is stable (we are
holding ->i_mutex on parent in all callers) and it's equal to dir in all
cases.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch 2/9] procfs: Convert /proc/pid/fdinfo/ handling routines to seq-file v2
  2012-08-23 10:43 ` [patch 2/9] procfs: Convert /proc/pid/fdinfo/ handling routines to seq-file v2 Cyrill Gorcunov
@ 2012-08-26  2:46   ` Al Viro
  2012-08-26  8:13     ` Cyrill Gorcunov
  2012-08-26 14:28     ` Cyrill Gorcunov
  0 siblings, 2 replies; 35+ messages in thread
From: Al Viro @ 2012-08-26  2:46 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: linux-kernel, linux-fsdevel, Alexey Dobriyan, Andrew Morton,
	Pavel Emelyanov, James Bottomley, Matthew Helsley, aneesh.kumar,
	bfields

On Thu, Aug 23, 2012 at 02:43:25PM +0400, Cyrill Gorcunov wrote:
> This patch converts /proc/pid/fdinfo/ handling routines to seq-file which
> is needed to extend seq operations and plug in auxiliary fdinfo provides
> from subsystems like eventfd/eventpoll/fsnotify.
> 
> Note the proc_fd_link no longer call for proc_fd_info, simply because
> proc_fd_info is converted to seq_fdinfo_open (which is seq-file open()
> prototype).

Actually, now that I've looked at it a bit more...  You've just introduced
an ABI change here.  Look:

echo foo > /tmp/a
exec 8</tmp/a			# fd 8 reads from /tmp/a
read i <&8			# read line from it
exec 9</proc/self/fdinfo/8	# fd 9 is /proc/self/fdinfo/8
exec 8</tmp/a			# close fd 8 and reopen it to the same /tmp/a
cat <&9				# now read from fd 9

With the mainline it will print
pos:  0
flags:  0100000

With that commit you will get
pos:    4
flags:  0100000

since the file you've opened refers to what used to be at fd 8 at the
moment of open(2), not read(2).  It may or may not be harmless, but it
definitely is a userland ABI change.  And that way it's actually an
extra PITA for yourself - think what /proc/self/fdinfo/9 should contain
now!  That's right, you've got hidden state there and would need to
print it to be able to reconstruct the state on restart.  Only it doesn't
end just there - what if you've taken that one step further and got the
struct file stashed in there at open(2) time also of the same kind?

IMO doing that at open() time is just a headache for no good reason -
resolving descriptor to struct file * at read() time as we do now
is much saner.  Better do that in your ->show(), since you are using
a single-shot iterator anyway...

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch 2/9] procfs: Convert /proc/pid/fdinfo/ handling routines to seq-file v2
  2012-08-26  2:46   ` Al Viro
@ 2012-08-26  8:13     ` Cyrill Gorcunov
  2012-08-26 14:28     ` Cyrill Gorcunov
  1 sibling, 0 replies; 35+ messages in thread
From: Cyrill Gorcunov @ 2012-08-26  8:13 UTC (permalink / raw)
  To: Al Viro
  Cc: linux-kernel, linux-fsdevel, Alexey Dobriyan, Andrew Morton,
	Pavel Emelyanov, James Bottomley, Matthew Helsley, aneesh.kumar,
	bfields

On Sun, Aug 26, 2012 at 03:46:53AM +0100, Al Viro wrote:
> On Thu, Aug 23, 2012 at 02:43:25PM +0400, Cyrill Gorcunov wrote:
> > This patch converts /proc/pid/fdinfo/ handling routines to seq-file which
> > is needed to extend seq operations and plug in auxiliary fdinfo provides
> > from subsystems like eventfd/eventpoll/fsnotify.
> > 
> > Note the proc_fd_link no longer call for proc_fd_info, simply because
> > proc_fd_info is converted to seq_fdinfo_open (which is seq-file open()
> > prototype).
> 
> Actually, now that I've looked at it a bit more...  You've just introduced
> an ABI change here.  Look:

Crap, ineed. Thanks, Al! I'll fix it up, sorry.

...

> IMO doing that at open() time is just a headache for no good reason -
> resolving descriptor to struct file * at read() time as we do now
> is much saner.  Better do that in your ->show(), since you are using
> a single-shot iterator anyway...

Oh, thanks for the hint, Al! I'll rework and send an updated version.

	Cyrill

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch 2/9] procfs: Convert /proc/pid/fdinfo/ handling routines to seq-file v2
  2012-08-26  2:46   ` Al Viro
  2012-08-26  8:13     ` Cyrill Gorcunov
@ 2012-08-26 14:28     ` Cyrill Gorcunov
  2012-08-26 15:05       ` Al Viro
  1 sibling, 1 reply; 35+ messages in thread
From: Cyrill Gorcunov @ 2012-08-26 14:28 UTC (permalink / raw)
  To: Al Viro
  Cc: linux-kernel, linux-fsdevel, Alexey Dobriyan, Andrew Morton,
	Pavel Emelyanov, James Bottomley, Matthew Helsley, aneesh.kumar,
	bfields

On Sun, Aug 26, 2012 at 03:46:53AM +0100, Al Viro wrote:
> 
> IMO doing that at open() time is just a headache for no good reason -
> resolving descriptor to struct file * at read() time as we do now
> is much saner.  Better do that in your ->show(), since you are using
> a single-shot iterator anyway...

Al, the updated version is below. I suppose I can grab proc inode then
and lookup for file position and flags at show method. (I remember
what you've said about O_CLOEXEC bit, but I'll address this in
another patch).
---
From: Cyrill Gorcunov <gorcunov@openvz.org>
Subject: procfs: Convert /proc/pid/fdinfo/ handling routines to seq-file v3

This patch converts /proc/pid/fdinfo/ handling routines to seq-file which
is needed to extend seq operations and plug in auxiliary fdinfo provides
from subsystems like eventfd/eventpoll/fsnotify.

Note the proc_fd_link no longer call for proc_fd_info, simply because
proc_fd_info is converted to seq_fdinfo_open (which is seq-file open()
prototype).

v2 (by Al Viro):
 - Don't use helper function with optional arguments, thus proc_fd_info get deprecated
 - Use proc_fdinfo structure with seq_file embedded, thus we can use container_of helper
 - Use fput to free reference to the file we've grabbed

v3 (by Al Viro):
 - Don't grab struct file at read(2) time since it breaks ABI, instead all
   manipulations should be done at ->show() call.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
CC: Pavel Emelyanov <xemul@parallels.com>
CC: Al Viro <viro@ZenIV.linux.org.uk>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: James Bottomley <jbottomley@parallels.com>
CC: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Matthew Helsley <matt.helsley@gmail.com>
CC: "J. Bruce Fields" <bfields@fieldses.org>
CC: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/proc/fd.c |  142 +++++++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 95 insertions(+), 47 deletions(-)

Index: linux-2.6.git/fs/proc/fd.c
===================================================================
--- linux-2.6.git.orig/fs/proc/fd.c
+++ linux-2.6.git/fs/proc/fd.c
@@ -6,61 +6,100 @@
 #include <linux/namei.h>
 #include <linux/pid.h>
 #include <linux/security.h>
+#include <linux/file.h>
+#include <linux/seq_file.h>
 
 #include <linux/proc_fs.h>
 
 #include "internal.h"
 #include "fd.h"
 
-#define PROC_FDINFO_MAX 64
+struct proc_fdinfo {
+	struct seq_file	m;
+	struct inode	*inode;
+};
 
-static int proc_fd_info(struct inode *inode, struct path *path, char *info)
+static int seq_show(struct seq_file *m, void *v)
 {
-	struct task_struct *task = get_proc_task(inode);
+	struct proc_fdinfo *fdinfo = container_of(m, struct proc_fdinfo, m);
 	struct files_struct *files = NULL;
-	int fd = proc_fd(inode);
-	struct file *file;
+	int f_flags = 0, ret = -ENOENT;
+	struct file *file = NULL;
+	struct task_struct *task;
+
+	task = get_proc_task(fdinfo->inode);
+	if (!task)
+		return -ENOENT;
+
+	files = get_files_struct(task);
+	put_task_struct(task);
 
-	if (task) {
-		files = get_files_struct(task);
-		put_task_struct(task);
-	}
 	if (files) {
-		/*
-		 * We are not taking a ref to the file structure, so we must
-		 * hold ->file_lock.
-		 */
+		int fd = proc_fd(fdinfo->inode);
+
 		spin_lock(&files->file_lock);
 		file = fcheck_files(files, fd);
 		if (file) {
-			unsigned int f_flags;
-			struct fdtable *fdt;
+			struct fdtable *fdt = files_fdtable(files);
 
-			fdt = files_fdtable(files);
 			f_flags = file->f_flags & ~O_CLOEXEC;
 			if (close_on_exec(fd, fdt))
 				f_flags |= O_CLOEXEC;
 
-			if (path) {
-				*path = file->f_path;
-				path_get(&file->f_path);
-			}
-			if (info)
-				snprintf(info, PROC_FDINFO_MAX,
-					 "pos:\t%lli\n"
-					 "flags:\t0%o\n",
-					 (long long) file->f_pos,
-					 f_flags);
-			spin_unlock(&files->file_lock);
-			put_files_struct(files);
-			return 0;
+			get_file(file);
+			ret = 0;
 		}
 		spin_unlock(&files->file_lock);
 		put_files_struct(files);
 	}
-	return -ENOENT;
+
+	if (!ret) {
+                seq_printf(m, "pos:\t%lli\nflags:\t0%o\n",
+			   (long long)file->f_pos, f_flags);
+		fput(file);
+	}
+
+	return ret;
+}
+
+static int seq_fdinfo_open(struct inode *inode, struct file *file)
+{
+	struct proc_fdinfo *fdinfo;
+	int ret = 0;
+
+	fdinfo = kmalloc(sizeof(*fdinfo), GFP_KERNEL);
+	if (!fdinfo)
+		return -ENOMEM;
+
+	ihold(inode);
+	fdinfo->inode = inode;
+
+	file->private_data = &fdinfo->m;
+	ret = single_open(file, seq_show, NULL);
+	if (ret) {
+		iput(inode);
+		kfree(fdinfo);
+	}
+
+	return ret;
+}
+
+static int seq_fdinfo_release(struct inode *inode, struct file *file)
+{
+	struct proc_fdinfo *fdinfo =
+		container_of((struct seq_file *)file->private_data,
+			     struct proc_fdinfo, m);
+	iput(fdinfo->inode);
+	return single_release(inode, file);
 }
 
+static const struct file_operations proc_fdinfo_file_operations = {
+	.open		= seq_fdinfo_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_fdinfo_release,
+};
+
 static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
 {
 	struct files_struct *files;
@@ -130,7 +169,32 @@ static const struct dentry_operations ti
 
 static int proc_fd_link(struct dentry *dentry, struct path *path)
 {
-	return proc_fd_info(dentry->d_inode, path, NULL);
+	struct files_struct *files = NULL;
+	struct task_struct *task;
+	int ret = -ENOENT;
+
+	task = get_proc_task(dentry->d_inode);
+	if (task) {
+		files = get_files_struct(task);
+		put_task_struct(task);
+	}
+
+	if (files) {
+		int fd = proc_fd(dentry->d_inode);
+		struct file *fd_file;
+
+		spin_lock(&files->file_lock);
+		fd_file = fcheck_files(files, fd);
+		if (fd_file) {
+			*path = fd_file->f_path;
+			path_get(&fd_file->f_path);
+			ret = 0;
+		}
+		spin_unlock(&files->file_lock);
+		put_files_struct(files);
+	}
+
+	return ret;
 }
 
 static struct dentry *
@@ -245,22 +309,6 @@ out_no_task:
 	return retval;
 }
 
-static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
-				size_t len, loff_t *ppos)
-{
-	char tmp[PROC_FDINFO_MAX];
-	int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp);
-	if (!err)
-		err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp));
-	return err;
-}
-
-static const struct file_operations proc_fdinfo_file_operations = {
-	.open           = nonseekable_open,
-	.read		= proc_fdinfo_read,
-	.llseek		= no_llseek,
-};
-
 static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir)
 {
 	return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate);

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch 2/9] procfs: Convert /proc/pid/fdinfo/ handling routines to seq-file v2
  2012-08-26 14:28     ` Cyrill Gorcunov
@ 2012-08-26 15:05       ` Al Viro
  2012-08-26 15:10         ` Cyrill Gorcunov
  0 siblings, 1 reply; 35+ messages in thread
From: Al Viro @ 2012-08-26 15:05 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: linux-kernel, linux-fsdevel, Alexey Dobriyan, Andrew Morton,
	Pavel Emelyanov, James Bottomley, Matthew Helsley, aneesh.kumar,
	bfields

On Sun, Aug 26, 2012 at 06:28:20PM +0400, Cyrill Gorcunov wrote:
> On Sun, Aug 26, 2012 at 03:46:53AM +0100, Al Viro wrote:
> > 
> > IMO doing that at open() time is just a headache for no good reason -
> > resolving descriptor to struct file * at read() time as we do now
> > is much saner.  Better do that in your ->show(), since you are using
> > a single-shot iterator anyway...
> 
> Al, the updated version is below. I suppose I can grab proc inode then
> and lookup for file position and flags at show method. (I remember
> what you've said about O_CLOEXEC bit, but I'll address this in
> another patch).

Applied, with a couple of changes:
	* there's no need for those games with ihold/iput - opened file pins
its inode down just fine, TYVM.
	* struct fd_info is pointless in that form - the last argument
of single_open() will end up in seq_file ->private, so let's just pass
the inode there and use m->private in ->show().

I'll push that into vfs.git#master (along with the previous patch) in a few
hours.

O_CLOEXEC is taken care of in my tree.  FWIW, I'm consolidating descriptor
handling in general into fs/file.c (and I'm seriously tempted to rename
that sucker to something like fs/descriptors.c or fs/fdtable.c); some of
that stuff is already in #master.  I'm probably going to move some of the
code from your fs/proc/fd.c there as well...

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [patch 2/9] procfs: Convert /proc/pid/fdinfo/ handling routines to seq-file v2
  2012-08-26 15:05       ` Al Viro
@ 2012-08-26 15:10         ` Cyrill Gorcunov
  0 siblings, 0 replies; 35+ messages in thread
From: Cyrill Gorcunov @ 2012-08-26 15:10 UTC (permalink / raw)
  To: Al Viro
  Cc: linux-kernel, linux-fsdevel, Alexey Dobriyan, Andrew Morton,
	Pavel Emelyanov, James Bottomley, Matthew Helsley, aneesh.kumar,
	bfields

On Sun, Aug 26, 2012 at 04:05:20PM +0100, Al Viro wrote:
> 
> Applied, with a couple of changes:
>  * there's no need for those games with ihold/iput - opened file pins
>    its inode down just fine, TYVM.

Ah, yeah, thanks!

>  * struct fd_info is pointless in that form - the last argument
>    of single_open() will end up in seq_file ->private, so let's just pass
>    the inode there and use m->private in ->show().

Yes. Thanks!

> I'll push that into vfs.git#master (along with the previous patch) in a few hours.
> 
> O_CLOEXEC is taken care of in my tree.  FWIW, I'm consolidating descriptor
> handling in general into fs/file.c (and I'm seriously tempted to rename
> that sucker to something like fs/descriptors.c or fs/fdtable.c); some of
> that stuff is already in #master.  I'm probably going to move some of the
> code from your fs/proc/fd.c there as well...

OK, so I'll fetch your tree and rebase the rest of series on top then,
to continue fdinfo handling. Sounds good?

^ permalink raw reply	[flat|nested] 35+ messages in thread

end of thread, other threads:[~2012-08-26 15:11 UTC | newest]

Thread overview: 35+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-08-23 10:43 [patch 0/9] extended fdinfo via procfs series, v7 Cyrill Gorcunov
2012-08-23 10:43 ` [patch 1/9] procfs: Move /proc/pid/fd[info] handling code to fd.[ch] Cyrill Gorcunov
2012-08-25 17:16   ` Al Viro
2012-08-25 17:39     ` Cyrill Gorcunov
2012-08-25 17:55       ` Al Viro
2012-08-25 18:58         ` Cyrill Gorcunov
2012-08-25 19:12           ` Al Viro
2012-08-25 19:43             ` Cyrill Gorcunov
2012-08-25 21:52               ` Al Viro
2012-08-25 23:19     ` Al Viro
2012-08-23 10:43 ` [patch 2/9] procfs: Convert /proc/pid/fdinfo/ handling routines to seq-file v2 Cyrill Gorcunov
2012-08-26  2:46   ` Al Viro
2012-08-26  8:13     ` Cyrill Gorcunov
2012-08-26 14:28     ` Cyrill Gorcunov
2012-08-26 15:05       ` Al Viro
2012-08-26 15:10         ` Cyrill Gorcunov
2012-08-23 10:43 ` [patch 3/9] procfs: Add ability to plug in auxiliary fdinfo providers Cyrill Gorcunov
2012-08-23 10:43 ` [patch 4/9] fs, exportfs: Fix nil dereference if no s_export_op present Cyrill Gorcunov
2012-08-23 12:12   ` J. Bruce Fields
2012-08-23 12:34     ` Cyrill Gorcunov
2012-08-23 15:22       ` J. Bruce Fields
2012-08-23 10:43 ` [patch 5/9] fs, notify: Add file handle entry into inotify_inode_mark Cyrill Gorcunov
2012-08-23 10:43 ` [patch 6/9] fs, notify: Add procfs fdinfo helper v4 Cyrill Gorcunov
2012-08-23 10:43 ` [patch 7/9] fs, eventfd: Add procfs fdinfo helper Cyrill Gorcunov
2012-08-23 10:43 ` [patch 8/9] fs, epoll: Add procfs fdinfo helper v2 Cyrill Gorcunov
2012-08-23 10:43 ` [patch 9/9] fdinfo: Show sigmask for signalfd fd v2 Cyrill Gorcunov
2012-08-23 12:23 ` [patch 0/9] extended fdinfo via procfs series, v7 J. Bruce Fields
2012-08-23 12:44   ` Cyrill Gorcunov
2012-08-23 13:52     ` J. Bruce Fields
2012-08-23 13:56       ` Cyrill Gorcunov
2012-08-23 15:25         ` J. Bruce Fields
2012-08-23 17:02           ` Cyrill Gorcunov
2012-08-23 17:59             ` J. Bruce Fields
2012-08-23 18:03               ` Cyrill Gorcunov
2012-08-23 17:28     ` Cyrill Gorcunov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).