All of lore.kernel.org
 help / color / mirror / Atom feed
* [rfc 0/7] [rfc] procfs, fdinfo seqfile providers
@ 2012-06-27 11:01 Cyrill Gorcunov
  2012-06-27 11:01 ` [rfc 1/7] procfs: Move /proc/pid/fd[info] handling code to fd.[ch] Cyrill Gorcunov
                   ` (6 more replies)
  0 siblings, 7 replies; 17+ messages in thread
From: Cyrill Gorcunov @ 2012-06-27 11:01 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-fsdevel

Hi guys,

here is a slightly updated version of the procfs fdinfo providers (I've
ported the series to the 3.5-rc4 kernel). Please review if you have time.

The basic idea remains the same -- provide additional information
specific to the file type (fanotify/eventpoll) via /proc/pid/fdinfo/$fd,
suitable for restoring such a file.

	Cyrill

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [rfc 1/7] procfs: Move /proc/pid/fd[info] handling code to fd.[ch]
  2012-06-27 11:01 [rfc 0/7] [rfc] procfs, fdinfo seqfile providers Cyrill Gorcunov
@ 2012-06-27 11:01 ` Cyrill Gorcunov
  2012-06-27 11:01 ` [rfc 2/7] procfs: Convert /proc/pid/fdinfo/ handling routines to seq-file Cyrill Gorcunov
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 17+ messages in thread
From: Cyrill Gorcunov @ 2012-06-27 11:01 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-fsdevel, Cyrill Gorcunov, Al Viro, Alexey Dobriyan,
	Andrew Morton, Pavel Emelyanov, James Bottomley

[-- Attachment #1: seq-fdinfo-base-proc-4 --]
[-- Type: text/plain, Size: 23104 bytes --]

This patch prepares the ground for further extension of
/proc/pid/fd[info] handling code by moving fdinfo handling
code into fs/proc/fd.c.

I think such a move makes both fs/proc/base.c and fs/proc/fd.c
easier to read.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
CC: Al Viro <viro@ZenIV.linux.org.uk>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Pavel Emelyanov <xemul@parallels.com>
CC: James Bottomley <jbottomley@parallels.com>
---
 fs/proc/Makefile   |    2 
 fs/proc/base.c     |  388 -----------------------------------------------------
 fs/proc/fd.c       |  351 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/proc/fd.h       |   14 +
 fs/proc/internal.h |   48 ++++++
 5 files changed, 416 insertions(+), 387 deletions(-)

Index: linux-2.6.git/fs/proc/Makefile
===================================================================
--- linux-2.6.git.orig/fs/proc/Makefile
+++ linux-2.6.git/fs/proc/Makefile
@@ -8,7 +8,7 @@ proc-y			:= nommu.o task_nommu.o
 proc-$(CONFIG_MMU)	:= mmu.o task_mmu.o
 
 proc-y       += inode.o root.o base.o generic.o array.o \
-		proc_tty.o
+		proc_tty.o fd.o
 proc-y	+= cmdline.o
 proc-y	+= consoles.o
 proc-y	+= cpuinfo.o
Index: linux-2.6.git/fs/proc/base.c
===================================================================
--- linux-2.6.git.orig/fs/proc/base.c
+++ linux-2.6.git/fs/proc/base.c
@@ -90,6 +90,7 @@
 #endif
 #include <trace/events/oom.h>
 #include "internal.h"
+#include "fd.h"
 
 /* NOTE:
  *	Implementing inode permission operations in /proc is almost
@@ -136,8 +137,6 @@ struct pid_entry {
 		NULL, &proc_single_file_operations,	\
 		{ .proc_show = show } )
 
-static int proc_fd_permission(struct inode *inode, int mask);
-
 /*
  * Count the number of hardlinks for the pid_entry table, excluding the .
  * and .. links.
@@ -1485,7 +1484,7 @@ out:
 	return error;
 }
 
-static const struct inode_operations proc_pid_link_inode_operations = {
+const struct inode_operations proc_pid_link_inode_operations = {
 	.readlink	= proc_pid_readlink,
 	.follow_link	= proc_pid_follow_link,
 	.setattr	= proc_setattr,
@@ -1494,21 +1493,6 @@ static const struct inode_operations pro
 
 /* building an inode */
 
-static int task_dumpable(struct task_struct *task)
-{
-	int dumpable = 0;
-	struct mm_struct *mm;
-
-	task_lock(task);
-	mm = task->mm;
-	if (mm)
-		dumpable = get_dumpable(mm);
-	task_unlock(task);
-	if(dumpable == 1)
-		return 1;
-	return 0;
-}
-
 struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
 {
 	struct inode * inode;
@@ -1634,15 +1618,6 @@ int pid_revalidate(struct dentry *dentry
 	return 0;
 }
 
-static int pid_delete_dentry(const struct dentry * dentry)
-{
-	/* Is the task we represent dead?
-	 * If so, then don't put the dentry on the lru list,
-	 * kill it immediately.
-	 */
-	return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
-}
-
 const struct dentry_operations pid_dentry_operations =
 {
 	.d_revalidate	= pid_revalidate,
@@ -1705,289 +1680,6 @@ end_instantiate:
 	return filldir(dirent, name, len, filp->f_pos, ino, type);
 }
 
-static unsigned name_to_int(struct dentry *dentry)
-{
-	const char *name = dentry->d_name.name;
-	int len = dentry->d_name.len;
-	unsigned n = 0;
-
-	if (len > 1 && *name == '0')
-		goto out;
-	while (len-- > 0) {
-		unsigned c = *name++ - '0';
-		if (c > 9)
-			goto out;
-		if (n >= (~0U-9)/10)
-			goto out;
-		n *= 10;
-		n += c;
-	}
-	return n;
-out:
-	return ~0U;
-}
-
-#define PROC_FDINFO_MAX 64
-
-static int proc_fd_info(struct inode *inode, struct path *path, char *info)
-{
-	struct task_struct *task = get_proc_task(inode);
-	struct files_struct *files = NULL;
-	struct file *file;
-	int fd = proc_fd(inode);
-
-	if (task) {
-		files = get_files_struct(task);
-		put_task_struct(task);
-	}
-	if (files) {
-		/*
-		 * We are not taking a ref to the file structure, so we must
-		 * hold ->file_lock.
-		 */
-		spin_lock(&files->file_lock);
-		file = fcheck_files(files, fd);
-		if (file) {
-			unsigned int f_flags;
-			struct fdtable *fdt;
-
-			fdt = files_fdtable(files);
-			f_flags = file->f_flags & ~O_CLOEXEC;
-			if (close_on_exec(fd, fdt))
-				f_flags |= O_CLOEXEC;
-
-			if (path) {
-				*path = file->f_path;
-				path_get(&file->f_path);
-			}
-			if (info)
-				snprintf(info, PROC_FDINFO_MAX,
-					 "pos:\t%lli\n"
-					 "flags:\t0%o\n",
-					 (long long) file->f_pos,
-					 f_flags);
-			spin_unlock(&files->file_lock);
-			put_files_struct(files);
-			return 0;
-		}
-		spin_unlock(&files->file_lock);
-		put_files_struct(files);
-	}
-	return -ENOENT;
-}
-
-static int proc_fd_link(struct dentry *dentry, struct path *path)
-{
-	return proc_fd_info(dentry->d_inode, path, NULL);
-}
-
-static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
-{
-	struct inode *inode;
-	struct task_struct *task;
-	int fd;
-	struct files_struct *files;
-	const struct cred *cred;
-
-	if (nd && nd->flags & LOOKUP_RCU)
-		return -ECHILD;
-
-	inode = dentry->d_inode;
-	task = get_proc_task(inode);
-	fd = proc_fd(inode);
-
-	if (task) {
-		files = get_files_struct(task);
-		if (files) {
-			struct file *file;
-			rcu_read_lock();
-			file = fcheck_files(files, fd);
-			if (file) {
-				unsigned f_mode = file->f_mode;
-
-				rcu_read_unlock();
-				put_files_struct(files);
-
-				if (task_dumpable(task)) {
-					rcu_read_lock();
-					cred = __task_cred(task);
-					inode->i_uid = cred->euid;
-					inode->i_gid = cred->egid;
-					rcu_read_unlock();
-				} else {
-					inode->i_uid = GLOBAL_ROOT_UID;
-					inode->i_gid = GLOBAL_ROOT_GID;
-				}
-
-				if (S_ISLNK(inode->i_mode)) {
-					unsigned i_mode = S_IFLNK;
-					if (f_mode & FMODE_READ)
-						i_mode |= S_IRUSR | S_IXUSR;
-					if (f_mode & FMODE_WRITE)
-						i_mode |= S_IWUSR | S_IXUSR;
-					inode->i_mode = i_mode;
-				}
-
-				security_task_to_inode(task, inode);
-				put_task_struct(task);
-				return 1;
-			}
-			rcu_read_unlock();
-			put_files_struct(files);
-		}
-		put_task_struct(task);
-	}
-	d_drop(dentry);
-	return 0;
-}
-
-static const struct dentry_operations tid_fd_dentry_operations =
-{
-	.d_revalidate	= tid_fd_revalidate,
-	.d_delete	= pid_delete_dentry,
-};
-
-static struct dentry *proc_fd_instantiate(struct inode *dir,
-	struct dentry *dentry, struct task_struct *task, const void *ptr)
-{
-	unsigned fd = (unsigned long)ptr;
- 	struct inode *inode;
- 	struct proc_inode *ei;
-	struct dentry *error = ERR_PTR(-ENOENT);
-
-	inode = proc_pid_make_inode(dir->i_sb, task);
-	if (!inode)
-		goto out;
-	ei = PROC_I(inode);
-	ei->fd = fd;
-
-	inode->i_mode = S_IFLNK;
-	inode->i_op = &proc_pid_link_inode_operations;
-	inode->i_size = 64;
-	ei->op.proc_get_link = proc_fd_link;
-	d_set_d_op(dentry, &tid_fd_dentry_operations);
-	d_add(dentry, inode);
-	/* Close the race of the process dying before we return the dentry */
-	if (tid_fd_revalidate(dentry, NULL))
-		error = NULL;
-
- out:
-	return error;
-}
-
-static struct dentry *proc_lookupfd_common(struct inode *dir,
-					   struct dentry *dentry,
-					   instantiate_t instantiate)
-{
-	struct task_struct *task = get_proc_task(dir);
-	unsigned fd = name_to_int(dentry);
-	struct dentry *result = ERR_PTR(-ENOENT);
-
-	if (!task)
-		goto out_no_task;
-	if (fd == ~0U)
-		goto out;
-
-	result = instantiate(dir, dentry, task, (void *)(unsigned long)fd);
-out:
-	put_task_struct(task);
-out_no_task:
-	return result;
-}
-
-static int proc_readfd_common(struct file * filp, void * dirent,
-			      filldir_t filldir, instantiate_t instantiate)
-{
-	struct dentry *dentry = filp->f_path.dentry;
-	struct inode *inode = dentry->d_inode;
-	struct task_struct *p = get_proc_task(inode);
-	unsigned int fd, ino;
-	int retval;
-	struct files_struct * files;
-
-	retval = -ENOENT;
-	if (!p)
-		goto out_no_task;
-	retval = 0;
-
-	fd = filp->f_pos;
-	switch (fd) {
-		case 0:
-			if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
-				goto out;
-			filp->f_pos++;
-		case 1:
-			ino = parent_ino(dentry);
-			if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
-				goto out;
-			filp->f_pos++;
-		default:
-			files = get_files_struct(p);
-			if (!files)
-				goto out;
-			rcu_read_lock();
-			for (fd = filp->f_pos-2;
-			     fd < files_fdtable(files)->max_fds;
-			     fd++, filp->f_pos++) {
-				char name[PROC_NUMBUF];
-				int len;
-				int rv;
-
-				if (!fcheck_files(files, fd))
-					continue;
-				rcu_read_unlock();
-
-				len = snprintf(name, sizeof(name), "%d", fd);
-				rv = proc_fill_cache(filp, dirent, filldir,
-						     name, len, instantiate, p,
-						     (void *)(unsigned long)fd);
-				if (rv < 0)
-					goto out_fd_loop;
-				rcu_read_lock();
-			}
-			rcu_read_unlock();
-out_fd_loop:
-			put_files_struct(files);
-	}
-out:
-	put_task_struct(p);
-out_no_task:
-	return retval;
-}
-
-static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry,
-				    struct nameidata *nd)
-{
-	return proc_lookupfd_common(dir, dentry, proc_fd_instantiate);
-}
-
-static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir)
-{
-	return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate);
-}
-
-static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
-				      size_t len, loff_t *ppos)
-{
-	char tmp[PROC_FDINFO_MAX];
-	int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp);
-	if (!err)
-		err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp));
-	return err;
-}
-
-static const struct file_operations proc_fdinfo_file_operations = {
-	.open           = nonseekable_open,
-	.read		= proc_fdinfo_read,
-	.llseek		= no_llseek,
-};
-
-static const struct file_operations proc_fd_operations = {
-	.read		= generic_read_dir,
-	.readdir	= proc_readfd,
-	.llseek		= default_llseek,
-};
-
 #ifdef CONFIG_CHECKPOINT_RESTORE
 
 /*
@@ -2330,82 +2022,6 @@ static const struct file_operations proc
 
 #endif /* CONFIG_CHECKPOINT_RESTORE */
 
-/*
- * /proc/pid/fd needs a special permission handler so that a process can still
- * access /proc/self/fd after it has executed a setuid().
- */
-static int proc_fd_permission(struct inode *inode, int mask)
-{
-	int rv = generic_permission(inode, mask);
-	if (rv == 0)
-		return 0;
-	if (task_pid(current) == proc_pid(inode))
-		rv = 0;
-	return rv;
-}
-
-/*
- * proc directories can do almost nothing..
- */
-static const struct inode_operations proc_fd_inode_operations = {
-	.lookup		= proc_lookupfd,
-	.permission	= proc_fd_permission,
-	.setattr	= proc_setattr,
-};
-
-static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
-	struct dentry *dentry, struct task_struct *task, const void *ptr)
-{
-	unsigned fd = (unsigned long)ptr;
- 	struct inode *inode;
- 	struct proc_inode *ei;
-	struct dentry *error = ERR_PTR(-ENOENT);
-
-	inode = proc_pid_make_inode(dir->i_sb, task);
-	if (!inode)
-		goto out;
-	ei = PROC_I(inode);
-	ei->fd = fd;
-	inode->i_mode = S_IFREG | S_IRUSR;
-	inode->i_fop = &proc_fdinfo_file_operations;
-	d_set_d_op(dentry, &tid_fd_dentry_operations);
-	d_add(dentry, inode);
-	/* Close the race of the process dying before we return the dentry */
-	if (tid_fd_revalidate(dentry, NULL))
-		error = NULL;
-
- out:
-	return error;
-}
-
-static struct dentry *proc_lookupfdinfo(struct inode *dir,
-					struct dentry *dentry,
-					struct nameidata *nd)
-{
-	return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate);
-}
-
-static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir)
-{
-	return proc_readfd_common(filp, dirent, filldir,
-				  proc_fdinfo_instantiate);
-}
-
-static const struct file_operations proc_fdinfo_operations = {
-	.read		= generic_read_dir,
-	.readdir	= proc_readfdinfo,
-	.llseek		= default_llseek,
-};
-
-/*
- * proc directories can do almost nothing..
- */
-static const struct inode_operations proc_fdinfo_inode_operations = {
-	.lookup		= proc_lookupfdinfo,
-	.setattr	= proc_setattr,
-};
-
-
 static struct dentry *proc_pident_instantiate(struct inode *dir,
 	struct dentry *dentry, struct task_struct *task, const void *ptr)
 {
Index: linux-2.6.git/fs/proc/fd.c
===================================================================
--- /dev/null
+++ linux-2.6.git/fs/proc/fd.c
@@ -0,0 +1,351 @@
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/dcache.h>
+#include <linux/path.h>
+#include <linux/fdtable.h>
+#include <linux/namei.h>
+#include <linux/pid.h>
+#include <linux/security.h>
+
+#include <linux/proc_fs.h>
+
+#include "internal.h"
+#include "fd.h"
+
+#define PROC_FDINFO_MAX 64
+
+static int proc_fd_info(struct inode *inode, struct path *path, char *info)
+{
+	struct task_struct *task = get_proc_task(inode);
+	struct files_struct *files = NULL;
+	int fd = proc_fd(inode);
+	struct file *file;
+
+	if (task) {
+		files = get_files_struct(task);
+		put_task_struct(task);
+	}
+	if (files) {
+		/*
+		 * We are not taking a ref to the file structure, so we must
+		 * hold ->file_lock.
+		 */
+		spin_lock(&files->file_lock);
+		file = fcheck_files(files, fd);
+		if (file) {
+			unsigned int f_flags;
+			struct fdtable *fdt;
+
+			fdt = files_fdtable(files);
+			f_flags = file->f_flags & ~O_CLOEXEC;
+			if (close_on_exec(fd, fdt))
+				f_flags |= O_CLOEXEC;
+
+			if (path) {
+				*path = file->f_path;
+				path_get(&file->f_path);
+			}
+			if (info)
+				snprintf(info, PROC_FDINFO_MAX,
+					 "pos:\t%lli\n"
+					 "flags:\t0%o\n",
+					 (long long) file->f_pos,
+					 f_flags);
+			spin_unlock(&files->file_lock);
+			put_files_struct(files);
+			return 0;
+		}
+		spin_unlock(&files->file_lock);
+		put_files_struct(files);
+	}
+	return -ENOENT;
+}
+
+static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+	struct files_struct *files;
+	struct task_struct *task;
+	const struct cred *cred;
+	struct inode *inode;
+	int fd;
+
+	if (nd && nd->flags & LOOKUP_RCU)
+		return -ECHILD;
+
+	inode = dentry->d_inode;
+	task = get_proc_task(inode);
+	fd = proc_fd(inode);
+
+	if (task) {
+		files = get_files_struct(task);
+		if (files) {
+			struct file *file;
+
+			rcu_read_lock();
+			file = fcheck_files(files, fd);
+			if (file) {
+				unsigned f_mode = file->f_mode;
+
+				rcu_read_unlock();
+				put_files_struct(files);
+
+				if (task_dumpable(task)) {
+					rcu_read_lock();
+					cred = __task_cred(task);
+					inode->i_uid = cred->euid;
+					inode->i_gid = cred->egid;
+					rcu_read_unlock();
+				} else {
+					inode->i_uid = GLOBAL_ROOT_UID;
+					inode->i_gid = GLOBAL_ROOT_GID;
+				}
+
+				if (S_ISLNK(inode->i_mode)) {
+					unsigned i_mode = S_IFLNK;
+					if (f_mode & FMODE_READ)
+						i_mode |= S_IRUSR | S_IXUSR;
+					if (f_mode & FMODE_WRITE)
+						i_mode |= S_IWUSR | S_IXUSR;
+					inode->i_mode = i_mode;
+				}
+
+				security_task_to_inode(task, inode);
+				put_task_struct(task);
+				return 1;
+			}
+			rcu_read_unlock();
+			put_files_struct(files);
+		}
+		put_task_struct(task);
+	}
+
+	d_drop(dentry);
+	return 0;
+}
+
+static const struct dentry_operations tid_fd_dentry_operations = {
+	.d_revalidate	= tid_fd_revalidate,
+	.d_delete	= pid_delete_dentry,
+};
+
+static int proc_fd_link(struct dentry *dentry, struct path *path)
+{
+	return proc_fd_info(dentry->d_inode, path, NULL);
+}
+
+static struct dentry *
+proc_fd_instantiate(struct inode *dir, struct dentry *dentry,
+		    struct task_struct *task, const void *ptr)
+{
+	struct dentry *error = ERR_PTR(-ENOENT);
+	unsigned fd = (unsigned long)ptr;
+	struct proc_inode *ei;
+	struct inode *inode;
+
+	inode = proc_pid_make_inode(dir->i_sb, task);
+	if (!inode)
+		goto out;
+
+	ei = PROC_I(inode);
+	ei->fd = fd;
+
+	inode->i_mode = S_IFLNK;
+	inode->i_op = &proc_pid_link_inode_operations;
+	inode->i_size = 64;
+
+	ei->op.proc_get_link = proc_fd_link;
+
+	d_set_d_op(dentry, &tid_fd_dentry_operations);
+	d_add(dentry, inode);
+
+	/* Close the race of the process dying before we return the dentry */
+	if (tid_fd_revalidate(dentry, NULL))
+		error = NULL;
+ out:
+	return error;
+}
+
+static struct dentry *proc_lookupfd_common(struct inode *dir,
+					   struct dentry *dentry,
+					   instantiate_t instantiate)
+{
+	struct task_struct *task = get_proc_task(dir);
+	struct dentry *result = ERR_PTR(-ENOENT);
+	unsigned fd = name_to_int(dentry);
+
+	if (!task)
+		goto out_no_task;
+	if (fd == ~0U)
+		goto out;
+
+	result = instantiate(dir, dentry, task, (void *)(unsigned long)fd);
+out:
+	put_task_struct(task);
+out_no_task:
+	return result;
+}
+
+static int proc_readfd_common(struct file * filp, void * dirent,
+			      filldir_t filldir, instantiate_t instantiate)
+{
+	struct dentry *dentry = filp->f_path.dentry;
+	struct inode *inode = dentry->d_inode;
+	struct task_struct *p = get_proc_task(inode);
+	struct files_struct *files;
+	unsigned int fd, ino;
+	int retval;
+
+	retval = -ENOENT;
+	if (!p)
+		goto out_no_task;
+	retval = 0;
+
+	fd = filp->f_pos;
+	switch (fd) {
+		case 0:
+			if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
+				goto out;
+			filp->f_pos++;
+		case 1:
+			ino = parent_ino(dentry);
+			if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
+				goto out;
+			filp->f_pos++;
+		default:
+			files = get_files_struct(p);
+			if (!files)
+				goto out;
+			rcu_read_lock();
+			for (fd = filp->f_pos - 2;
+			     fd < files_fdtable(files)->max_fds;
+			     fd++, filp->f_pos++) {
+				char name[PROC_NUMBUF];
+				int len;
+				int rv;
+
+				if (!fcheck_files(files, fd))
+					continue;
+				rcu_read_unlock();
+
+				len = snprintf(name, sizeof(name), "%d", fd);
+				rv = proc_fill_cache(filp, dirent, filldir,
+						     name, len, instantiate, p,
+						     (void *)(unsigned long)fd);
+				if (rv < 0)
+					goto out_fd_loop;
+				rcu_read_lock();
+			}
+			rcu_read_unlock();
+out_fd_loop:
+			put_files_struct(files);
+	}
+out:
+	put_task_struct(p);
+out_no_task:
+	return retval;
+}
+
+static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
+				size_t len, loff_t *ppos)
+{
+	char tmp[PROC_FDINFO_MAX];
+	int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp);
+	if (!err)
+		err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp));
+	return err;
+}
+
+static const struct file_operations proc_fdinfo_file_operations = {
+	.open           = nonseekable_open,
+	.read		= proc_fdinfo_read,
+	.llseek		= no_llseek,
+};
+
+static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir)
+{
+	return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate);
+}
+
+const struct file_operations proc_fd_operations = {
+	.read		= generic_read_dir,
+	.readdir	= proc_readfd,
+	.llseek		= default_llseek,
+};
+
+static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry,
+				    struct nameidata *nd)
+{
+	return proc_lookupfd_common(dir, dentry, proc_fd_instantiate);
+}
+
+/*
+ * /proc/pid/fd needs a special permission handler so that a process can still
+ * access /proc/self/fd after it has executed a setuid().
+ */
+int proc_fd_permission(struct inode *inode, int mask)
+{
+	int rv = generic_permission(inode, mask);
+	if (rv == 0)
+		return 0;
+	if (task_pid(current) == proc_pid(inode))
+		rv = 0;
+	return rv;
+}
+
+const struct inode_operations proc_fd_inode_operations = {
+	.lookup		= proc_lookupfd,
+	.permission	= proc_fd_permission,
+	.setattr	= proc_setattr,
+};
+
+static struct dentry *
+proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry,
+			struct task_struct *task, const void *ptr)
+{
+	struct dentry *error = ERR_PTR(-ENOENT);
+	unsigned fd = (unsigned long)ptr;
+	struct proc_inode *ei;
+	struct inode *inode;
+
+	inode = proc_pid_make_inode(dir->i_sb, task);
+	if (!inode)
+		goto out;
+
+	ei = PROC_I(inode);
+	ei->fd = fd;
+
+	inode->i_mode = S_IFREG | S_IRUSR;
+	inode->i_fop = &proc_fdinfo_file_operations;
+
+	d_set_d_op(dentry, &tid_fd_dentry_operations);
+	d_add(dentry, inode);
+
+	/* Close the race of the process dying before we return the dentry */
+	if (tid_fd_revalidate(dentry, NULL))
+		error = NULL;
+ out:
+	return error;
+}
+
+static struct dentry *
+proc_lookupfdinfo(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+{
+	return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate);
+}
+
+static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir)
+{
+	return proc_readfd_common(filp, dirent, filldir,
+				  proc_fdinfo_instantiate);
+}
+
+const struct inode_operations proc_fdinfo_inode_operations = {
+	.lookup		= proc_lookupfdinfo,
+	.setattr	= proc_setattr,
+};
+
+const struct file_operations proc_fdinfo_operations = {
+	.read		= generic_read_dir,
+	.readdir	= proc_readfdinfo,
+	.llseek		= default_llseek,
+};
Index: linux-2.6.git/fs/proc/fd.h
===================================================================
--- /dev/null
+++ linux-2.6.git/fs/proc/fd.h
@@ -0,0 +1,14 @@
+#ifndef __PROCFS_FD_H__
+#define __PROCFS_FD_H__
+
+#include <linux/fs.h>
+
+extern const struct file_operations proc_fd_operations;
+extern const struct inode_operations proc_fd_inode_operations;
+
+extern const struct file_operations proc_fdinfo_operations;
+extern const struct inode_operations proc_fdinfo_inode_operations;
+
+extern int proc_fd_permission(struct inode *inode, int mask);
+
+#endif /* __PROCFS_FD_H__ */
Index: linux-2.6.git/fs/proc/internal.h
===================================================================
--- linux-2.6.git.orig/fs/proc/internal.h
+++ linux-2.6.git/fs/proc/internal.h
@@ -9,6 +9,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/sched.h>
 #include <linux/proc_fs.h>
 struct  ctl_table_header;
 
@@ -65,6 +66,7 @@ extern const struct file_operations proc
 extern const struct file_operations proc_pagemap_operations;
 extern const struct file_operations proc_net_operations;
 extern const struct inode_operations proc_net_inode_operations;
+extern const struct inode_operations proc_pid_link_inode_operations;
 
 struct proc_maps_private {
 	struct pid *pid;
@@ -91,6 +93,52 @@ static inline int proc_fd(struct inode *
 	return PROC_I(inode)->fd;
 }
 
+static inline int task_dumpable(struct task_struct *task)
+{
+	int dumpable = 0;
+	struct mm_struct *mm;
+
+	task_lock(task);
+	mm = task->mm;
+	if (mm)
+		dumpable = get_dumpable(mm);
+	task_unlock(task);
+	if(dumpable == 1)
+		return 1;
+	return 0;
+}
+
+static inline int pid_delete_dentry(const struct dentry * dentry)
+{
+	/* Is the task we represent dead?
+	 * If so, then don't put the dentry on the lru list,
+	 * kill it immediately.
+	 */
+	return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
+}
+
+static inline unsigned name_to_int(struct dentry *dentry)
+{
+	const char *name = dentry->d_name.name;
+	int len = dentry->d_name.len;
+	unsigned n = 0;
+
+	if (len > 1 && *name == '0')
+		goto out;
+	while (len-- > 0) {
+		unsigned c = *name++ - '0';
+		if (c > 9)
+			goto out;
+		if (n >= (~0U-9)/10)
+			goto out;
+		n *= 10;
+		n += c;
+	}
+	return n;
+out:
+	return ~0U;
+}
+
 struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
 		struct dentry *dentry);
 int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [rfc 2/7] procfs: Convert /proc/pid/fdinfo/ handling routines to seq-file
  2012-06-27 11:01 [rfc 0/7] [rfc] procfs, fdinfo seqfile providers Cyrill Gorcunov
  2012-06-27 11:01 ` [rfc 1/7] procfs: Move /proc/pid/fd[info] handling code to fd.[ch] Cyrill Gorcunov
@ 2012-06-27 11:01 ` Cyrill Gorcunov
  2012-07-04  7:37   ` Pavel Emelyanov
  2012-06-27 11:01 ` [rfc 3/7] procfs: Add ability to plugin auxiliary fdinfo providers Cyrill Gorcunov
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 17+ messages in thread
From: Cyrill Gorcunov @ 2012-06-27 11:01 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-fsdevel, Cyrill Gorcunov, Al Viro, Alexey Dobriyan,
	Andrew Morton, Pavel Emelyanov, James Bottomley

[-- Attachment #1: seq-fdinfo-seq-ops-4 --]
[-- Type: text/plain, Size: 5111 bytes --]

This patch converts /proc/pid/fdinfo/ handling routines to seq-file which
is needed to extend seq operations and plug in auxiliary fdinfo providers
from subsystems like eventfd/eventpoll/fsnotify.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
CC: Al Viro <viro@ZenIV.linux.org.uk>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Pavel Emelyanov <xemul@parallels.com>
CC: James Bottomley <jbottomley@parallels.com>
---
 fs/proc/fd.c |  133 +++++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 85 insertions(+), 48 deletions(-)

Index: linux-2.6.git/fs/proc/fd.c
===================================================================
--- linux-2.6.git.orig/fs/proc/fd.c
+++ linux-2.6.git/fs/proc/fd.c
@@ -6,61 +6,90 @@
 #include <linux/namei.h>
 #include <linux/pid.h>
 #include <linux/security.h>
+#include <linux/file.h>
+#include <linux/seq_file.h>
 
 #include <linux/proc_fs.h>
 
 #include "internal.h"
 #include "fd.h"
 
-#define PROC_FDINFO_MAX 64
+struct proc_fdinfo {
+	loff_t	f_pos;
+	int	f_flags;
+};
+
+static int seq_show(struct seq_file *m, void *v)
+{
+	struct proc_fdinfo *fdinfo = m->private;
+	seq_printf(m, "pos:\t%lli\nflags:\t0%o\n",
+		   (long long)fdinfo->f_pos,
+		   fdinfo->f_flags);
+	return 0;
+}
 
-static int proc_fd_info(struct inode *inode, struct path *path, char *info)
+static int seq_fdinfo_open(struct inode *inode, struct file *file)
 {
-	struct task_struct *task = get_proc_task(inode);
 	struct files_struct *files = NULL;
-	int fd = proc_fd(inode);
-	struct file *file;
+	struct proc_fdinfo *fdinfo = NULL;
+	struct task_struct *task;
+	int ret = -ENOENT;
 
+	fdinfo = kzalloc(sizeof(*fdinfo), GFP_KERNEL);
+	if (!fdinfo)
+		return -ENOMEM;
+
+	task = get_proc_task(inode);
 	if (task) {
 		files = get_files_struct(task);
 		put_task_struct(task);
 	}
+
 	if (files) {
-		/*
-		 * We are not taking a ref to the file structure, so we must
-		 * hold ->file_lock.
-		 */
+		int fd = proc_fd(inode);
+		struct file *fd_file;
+
 		spin_lock(&files->file_lock);
-		file = fcheck_files(files, fd);
-		if (file) {
-			unsigned int f_flags;
-			struct fdtable *fdt;
+		fd_file = fcheck_files(files, fd);
+		if (fd_file) {
+			struct fdtable *fdt = files_fdtable(files);
 
-			fdt = files_fdtable(files);
-			f_flags = file->f_flags & ~O_CLOEXEC;
+			fdinfo->f_flags = fd_file->f_flags & ~O_CLOEXEC;
 			if (close_on_exec(fd, fdt))
-				f_flags |= O_CLOEXEC;
-
-			if (path) {
-				*path = file->f_path;
-				path_get(&file->f_path);
-			}
-			if (info)
-				snprintf(info, PROC_FDINFO_MAX,
-					 "pos:\t%lli\n"
-					 "flags:\t0%o\n",
-					 (long long) file->f_pos,
-					 f_flags);
-			spin_unlock(&files->file_lock);
-			put_files_struct(files);
-			return 0;
+				fdinfo->f_flags |= O_CLOEXEC;
+			ret = 0;
 		}
 		spin_unlock(&files->file_lock);
 		put_files_struct(files);
 	}
-	return -ENOENT;
+
+	if (!ret) {
+		ret = single_open(file, seq_show, fdinfo);
+		if (!ret)
+			fdinfo = NULL;
+	}
+
+	kfree(fdinfo);
+	return ret;
 }
 
+static int seq_fdinfo_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *m = file->private_data;
+	struct proc_fdinfo *fdinfo = m->private;
+
+	kfree(fdinfo);
+
+	return single_release(inode, file);
+}
+
+static const struct file_operations proc_fdinfo_file_operations = {
+	.open		= seq_fdinfo_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_fdinfo_release,
+};
+
 static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
 	struct files_struct *files;
@@ -130,7 +159,31 @@ static const struct dentry_operations ti
 
 static int proc_fd_link(struct dentry *dentry, struct path *path)
 {
-	return proc_fd_info(dentry->d_inode, path, NULL);
+       struct inode *inode = dentry->d_inode;
+       struct task_struct *task = get_proc_task(inode);
+       struct files_struct *files = NULL;
+       int fd = proc_fd(inode);
+       struct file *file;
+       int err = -ENOENT;
+
+       if (task) {
+	       files = get_files_struct(task);
+	       put_task_struct(task);
+       }
+
+       if (files) {
+	       spin_lock(&files->file_lock);
+	       file = fcheck_files(files, fd);
+	       if (file) {
+		       *path = file->f_path;
+		       path_get(&file->f_path);
+	       }
+	       spin_unlock(&files->file_lock);
+	       put_files_struct(files);
+	       err = 0;
+       }
+
+       return err;
 }
 
 static struct dentry *
@@ -245,22 +298,6 @@ out_no_task:
 	return retval;
 }
 
-static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
-				size_t len, loff_t *ppos)
-{
-	char tmp[PROC_FDINFO_MAX];
-	int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp);
-	if (!err)
-		err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp));
-	return err;
-}
-
-static const struct file_operations proc_fdinfo_file_operations = {
-	.open           = nonseekable_open,
-	.read		= proc_fdinfo_read,
-	.llseek		= no_llseek,
-};
-
 static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir)
 {
 	return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate);


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [rfc 3/7] procfs: Add ability to plugin auxiliary fdinfo providers
  2012-06-27 11:01 [rfc 0/7] [rfc] procfs, fdinfo seqfile providers Cyrill Gorcunov
  2012-06-27 11:01 ` [rfc 1/7] procfs: Move /proc/pid/fd[info] handling code to fd.[ch] Cyrill Gorcunov
  2012-06-27 11:01 ` [rfc 2/7] procfs: Convert /proc/pid/fdinfo/ handling routines to seq-file Cyrill Gorcunov
@ 2012-06-27 11:01 ` Cyrill Gorcunov
  2012-07-04  7:39   ` Pavel Emelyanov
  2012-06-27 11:01 ` [rfc 4/7] fs, eventfd: Add procfs fdinfo helper Cyrill Gorcunov
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 17+ messages in thread
From: Cyrill Gorcunov @ 2012-06-27 11:01 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-fsdevel, Cyrill Gorcunov, Al Viro, Alexey Dobriyan,
	Andrew Morton, Pavel Emelyanov, James Bottomley

[-- Attachment #1: seq-fdinfo-seq-ops-helpers-4 --]
[-- Type: text/plain, Size: 7365 bytes --]

This patch brings the ability to plug in auxiliary fdinfo providers.
For example, in further patches eventfd, eventpoll and fsnotify
will print out information associated with files.

This feature is CONFIG_CHECKPOINT_RESTORE guarded to eliminate
overhead for those who don't need it (this unfortunately
makes the patch bigger than I wanted).

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
CC: Al Viro <viro@ZenIV.linux.org.uk>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Pavel Emelyanov <xemul@parallels.com>
CC: James Bottomley <jbottomley@parallels.com>
---
 fs/proc/fd.c            |  208 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/proc_fs.h |   28 ++++++
 2 files changed, 236 insertions(+)

Index: linux-2.6.git/fs/proc/fd.c
===================================================================
--- linux-2.6.git.orig/fs/proc/fd.c
+++ linux-2.6.git/fs/proc/fd.c
@@ -8,12 +8,218 @@
 #include <linux/security.h>
 #include <linux/file.h>
 #include <linux/seq_file.h>
+#include <linux/spinlock.h>
 
 #include <linux/proc_fs.h>
 
 #include "internal.h"
 #include "fd.h"
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
+
+static LIST_HEAD(fdinfo_drivers);
+static DECLARE_RWSEM(fdinfo_drivers_sem);
+
+int proc_register_fdinfo_driver(struct proc_fdinfo_driver *s)
+{
+	struct proc_fdinfo_driver *i;
+	int ret = 0;
+
+	if (!s->ops || !s->probe)
+		return -EINVAL;
+
+	down_write(&fdinfo_drivers_sem);
+	list_for_each_entry(i, &fdinfo_drivers, list) {
+		if (i == s) {
+			WARN_ONCE("Trying reassign fdinfo driver `%s'\n",
+				  i->name);
+			ret = -EINVAL;
+			break;
+		}
+	}
+	if (!ret)
+		list_add(&s->list, &fdinfo_drivers);
+	up_write(&fdinfo_drivers_sem);
+
+	return ret;
+}
+
+void proc_unregister_fdinfo_driver(struct proc_fdinfo_driver *s)
+{
+	struct proc_fdinfo_driver *i;
+
+	down_write(&fdinfo_drivers_sem);
+	list_for_each_entry(i, &fdinfo_drivers, list) {
+		if (i == s) {
+			list_del(&i->list);
+			break;
+		}
+	}
+	up_write(&fdinfo_drivers_sem);
+}
+
+static int prep_fdinfo_driver(struct proc_fdinfo_extra *extra)
+{
+	struct proc_fdinfo_driver *s;
+	int ret = 0;
+
+	down_read(&fdinfo_drivers_sem);
+	list_for_each_entry(s, &fdinfo_drivers, list) {
+		if (s->probe(extra->f_file)) {
+			extra->driver = s;
+			break;
+		}
+	}
+	up_read(&fdinfo_drivers_sem);
+
+	if (extra->driver && extra->driver->priv_size) {
+		extra->priv = kmalloc(extra->driver->priv_size, GFP_KERNEL);
+		if (!extra->priv)
+			ret = -ENOMEM;
+	}
+
+	return ret;
+}
+
+static void *seq_start(struct seq_file *m, loff_t *pos)
+{
+	struct proc_fdinfo_extra *extra = m->private;
+
+	down_read(&fdinfo_drivers_sem);
+	extra->pos = *pos;
+
+	return *pos == 0 ? extra :
+		(extra->driver ? extra->driver->ops->start(m, pos) : NULL);
+}
+
+static void seq_stop(struct seq_file *m, void *v)
+{
+	struct proc_fdinfo_extra *extra = m->private;
+
+	if (extra->driver && extra->pos > 0)
+		extra->driver->ops->stop(m, v);
+	up_read(&fdinfo_drivers_sem);
+}
+
+static void *seq_next(struct seq_file *m, void *p, loff_t *pos)
+{
+	struct proc_fdinfo_extra *extra = m->private;
+	void *v = NULL;
+
+	if (extra->driver) {
+		int ret = 0;
+
+		if (*pos == 0) {
+			v = extra->driver->ops->start(m, pos);
+			if (v) {
+				ret = extra->driver->ops->show(m, v);
+				p = v;
+			} else
+				ret = -1;
+		}
+
+		if (!ret)
+			v = extra->driver->ops->next(m, p, pos);
+	} else
+		++*pos;
+
+	extra->pos = *pos;
+	return v;
+}
+
+static int seq_show(struct seq_file *m, void *v)
+{
+	struct proc_fdinfo_extra *extra = m->private;
+
+	if (extra->driver && extra->pos > 0)
+		return extra->driver->ops->show(m, v);
+
+	seq_printf(m, "pos:\t%lli\nflags:\t0%o\n",
+		   (long long)extra->f_file->f_pos,
+		   extra->f_flags);
+	return 0;
+}
+
+static const struct seq_operations fdinfo_seq_ops = {
+	.start	= seq_start,
+	.next	= seq_next,
+	.stop	= seq_stop,
+	.show	= seq_show,
+};
+
+static int seq_fdinfo_open(struct inode *inode, struct file *file)
+{
+	struct files_struct *files = NULL;
+	struct proc_fdinfo_extra *extra;
+	struct task_struct *task;
+	struct seq_file *m;
+	int ret;
+
+	extra = kzalloc(sizeof(*extra), GFP_KERNEL);
+	if (!extra)
+		return -ENOMEM;
+
+	ret = seq_open(file, &fdinfo_seq_ops);
+	if (!ret) {
+		ret = -ENOENT;
+		m = file->private_data;
+		m->private = extra;
+
+		task = get_proc_task(inode);
+		if (task) {
+			files = get_files_struct(task);
+			put_task_struct(task);
+		}
+
+		if (files) {
+			int fd = proc_fd(inode);
+
+			spin_lock(&files->file_lock);
+			extra->f_file = fcheck_files(files, fd);
+			if (extra->f_file) {
+				struct fdtable *fdt = files_fdtable(files);
+
+				extra->f_flags = extra->f_file->f_flags & ~O_CLOEXEC;
+				if (close_on_exec(fd, fdt))
+					extra->f_flags |= O_CLOEXEC;
+				get_file(extra->f_file);
+			}
+			spin_unlock(&files->file_lock);
+			put_files_struct(files);
+
+			ret = prep_fdinfo_driver(extra);
+		}
+	}
+
+	if (ret) {
+		if (extra->f_file)
+			put_filp(extra->f_file);
+		kfree(extra);
+	}
+	return ret;
+}
+
+static int seq_fdinfo_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *m = file->private_data;
+	struct proc_fdinfo_extra *extra = m->private;
+
+	put_filp(extra->f_file);
+	kfree(extra->priv);
+	kfree(m->private);
+
+	return seq_release(inode, file);
+}
+
+static const struct file_operations proc_fdinfo_file_operations = {
+	.open		= seq_fdinfo_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_fdinfo_release,
+};
+
+#else /* CONFIG_CHECKPOINT_RESTORE */
+
 struct proc_fdinfo {
 	loff_t	f_pos;
 	int	f_flags;
@@ -90,6 +296,8 @@ static const struct file_operations proc
 	.release	= seq_fdinfo_release,
 };
 
+#endif /* CONFIG_CHECKPOINT_RESTORE */
+
 static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
 	struct files_struct *files;
Index: linux-2.6.git/include/linux/proc_fs.h
===================================================================
--- linux-2.6.git.orig/include/linux/proc_fs.h
+++ linux-2.6.git/include/linux/proc_fs.h
@@ -100,6 +100,26 @@ struct vmcore {
 	loff_t offset;
 };
 
+struct seq_operations;
+
+/* fdinfo auxiliary information drivers */
+struct proc_fdinfo_driver {
+	struct list_head		list;
+	const char			*name;
+	const struct seq_operations	*ops;
+	int				(*probe)(struct file *file);
+	size_t				priv_size;
+};
+
+/* auxiliary data allocated per fdinfo reader */
+struct proc_fdinfo_extra {
+	struct proc_fdinfo_driver	*driver;
+	void				*priv;
+	loff_t				pos;
+	struct file			*f_file;
+	unsigned int			f_flags;
+};
+
 #ifdef CONFIG_PROC_FS
 
 extern void proc_root_init(void);
@@ -175,6 +195,11 @@ extern struct proc_dir_entry *proc_net_m
 
 extern struct file *proc_ns_fget(int fd);
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
+extern int proc_register_fdinfo_driver(struct proc_fdinfo_driver *s);
+extern void proc_unregister_fdinfo_driver(struct proc_fdinfo_driver *s);
+#endif /* CONFIG_CHECKPOINT_RESTORE */
+
 #else
 
 #define proc_net_fops_create(net, name, mode, fops)  ({ (void)(mode), NULL; })
@@ -229,6 +254,9 @@ static inline struct file *proc_ns_fget(
 	return ERR_PTR(-EINVAL);
 }
 
+static inline int proc_register_fdinfo_driver(struct proc_fdinfo_driver *s) { return -EINVAL; }
+static inline void proc_unregister_fdinfo_driver(struct proc_fdinfo_driver *s) { }
+
 #endif /* CONFIG_PROC_FS */
 
 #if !defined(CONFIG_PROC_KCORE)


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [rfc 4/7] fs, eventfd: Add procfs fdinfo helper
  2012-06-27 11:01 [rfc 0/7] [rfc] procfs, fdinfo seqfile providers Cyrill Gorcunov
                   ` (2 preceding siblings ...)
  2012-06-27 11:01 ` [rfc 3/7] procfs: Add ability to plugin auxiliary fdinfo providers Cyrill Gorcunov
@ 2012-06-27 11:01 ` Cyrill Gorcunov
  2012-06-27 11:01 ` [rfc 5/7] fs, epoll: " Cyrill Gorcunov
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 17+ messages in thread
From: Cyrill Gorcunov @ 2012-06-27 11:01 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-fsdevel, Cyrill Gorcunov, Al Viro, Alexey Dobriyan,
	Andrew Morton, Pavel Emelyanov, James Bottomley

[-- Attachment #1: seq-fdinfo-eventfd-6 --]
[-- Type: text/plain, Size: 2276 bytes --]

This allows us to print out the raw counter value.
The /proc/pid/fdinfo/fd output is

 | pos:	0
 | flags:	04002
 | eventfd-count:               5a

This feature is CONFIG_CHECKPOINT_RESTORE only.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
CC: Al Viro <viro@ZenIV.linux.org.uk>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Pavel Emelyanov <xemul@parallels.com>
CC: James Bottomley <jbottomley@parallels.com>
---
 fs/eventfd.c |   55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

Index: linux-2.6.git/fs/eventfd.c
===================================================================
--- linux-2.6.git.orig/fs/eventfd.c
+++ linux-2.6.git/fs/eventfd.c
@@ -19,6 +19,8 @@
 #include <linux/export.h>
 #include <linux/kref.h>
 #include <linux/eventfd.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 
 struct eventfd_ctx {
 	struct kref kref;
@@ -433,3 +435,56 @@ SYSCALL_DEFINE1(eventfd, unsigned int, c
 	return sys_eventfd2(count, 0);
 }
 
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_CHECKPOINT_RESTORE)
+
+static void *seq_start(struct seq_file *m, loff_t *pos)
+{
+	struct proc_fdinfo_extra *extra = m->private;
+	return *pos == 0 ? extra->f_file : NULL;
+}
+
+static void *seq_next(struct seq_file *m, void *p, loff_t *pos)
+{
+	++*pos;
+	return NULL;
+}
+
+static int seq_show(struct seq_file *m, void *v)
+{
+	struct eventfd_ctx *ctx = ((struct file *)v)->private_data;
+
+	spin_lock_irq(&ctx->wqh.lock);
+	seq_printf(m, "eventfd-count: %16llx\n",
+		   (unsigned long long)ctx->count);
+	spin_unlock_irq(&ctx->wqh.lock);
+
+	return 0;
+}
+
+static void seq_stop(struct seq_file *p, void *v) { }
+
+static const struct seq_operations eventfd_fdinfo_ops = {
+	.start	= seq_start,
+	.next	= seq_next,
+	.stop	= seq_stop,
+	.show	= seq_show,
+};
+
+static int is_eventfd_file(struct file *file)
+{
+	return file->f_op == &eventfd_fops;
+}
+
+static struct proc_fdinfo_driver eventfd_fdinfo = {
+	.name	= "eventfd",
+	.ops	= &eventfd_fdinfo_ops,
+	.probe	= is_eventfd_file,
+};
+
+static int __init eventfd_init(void)
+{
+	return proc_register_fdinfo_driver(&eventfd_fdinfo);
+}
+fs_initcall(eventfd_init);
+
+#endif /* CONFIG_PROC_FS && CONFIG_CHECKPOINT_RESTORE */


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [rfc 5/7] fs, epoll: Add procfs fdinfo helper
  2012-06-27 11:01 [rfc 0/7] [rfc] procfs, fdinfo seqfile providers Cyrill Gorcunov
                   ` (3 preceding siblings ...)
  2012-06-27 11:01 ` [rfc 4/7] fs, eventfd: Add procfs fdinfo helper Cyrill Gorcunov
@ 2012-06-27 11:01 ` Cyrill Gorcunov
  2012-07-19 14:52   ` Matthew Helsley
  2012-06-27 11:01 ` [rfc 6/7] fs, exportfs: Add export_encode_inode_fh helper Cyrill Gorcunov
  2012-06-27 11:01 ` [rfc 7/7] fs, notify: Add procfs fdinfo helper Cyrill Gorcunov
  6 siblings, 1 reply; 17+ messages in thread
From: Cyrill Gorcunov @ 2012-06-27 11:01 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-fsdevel, Cyrill Gorcunov, Al Viro, Alexey Dobriyan,
	Andrew Morton, Pavel Emelyanov, James Bottomley

[-- Attachment #1: seq-fdinfo-eventpoll-4 --]
[-- Type: text/plain, Size: 3302 bytes --]

This allows us to print out the eventpoll target file descriptor,
events and data. The /proc/pid/fdinfo/fd output consists of

 | pos:	0
 | flags:	02
 | tfd:        5 events:       1d data: ffffffffffffffff

This feature is CONFIG_CHECKPOINT_RESTORE only.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
CC: Al Viro <viro@ZenIV.linux.org.uk>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Pavel Emelyanov <xemul@parallels.com>
CC: James Bottomley <jbottomley@parallels.com>
---
 fs/eventpoll.c |   81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)

Index: linux-2.6.git/fs/eventpoll.c
===================================================================
--- linux-2.6.git.orig/fs/eventpoll.c
+++ linux-2.6.git/fs/eventpoll.c
@@ -38,6 +38,8 @@
 #include <asm/io.h>
 #include <asm/mman.h>
 #include <linux/atomic.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 
 /*
  * LOCKING:
@@ -1897,6 +1899,83 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd,
 	return error;
 }
 
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_CHECKPOINT_RESTORE)
+
+struct epitem_fdinfo {
+	struct epoll_event	ev;
+	int			fd;
+};
+
+static struct epitem_fdinfo *
+seq_lookup_fdinfo(struct proc_fdinfo_extra *extra, struct eventpoll *ep, loff_t num)
+{
+	struct epitem_fdinfo *fdinfo = extra->priv;
+	struct epitem *epi = NULL;
+	struct rb_node *rbp;
+
+	mutex_lock(&ep->mtx);
+	for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+		if (num-- == 0) {
+			epi = rb_entry(rbp, struct epitem, rbn);
+			fdinfo->fd = epi->ffd.fd;
+			fdinfo->ev = epi->event;
+			break;
+		}
+	}
+	mutex_unlock(&ep->mtx);
+
+	return epi ? fdinfo : NULL;
+}
+
+static void *seq_start(struct seq_file *m, loff_t *pos)
+{
+	struct proc_fdinfo_extra *extra = m->private;
+	struct eventpoll *ep = extra->f_file->private_data;
+
+	return seq_lookup_fdinfo(extra, ep, *pos);
+}
+
+static void seq_stop(struct seq_file *m, void *v) { }
+
+static void *seq_next(struct seq_file *m, void *p, loff_t *pos)
+{
+	struct proc_fdinfo_extra *extra = m->private;
+	struct eventpoll *ep = extra->f_file->private_data;
+	++*pos;
+	return (void *)seq_lookup_fdinfo(extra, ep, *pos);
+}
+
+static int seq_show(struct seq_file *m, void *v)
+{
+	struct epitem_fdinfo *fdinfo = v;
+	seq_printf(m, "tfd: %8d events: %8x data: %16llx\n",
+		   fdinfo->fd, fdinfo->ev.events,
+		   (long long)fdinfo->ev.data);
+	return 0;
+}
+
+static const struct seq_operations ep_fdinfo_ops = {
+	.start		= seq_start,
+	.next		= seq_next,
+	.stop		= seq_stop,
+	.show		= seq_show,
+};
+
+static struct proc_fdinfo_driver ep_fdinfo = {
+	.name		= "eventpoll",
+	.ops		= &ep_fdinfo_ops,
+	.probe		= is_file_epoll,
+	.priv_size	= sizeof(struct epitem_fdinfo),
+};
+
+static int __init ep_register_fdinfo_driver(void)
+{
+	return proc_register_fdinfo_driver(&ep_fdinfo);
+}
+#else
+static void ep_register_fdinfo_driver(void) { }
+#endif /* CONFIG_PROC_FS && CONFIG_CHECKPOINT_RESTORE */
+
 static int __init eventpoll_init(void)
 {
 	struct sysinfo si;
@@ -1929,6 +2008,8 @@ static int __init eventpoll_init(void)
 	pwq_cache = kmem_cache_create("eventpoll_pwq",
 			sizeof(struct eppoll_entry), 0, SLAB_PANIC, NULL);
 
+	ep_register_fdinfo_driver();
+
 	return 0;
 }
 fs_initcall(eventpoll_init);


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [rfc 6/7] fs, exportfs: Add export_encode_inode_fh helper
  2012-06-27 11:01 [rfc 0/7] [rfc] procfs, fdinfo seqfile providers Cyrill Gorcunov
                   ` (4 preceding siblings ...)
  2012-06-27 11:01 ` [rfc 5/7] fs, epoll: " Cyrill Gorcunov
@ 2012-06-27 11:01 ` Cyrill Gorcunov
  2012-06-27 11:01 ` [rfc 7/7] fs, notify: Add procfs fdinfo helper Cyrill Gorcunov
  6 siblings, 0 replies; 17+ messages in thread
From: Cyrill Gorcunov @ 2012-06-27 11:01 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-fsdevel, Cyrill Gorcunov, Al Viro, Alexey Dobriyan,
	Andrew Morton, Pavel Emelyanov, James Bottomley

[-- Attachment #1: seq-fs-exportfs-ino-2 --]
[-- Type: text/plain, Size: 2017 bytes --]

To describe the inodes watched by fsnotify objects without
binding them to a textual path, we need to encode them with
the help of exportfs. This patch adds a helper which operates
on plain inodes directly.

This feature is CONFIG_CHECKPOINT_RESTORE only.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
CC: Al Viro <viro@ZenIV.linux.org.uk>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Pavel Emelyanov <xemul@parallels.com>
CC: James Bottomley <jbottomley@parallels.com>
---
 fs/exportfs/expfs.c      |   21 +++++++++++++++++++++
 include/linux/exportfs.h |    4 ++++
 2 files changed, 25 insertions(+)

Index: linux-2.6.git/fs/exportfs/expfs.c
===================================================================
--- linux-2.6.git.orig/fs/exportfs/expfs.c
+++ linux-2.6.git/fs/exportfs/expfs.c
@@ -302,6 +302,27 @@ out:
 	return error;
 }
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
+int export_encode_inode_fh(struct inode *inode, struct fid *fid, int *max_len)
+{
+	int len = *max_len;
+	int type = FILEID_INO32_GEN;
+
+	if (len < 2) {
+		*max_len = 2;
+		return 255;
+	}
+
+	len = 2;
+	fid->i32.ino = inode->i_ino;
+	fid->i32.gen = inode->i_generation;
+	*max_len = len;
+
+	return type;
+}
+EXPORT_SYMBOL_GPL(export_encode_inode_fh);
+#endif
+
 /**
  * export_encode_fh - default export_operations->encode_fh function
  * @inode:   the object to encode
Index: linux-2.6.git/include/linux/exportfs.h
===================================================================
--- linux-2.6.git.orig/include/linux/exportfs.h
+++ linux-2.6.git/include/linux/exportfs.h
@@ -177,6 +177,10 @@ struct export_operations {
 	int (*commit_metadata)(struct inode *inode);
 };
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
+extern int export_encode_inode_fh(struct inode *inode, struct fid *fid, int *max_len);
+#endif
+
 extern int exportfs_encode_fh(struct dentry *dentry, struct fid *fid,
 	int *max_len, int connectable);
 extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [rfc 7/7] fs, notify: Add procfs fdinfo helper
  2012-06-27 11:01 [rfc 0/7] [rfc] procfs, fdinfo seqfile providers Cyrill Gorcunov
                   ` (5 preceding siblings ...)
  2012-06-27 11:01 ` [rfc 6/7] fs, exportfs: Add export_encode_inode_fh helper Cyrill Gorcunov
@ 2012-06-27 11:01 ` Cyrill Gorcunov
  2012-06-29 10:30   ` Cyrill Gorcunov
  6 siblings, 1 reply; 17+ messages in thread
From: Cyrill Gorcunov @ 2012-06-27 11:01 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-fsdevel, Cyrill Gorcunov, Al Viro, Alexey Dobriyan,
	Andrew Morton, Pavel Emelyanov, James Bottomley

[-- Attachment #1: seq-fdinfo-fsnotify-2 --]
[-- Type: text/plain, Size: 11679 bytes --]

This allows us to print out fsnotify details such as the
watched inode, device, mask and file handle.

For example for inotify objects the output is

 | pos:	0
 | flags:	02000000
 | inotify wd:        3 ino:             9e7e sdev:   800013 mask  800afce ignored_mask        0 fhandle-bytes:        8 fhandle-type:        1 f_handle: 7e9e0000640d1b6d
 | inotify wd:        2 ino:             a111 sdev:   800013 mask  800afce ignored_mask        0 fhandle-bytes:        8 fhandle-type:        1 f_handle: 11a1000020542153
 | inotify wd:        1 ino:            6b149 sdev:   800013 mask  800afce ignored_mask        0 fhandle-bytes:        8 fhandle-type:        1 f_handle: 49b1060023552153

For fanotify it is like

 | pos:	0
 | flags:	02
 | fanotify ino:            68f71 sdev:   800013 mask        1 ignored_mask 40000000 fhandle-bytes:        8 fhandle-type:        1 f_handle: 718f0600b9f42053
 | fanotify mnt_id:       13 mask        1 ignored_mask 40000000

This feature is CONFIG_CHECKPOINT_RESTORE only. To minimize the
impact on the general fsnotify code, the new functionality is mostly
gathered in the fs/notify/fdinfo.c file.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
CC: Al Viro <viro@ZenIV.linux.org.uk>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Pavel Emelyanov <xemul@parallels.com>
CC: James Bottomley <jbottomley@parallels.com>
---
 fs/notify/Makefile                 |    2 
 fs/notify/fanotify/fanotify_user.c |   17 ++
 fs/notify/fdinfo.c                 |  218 +++++++++++++++++++++++++++++++++++++
 fs/notify/fdinfo.h                 |   19 +++
 fs/notify/inotify/inotify_user.c   |   32 +++++
 5 files changed, 286 insertions(+), 2 deletions(-)

Index: linux-2.6.git/fs/notify/Makefile
===================================================================
--- linux-2.6.git.orig/fs/notify/Makefile
+++ linux-2.6.git/fs/notify/Makefile
@@ -1,5 +1,5 @@
 obj-$(CONFIG_FSNOTIFY)		+= fsnotify.o notification.o group.o inode_mark.o \
-				   mark.o vfsmount_mark.o
+				   mark.o vfsmount_mark.o fdinfo.o
 
 obj-y			+= dnotify/
 obj-y			+= inotify/
Index: linux-2.6.git/fs/notify/fanotify/fanotify_user.c
===================================================================
--- linux-2.6.git.orig/fs/notify/fanotify/fanotify_user.c
+++ linux-2.6.git/fs/notify/fanotify/fanotify_user.c
@@ -17,6 +17,7 @@
 #include <asm/ioctls.h>
 
 #include "../../mount.h"
+#include "../fdinfo.h"
 
 #define FANOTIFY_DEFAULT_MAX_EVENTS	16384
 #define FANOTIFY_DEFAULT_MAX_MARKS	8192
@@ -877,6 +878,20 @@ asmlinkage long SyS_fanotify_mark(long f
 SYSCALL_ALIAS(sys_fanotify_mark, SyS_fanotify_mark);
 #endif
 
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_CHECKPOINT_RESTORE)
+int is_file_fanotify(struct file *file)
+{
+	return file->f_op == &fanotify_fops;
+}
+
+static int __init fanotify_register_fdinfo_driver(void)
+{
+	return proc_register_fdinfo_driver(&fanotify_fdinfo);
+}
+#else
+void fanotify_register_fdinfo_driver(void) { }
+#endif /* CONFIG_PROC_FS && CONFIG_CHECKPOINT_RESTORE */
+
 /*
  * fanotify_user_setup - Our initialization function.  Note that we cannot return
  * error because we have compiled-in VFS hooks.  So an (unlikely) failure here
@@ -887,7 +902,7 @@ static int __init fanotify_user_setup(vo
 	fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC);
 	fanotify_response_event_cache = KMEM_CACHE(fanotify_response_event,
 						   SLAB_PANIC);
-
+	fanotify_register_fdinfo_driver();
 	return 0;
 }
 device_initcall(fanotify_user_setup);
Index: linux-2.6.git/fs/notify/fdinfo.c
===================================================================
--- /dev/null
+++ linux-2.6.git/fs/notify/fdinfo.c
@@ -0,0 +1,218 @@
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/fsnotify_backend.h>
+#include <linux/idr.h>
+#include <linux/init.h>
+#include <linux/inotify.h>
+#include <linux/kernel.h>
+#include <linux/namei.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/seq_file.h>
+#include <linux/exportfs.h>
+#include <linux/proc_fs.h>
+
+#include "inotify/inotify.h"
+#include "../fs/mount.h"
+
+struct inode_file_handle {
+	struct file_handle		h;
+	struct fid			fid;
+} __packed;
+
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_CHECKPOINT_RESTORE)
+
+#if defined(CONFIG_INOTIFY_USER) || defined(CONFIG_FANOTIFY)
+
+#ifdef CONFIG_EXPORTFS
+static int inotify_encode_target(struct inode *inode, struct inode_file_handle *fhandle)
+{
+	int ret, size;
+
+	size = sizeof(fhandle->fid) >> 2;
+	ret = export_encode_inode_fh(inode, &fhandle->fid, &size);
+	BUG_ON(ret != FILEID_INO32_GEN);
+
+	fhandle->h.handle_type = FILEID_INO32_GEN;
+	fhandle->h.handle_bytes = size * sizeof(u32);
+
+	return 0;
+}
+#else
+static int inotify_encode_target(struct inode *inode, struct inode_file_handle *fhandle)
+{
+	fhandle->h.handle_type = FILEID_ROOT;
+	fhandle->h.handle_bytes = 0;
+	return 0;
+}
+#endif /* CONFIG_EXPORTFS */
+
+struct fsnotify_mark *
+seq_lookup_mark(struct proc_fdinfo_extra *extra, loff_t num)
+{
+	const int match = FSNOTIFY_MARK_FLAG_INODE | FSNOTIFY_MARK_FLAG_VFSMOUNT;
+        struct fsnotify_group *group = extra->f_file->private_data;
+        struct fsnotify_mark *mark, *v = NULL;
+
+        spin_lock(&group->mark_lock);
+        list_for_each_entry(mark, &group->marks_list, g_list) {
+                if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE) ||
+		    !(mark->flags & match))
+                        continue;
+                if (num-- == 0) {
+                        v = mark;
+                        fsnotify_get_mark(mark);
+                        break;
+                }
+        }
+
+        spin_unlock(&group->mark_lock);
+        return v;
+}
+
+static void *seq_start(struct seq_file *m, loff_t *pos)
+{
+        struct proc_fdinfo_extra *extra = m->private;
+        return seq_lookup_mark(extra, *pos);
+}
+
+static void seq_stop(struct seq_file *m, void *v)
+{
+        if (v)
+                fsnotify_put_mark(v);
+}
+
+static void *seq_next(struct seq_file *m, void *p, loff_t *pos)
+{
+        struct proc_fdinfo_extra *extra = m->private;
+        ++*pos;
+        if (p)
+                fsnotify_put_mark(p);
+        return (void *)seq_lookup_mark(extra, *pos);
+}
+
+#ifdef CONFIG_INOTIFY_USER
+
+extern int is_file_inotify(struct file *file);
+
+static int seq_show_inotify(struct seq_file *m, void *v)
+{
+        struct inotify_inode_mark *inode_mark;
+        struct fsnotify_mark *mark = v;
+        struct inode *inode;
+
+        spin_lock(&mark->lock);
+	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE))
+		goto out;
+
+	if (unlikely(!(mark->flags & FSNOTIFY_MARK_FLAG_INODE)))
+		goto out;
+
+	inode_mark = container_of(mark, struct inotify_inode_mark, fsn_mark);
+	inode = igrab(mark->i.inode);
+	if (inode) {
+		struct inode_file_handle fhandle;
+		int i;
+
+		inotify_encode_target(inode, &fhandle);
+
+		seq_printf(m, "inotify wd: %8d ino: %16lx sdev: %8x mask %8x "
+			   "ignored_mask %8x fhandle-bytes: %8x "
+			   "fhandle-type: %8x f_handle: ",
+			   inode_mark->wd, inode->i_ino,
+			   inode->i_sb->s_dev,
+			   mark->mask,
+			   mark->ignored_mask,
+			   fhandle.h.handle_bytes,
+			   fhandle.h.handle_type);
+
+		for (i = 0; i < fhandle.h.handle_bytes; i++)
+			seq_printf(m, "%02x",
+				   (int)(unsigned char)fhandle.h.f_handle[i]);
+		seq_putc(m, '\n');
+		iput(inode);
+	}
+out:
+        spin_unlock(&mark->lock);
+        return 0;
+}
+
+static const struct seq_operations inotify_fdinfo_ops = {
+	.start	= seq_start,
+	.next	= seq_next,
+	.stop	= seq_stop,
+	.show	= seq_show_inotify,
+};
+
+struct proc_fdinfo_driver inotify_fdinfo = {
+	.name	= "inotify",
+	.ops	= &inotify_fdinfo_ops,
+	.probe	= is_file_inotify,
+};
+
+#endif /* CONFIG_INOTIFY_USER */
+
+#ifdef CONFIG_FANOTIFY
+
+extern int is_file_fanotify(struct file *file);
+
+static int seq_show_fanotify(struct seq_file *m, void *v)
+{
+        struct fsnotify_mark *mark = v;
+        struct inode *inode;
+
+        spin_lock(&mark->lock);
+	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE))
+		goto out;
+	if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) {
+		struct inode_file_handle fhandle;
+		int i;
+
+		inode = igrab(mark->i.inode);
+		if (!inode)
+			goto out;
+		inotify_encode_target(inode, &fhandle);
+
+		seq_printf(m, "fanotify ino: %16lx sdev: %8x mask %8x ignored_mask %8x "
+			   "fhandle-bytes: %8x fhandle-type: %8x f_handle: ",
+			   inode->i_ino,
+			   inode->i_sb->s_dev,
+			   mark->mask,
+			   mark->ignored_mask,
+			   fhandle.h.handle_bytes,
+			   fhandle.h.handle_type);
+
+		for (i = 0; i < fhandle.h.handle_bytes; i++)
+			seq_printf(m, "%02x",
+				   (int)(unsigned char)fhandle.h.f_handle[i]);
+		seq_putc(m, '\n');
+		iput(inode);
+	} else if (mark->flags & FSNOTIFY_MARK_FLAG_VFSMOUNT) {
+		struct mount *mnt = real_mount(mark->m.mnt);
+
+		seq_printf(m, "fanotify mnt_id: %8x mask %8x ignored_mask %8x\n",
+			   mnt->mnt_id, mark->mask, mark->ignored_mask);
+	}
+out:
+        spin_unlock(&mark->lock);
+        return 0;
+}
+
+static const struct seq_operations fanotify_fdinfo_ops = {
+	.start	= seq_start,
+	.next	= seq_next,
+	.stop	= seq_stop,
+	.show	= seq_show_fanotify,
+};
+
+struct proc_fdinfo_driver fanotify_fdinfo = {
+	.name		= "fanotify",
+	.ops		= &fanotify_fdinfo_ops,
+	.probe		= is_file_fanotify,
+};
+
+#endif /* CONFIG_FANOTIFY */
+
+#endif /* CONFIG_INOTIFY_USER || CONFIG_FANOTIFY */
+
+#endif /* CONFIG_PROC_FS && CONFIG_CHECKPOINT_RESTORE */
Index: linux-2.6.git/fs/notify/fdinfo.h
===================================================================
--- /dev/null
+++ linux-2.6.git/fs/notify/fdinfo.h
@@ -0,0 +1,19 @@
+#ifndef __FSNOTIFY_FDINFO_H__
+#define __FSNOTIFY_FDINFO_H__
+
+#include <linux/errno.h>
+#include <linux/proc_fs.h>
+
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_CHECKPOINT_RESTORE)
+
+#ifdef CONFIG_INOTIFY_USER
+extern struct proc_fdinfo_driver inotify_fdinfo;
+#endif
+
+#ifdef CONFIG_FANOTIFY
+extern struct proc_fdinfo_driver fanotify_fdinfo;
+#endif
+
+#endif
+
+#endif /* __FSNOTIFY_FDINFO_H__ */
Index: linux-2.6.git/fs/notify/inotify/inotify_user.c
===================================================================
--- linux-2.6.git.orig/fs/notify/inotify/inotify_user.c
+++ linux-2.6.git/fs/notify/inotify/inotify_user.c
@@ -38,8 +38,12 @@
 #include <linux/uaccess.h>
 #include <linux/poll.h>
 #include <linux/wait.h>
+#include <linux/seq_file.h>
+#include <linux/exportfs.h>
+#include <linux/proc_fs.h>
 
 #include "inotify.h"
+#include "../fdinfo.h"
 
 #include <asm/ioctls.h>
 
@@ -827,6 +831,26 @@ out:
 	return ret;
 }
 
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_CHECKPOINT_RESTORE)
+int is_file_inotify(struct file *file)
+{
+	return file->f_op == &inotify_fops;
+}
+
+static int __init inotify_register_fdinfo_driver(void)
+{
+	return proc_register_fdinfo_driver(&inotify_fdinfo);
+}
+
+static void __exit inotify_unregister_fdinfo_driver(void)
+{
+	proc_unregister_fdinfo_driver(&inotify_fdinfo);
+}
+#else
+static int __init inotify_register_fdinfo_driver(void) { return 0; }
+static void __exit inotify_unregister_fdinfo_driver(void) { }
+#endif /* CONFIG_PROC_FS && CONFIG_CHECKPOINT_RESTORE */
+
 /*
  * inotify_user_setup - Our initialization function.  Note that we cannot return
  * error because we have compiled-in VFS hooks.  So an (unlikely) failure here
@@ -862,6 +886,14 @@ static int __init inotify_user_setup(voi
 	inotify_max_user_instances = 128;
 	inotify_max_user_watches = 8192;
 
+	inotify_register_fdinfo_driver();
+
 	return 0;
 }
 module_init(inotify_user_setup);
+
+static void __exit inotify_user_exit(void)
+{
+	inotify_unregister_fdinfo_driver();
+}
+module_exit(inotify_user_exit);


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [rfc 7/7] fs, notify: Add procfs fdinfo helper
  2012-06-27 11:01 ` [rfc 7/7] fs, notify: Add procfs fdinfo helper Cyrill Gorcunov
@ 2012-06-29 10:30   ` Cyrill Gorcunov
  0 siblings, 0 replies; 17+ messages in thread
From: Cyrill Gorcunov @ 2012-06-29 10:30 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel
  Cc: Al Viro, Alexey Dobriyan, Andrew Morton, Pavel Emelyanov,
	James Bottomley

On Wed, Jun 27, 2012 at 03:01:23PM +0400, Cyrill Gorcunov wrote:
> This allow us to print out fsnotify details such as
> watchee inode, device, mask and file handle.
> 

A slightly updated version is below (I had missed colons
in a couple of the seq_printf format strings).
---
From: Cyrill Gorcunov <gorcunov@openvz.org>
Subject: fs, notify: Add procfs fdinfo helper v2

This allows us to print out fsnotify details such as the
watched inode, device, mask and file handle.

For example for inotify objects the output is

 | pos:	0
 | flags:	02000000
 | inotify wd:        3 ino:             9e7e sdev:   800013 mask:  800afce ignored_mask:        0 fhandle-bytes:        8 fhandle-type:        1 f_handle: 7e9e0000640d1b6d
 | inotify wd:        2 ino:             a111 sdev:   800013 mask:  800afce ignored_mask:        0 fhandle-bytes:        8 fhandle-type:        1 f_handle: 11a1000020542153
 | inotify wd:        1 ino:            6b149 sdev:   800013 mask:  800afce ignored_mask:        0 fhandle-bytes:        8 fhandle-type:        1 f_handle: 49b1060023552153

For fanotify it is like

 | pos:	0
 | flags:	02
 | fanotify ino:            68f71 sdev:   800013 mask:        1 ignored_mask: 40000000 fhandle-bytes:        8 fhandle-type:        1 f_handle: 718f0600b9f42053
 | fanotify mnt_id:       13 mask:        1 ignored_mask: 40000000

This feature is CONFIG_CHECKPOINT_RESTORE only. To minimize the
impact on the general fsnotify code, the new functionality is mostly
gathered in the fs/notify/fdinfo.c file.

v2:
 - append missing colons to terms

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
CC: Al Viro <viro@ZenIV.linux.org.uk>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Pavel Emelyanov <xemul@parallels.com>
CC: James Bottomley <jbottomley@parallels.com>
---
 fs/notify/Makefile                 |    2 
 fs/notify/fanotify/fanotify_user.c |   17 ++
 fs/notify/fdinfo.c                 |  219 +++++++++++++++++++++++++++++++++++++
 fs/notify/fdinfo.h                 |   19 +++
 fs/notify/inotify/inotify_user.c   |   32 +++++
 5 files changed, 287 insertions(+), 2 deletions(-)

Index: linux-2.6.git/fs/notify/Makefile
===================================================================
--- linux-2.6.git.orig/fs/notify/Makefile
+++ linux-2.6.git/fs/notify/Makefile
@@ -1,5 +1,5 @@
 obj-$(CONFIG_FSNOTIFY)		+= fsnotify.o notification.o group.o inode_mark.o \
-				   mark.o vfsmount_mark.o
+				   mark.o vfsmount_mark.o fdinfo.o
 
 obj-y			+= dnotify/
 obj-y			+= inotify/
Index: linux-2.6.git/fs/notify/fanotify/fanotify_user.c
===================================================================
--- linux-2.6.git.orig/fs/notify/fanotify/fanotify_user.c
+++ linux-2.6.git/fs/notify/fanotify/fanotify_user.c
@@ -17,6 +17,7 @@
 #include <asm/ioctls.h>
 
 #include "../../mount.h"
+#include "../fdinfo.h"
 
 #define FANOTIFY_DEFAULT_MAX_EVENTS	16384
 #define FANOTIFY_DEFAULT_MAX_MARKS	8192
@@ -877,6 +878,20 @@ asmlinkage long SyS_fanotify_mark(long f
 SYSCALL_ALIAS(sys_fanotify_mark, SyS_fanotify_mark);
 #endif
 
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_CHECKPOINT_RESTORE)
+int is_file_fanotify(struct file *file)	/* fdinfo probe: nonzero when @file uses fanotify_fops */
+{
+	return file->f_op == &fanotify_fops;
+}
+
+static int __init fanotify_register_fdinfo_driver(void)	/* hands fanotify_fdinfo to procfs */
+{
+	return proc_register_fdinfo_driver(&fanotify_fdinfo);
+}
+#else
+static int __init fanotify_register_fdinfo_driver(void) { return 0; }	/* no-op stub, same prototype as above */
+#endif /* CONFIG_PROC_FS && CONFIG_CHECKPOINT_RESTORE */
+
 /*
  * fanotify_user_setup - Our initialization function.  Note that we cannot return
  * error because we have compiled-in VFS hooks.  So an (unlikely) failure here
@@ -887,7 +902,7 @@ static int __init fanotify_user_setup(vo
 	fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC);
 	fanotify_response_event_cache = KMEM_CACHE(fanotify_response_event,
 						   SLAB_PANIC);
-
+	fanotify_register_fdinfo_driver();
 	return 0;
 }
 device_initcall(fanotify_user_setup);
Index: linux-2.6.git/fs/notify/fdinfo.c
===================================================================
--- /dev/null
+++ linux-2.6.git/fs/notify/fdinfo.c
@@ -0,0 +1,219 @@
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/fsnotify_backend.h>
+#include <linux/idr.h>
+#include <linux/init.h>
+#include <linux/inotify.h>
+#include <linux/kernel.h>
+#include <linux/namei.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/seq_file.h>
+#include <linux/exportfs.h>
+#include <linux/proc_fs.h>
+
+#include "inotify/inotify.h"
+#include "../fs/mount.h"
+
+struct inode_file_handle {	/* file_handle header followed by storage for the encoded fid */
+	struct file_handle		h;	/* handle_bytes/handle_type are printed by the show routines */
+	struct fid			fid;	/* written by inotify_encode_target() when CONFIG_EXPORTFS is set */
+} __packed;	/* NOTE(review): presumably packed so that h.f_handle[] overlays fid -- confirm */
+
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_CHECKPOINT_RESTORE)
+
+#if defined(CONFIG_INOTIFY_USER) || defined(CONFIG_FANOTIFY)
+
+#ifdef CONFIG_EXPORTFS
+/* Encode @inode into @fhandle; yields an empty FILEID_ROOT handle if encoding fails. Returns 0. */
+static int inotify_encode_target(struct inode *inode, struct inode_file_handle *fhandle)
+{
+	int size = sizeof(fhandle->fid) >> 2;	/* capacity in 32-bit words */
+	int ret = export_encode_inode_fh(inode, &fhandle->fid, &size);
+	if (WARN_ON_ONCE(ret != FILEID_INO32_GEN)) {
+		ret = FILEID_ROOT;	/* warn and degrade: a /proc read must not BUG() the kernel */
+		size = 0;
+	}
+	fhandle->h.handle_type = ret;
+	fhandle->h.handle_bytes = size * sizeof(u32);
+	return 0;
+}
+#else
+static int inotify_encode_target(struct inode *inode, struct inode_file_handle *fhandle)
+{
+	fhandle->h.handle_type = FILEID_ROOT;	/* no exportfs: report an empty handle */
+	fhandle->h.handle_bytes = 0;
+	return 0;
+}
+#endif /* CONFIG_EXPORTFS */
+
+/* Return the @num-th live inode/vfsmount mark of the group with a reference held, or NULL. */
+static struct fsnotify_mark *
+seq_lookup_mark(struct proc_fdinfo_extra *extra, loff_t num)
+{
+	const int match = FSNOTIFY_MARK_FLAG_INODE | FSNOTIFY_MARK_FLAG_VFSMOUNT;
+	struct fsnotify_group *group = extra->f_file->private_data;
+	struct fsnotify_mark *mark, *v = NULL;
+
+	spin_lock(&group->mark_lock);
+	list_for_each_entry(mark, &group->marks_list, g_list) {
+		if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE) ||
+		    !(mark->flags & match))
+			continue;
+		if (num-- == 0) {
+			v = mark;
+			fsnotify_get_mark(mark);	/* caller drops it with fsnotify_put_mark() */
+			break;
+		}
+	}
+	spin_unlock(&group->mark_lock);
+	return v;
+}
+
+/* seq_file ->start: look up (and pin) the mark at index *pos. */
+static void *seq_start(struct seq_file *m, loff_t *pos)
+{
+	return seq_lookup_mark(m->private, *pos);
+}
+
+/* seq_file ->stop: release the mark still pinned by start/next, if any. */
+static void seq_stop(struct seq_file *m, void *v)
+{
+	if (v)
+		fsnotify_put_mark(v);
+}
+
+/* seq_file ->next: unpin the current mark, advance *pos and pin the following mark. */
+static void *seq_next(struct seq_file *m, void *p, loff_t *pos)
+{
+	if (p)
+		fsnotify_put_mark(p);
+	return seq_lookup_mark(m->private, ++*pos);
+}
+
+#ifdef CONFIG_INOTIFY_USER
+
+extern int is_file_inotify(struct file *file);
+
+/* seq_file ->show for inotify: print wd, inode, masks and encoded handle of one watch. */
+static int seq_show_inotify(struct seq_file *m, void *v)
+{
+	struct fsnotify_mark *mark = v;
+	struct inode_file_handle fhandle;
+	struct inode *inode = NULL;
+	__u32 mask = 0, ignored_mask = 0;
+	unsigned int i;
+	int wd = 0;
+
+	/* Snapshot the mark under its lock; dead or non-inode marks print nothing. */
+	spin_lock(&mark->lock);
+	if ((mark->flags & FSNOTIFY_MARK_FLAG_ALIVE) &&
+	    likely(mark->flags & FSNOTIFY_MARK_FLAG_INODE)) {
+		inode = igrab(mark->i.inode);
+		wd = container_of(mark, struct inotify_inode_mark, fsn_mark)->wd;
+		mask = mark->mask;
+		ignored_mask = mark->ignored_mask;
+	}
+	spin_unlock(&mark->lock);
+	if (!inode)
+		return 0;
+
+	/* Lock dropped: iput() below may sleep, which is not allowed under a spinlock. */
+	inotify_encode_target(inode, &fhandle);
+	seq_printf(m, "inotify wd: %8d ino: %16lx sdev: %8x "
+		   "mask: %8x ignored_mask: %8x "
+		   "fhandle-bytes: %8x fhandle-type: %8x f_handle: ",
+		   wd, inode->i_ino,
+		   inode->i_sb->s_dev,
+		   mask,
+		   ignored_mask,
+		   fhandle.h.handle_bytes,
+		   fhandle.h.handle_type);
+	for (i = 0; i < fhandle.h.handle_bytes; i++)
+		seq_printf(m, "%02x",
+			   (int)(unsigned char)fhandle.h.f_handle[i]);
+	seq_putc(m, '\n');
+	iput(inode);
+	return 0;
+}
+
+static const struct seq_operations inotify_fdinfo_ops = {	/* seq_file iterator for inotify fds */
+	.start	= seq_start,
+	.next	= seq_next,
+	.stop	= seq_stop,
+	.show	= seq_show_inotify,	/* only ->show differs from the fanotify table */
+};
+
+struct proc_fdinfo_driver inotify_fdinfo = {	/* declared extern in fdinfo.h */
+	.name	= "inotify",
+	.ops	= &inotify_fdinfo_ops,
+	.probe	= is_file_inotify,	/* compares file->f_op against inotify_fops */
+};
+
+#endif /* CONFIG_INOTIFY_USER */
+
+#ifdef CONFIG_FANOTIFY
+
+extern int is_file_fanotify(struct file *file);
+
+/* seq_file ->show for fanotify: print one inode mark or one vfsmount mark. */
+static int seq_show_fanotify(struct seq_file *m, void *v)
+{
+	struct fsnotify_mark *mark = v;
+	struct inode_file_handle fhandle;
+	struct inode *inode = NULL;
+	__u32 mask, ignored_mask;
+	int mnt_id = 0, on_mount = 0;
+	unsigned int i;
+
+	/* Snapshot the mark under its lock; a dead mark prints nothing. */
+	spin_lock(&mark->lock);
+	mask = mark->mask;
+	ignored_mask = mark->ignored_mask;
+	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
+		/* dead mark: leave inode NULL and on_mount clear */
+	} else if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) {
+		inode = igrab(mark->i.inode);
+	} else if (mark->flags & FSNOTIFY_MARK_FLAG_VFSMOUNT) {
+		mnt_id = real_mount(mark->m.mnt)->mnt_id;
+		on_mount = 1;
+	}
+	spin_unlock(&mark->lock);
+	if (on_mount)
+		seq_printf(m, "fanotify mnt_id: %8x mask: %8x ignored_mask: %8x\n",
+			   mnt_id, mask, ignored_mask);
+	if (!inode)
+		return 0;
+
+	/* Lock dropped: iput() below may sleep, which is not allowed under a spinlock. */
+	inotify_encode_target(inode, &fhandle);
+	seq_printf(m, "fanotify ino: %16lx sdev: %8x "
+		   "mask: %8x ignored_mask: %8x "
+		   "fhandle-bytes: %8x fhandle-type: %8x f_handle: ",
+		   inode->i_ino, inode->i_sb->s_dev, mask, ignored_mask,
+		   fhandle.h.handle_bytes, fhandle.h.handle_type);
+	for (i = 0; i < fhandle.h.handle_bytes; i++)
+		seq_printf(m, "%02x", (int)(unsigned char)fhandle.h.f_handle[i]);
+	seq_putc(m, '\n');
+	iput(inode);
+	return 0;
+}
+
+static const struct seq_operations fanotify_fdinfo_ops = {	/* seq_file iterator for fanotify fds */
+	.start	= seq_start,
+	.next	= seq_next,
+	.stop	= seq_stop,
+	.show	= seq_show_fanotify,	/* only ->show differs from the inotify table */
+};
+
+struct proc_fdinfo_driver fanotify_fdinfo = {	/* declared extern in fdinfo.h */
+	.name		= "fanotify",
+	.ops		= &fanotify_fdinfo_ops,
+	.probe		= is_file_fanotify,	/* compares file->f_op against fanotify_fops */
+};
+
+#endif /* CONFIG_FANOTIFY */
+
+#endif /* CONFIG_INOTIFY_USER || CONFIG_FANOTIFY */
+
+#endif /* CONFIG_PROC_FS && CONFIG_CHECKPOINT_RESTORE */
Index: linux-2.6.git/fs/notify/fdinfo.h
===================================================================
--- /dev/null
+++ linux-2.6.git/fs/notify/fdinfo.h
@@ -0,0 +1,19 @@
+#ifndef __FSNOTIFY_FDINFO_H__
+#define __FSNOTIFY_FDINFO_H__
+
+#include <linux/errno.h>
+#include <linux/proc_fs.h>
+
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_CHECKPOINT_RESTORE)
+
+#ifdef CONFIG_INOTIFY_USER
+extern struct proc_fdinfo_driver inotify_fdinfo;
+#endif
+
+#ifdef CONFIG_FANOTIFY
+extern struct proc_fdinfo_driver fanotify_fdinfo;
+#endif
+
+#endif
+
+#endif /* __FSNOTIFY_FDINFO_H__ */
Index: linux-2.6.git/fs/notify/inotify/inotify_user.c
===================================================================
--- linux-2.6.git.orig/fs/notify/inotify/inotify_user.c
+++ linux-2.6.git/fs/notify/inotify/inotify_user.c
@@ -38,8 +38,12 @@
 #include <linux/uaccess.h>
 #include <linux/poll.h>
 #include <linux/wait.h>
+#include <linux/seq_file.h>
+#include <linux/exportfs.h>
+#include <linux/proc_fs.h>
 
 #include "inotify.h"
+#include "../fdinfo.h"
 
 #include <asm/ioctls.h>
 
@@ -827,6 +831,26 @@ out:
 	return ret;
 }
 
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_CHECKPOINT_RESTORE)
+int is_file_inotify(struct file *file)	/* fdinfo probe: nonzero when @file uses inotify_fops */
+{
+	return file->f_op == &inotify_fops;
+}
+
+static int __init inotify_register_fdinfo_driver(void)	/* hands inotify_fdinfo to procfs */
+{
+	return proc_register_fdinfo_driver(&inotify_fdinfo);
+}
+
+static void __exit inotify_unregister_fdinfo_driver(void)	/* counterpart of the register helper */
+{
+	proc_unregister_fdinfo_driver(&inotify_fdinfo);
+}
+#else
+static int __init inotify_register_fdinfo_driver(void) { return 0; }	/* stub: nothing to register */
+static void __exit inotify_unregister_fdinfo_driver(void) { }	/* stub: nothing to undo */
+#endif /* CONFIG_PROC_FS && CONFIG_CHECKPOINT_RESTORE */
+
 /*
  * inotify_user_setup - Our initialization function.  Note that we cannot return
  * error because we have compiled-in VFS hooks.  So an (unlikely) failure here
@@ -862,6 +886,14 @@ static int __init inotify_user_setup(voi
 	inotify_max_user_instances = 128;
 	inotify_max_user_watches = 8192;
 
+	inotify_register_fdinfo_driver();
+
 	return 0;
 }
 module_init(inotify_user_setup);
+
+static void __exit inotify_user_exit(void)	/* exit hook: only drops the fdinfo driver */
+{
+	inotify_unregister_fdinfo_driver();
+}
+module_exit(inotify_user_exit);

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [rfc 2/7] procfs: Convert /proc/pid/fdinfo/ handling routines to seq-file
  2012-06-27 11:01 ` [rfc 2/7] procfs: Convert /proc/pid/fdinfo/ handling routines to seq-file Cyrill Gorcunov
@ 2012-07-04  7:37   ` Pavel Emelyanov
  2012-07-04  8:03     ` Cyrill Gorcunov
  2012-07-05 17:37     ` Cyrill Gorcunov
  0 siblings, 2 replies; 17+ messages in thread
From: Pavel Emelyanov @ 2012-07-04  7:37 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: linux-kernel, linux-fsdevel, Al Viro, Alexey Dobriyan,
	Andrew Morton, James Bottomley

> @@ -130,7 +159,31 @@ static const struct dentry_operations ti
>  
>  static int proc_fd_link(struct dentry *dentry, struct path *path)
>  {
> -	return proc_fd_info(dentry->d_inode, path, NULL);
> +       struct inode *inode = dentry->d_inode;
> +       struct task_struct *task = get_proc_task(inode);
> +       struct files_struct *files = NULL;
> +       int fd = proc_fd(inode);
> +       struct file *file;
> +       int err = -ENOENT;
> +
> +       if (task) {
> +	       files = get_files_struct(task);
> +	       put_task_struct(task);
> +       }
> +
> +       if (files) {
> +	       spin_lock(&files->file_lock);
> +	       file = fcheck_files(files, fd);
> +	       if (file) {
> +		       *path = file->f_path;
> +		       path_get(&file->f_path);
> +	       }
> +	       spin_unlock(&files->file_lock);
> +	       put_files_struct(files);
> +	       err = 0;
> +       }
> +
> +       return err;
>  }
>  
>  static struct dentry *
> @@ -245,22 +298,6 @@ out_no_task:
>  	return retval;
>  }
>  
> -static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
> -				size_t len, loff_t *ppos)
> -{
> -	char tmp[PROC_FDINFO_MAX];
> -	int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp);
> -	if (!err)
> -		err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp));
> -	return err;
> -}
> -

I believe we can still keep the proc_fdinfo_read and proc_fd_link code unsplit.
Just push a callback pointer into proc_fd_info (as usual -- with an opaque void *argument).

Thanks,
Pavel


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [rfc 3/7] procfs: Add ability to plugin auxiliary fdinfo providers
  2012-06-27 11:01 ` [rfc 3/7] procfs: Add ability to plugin auxiliary fdinfo providers Cyrill Gorcunov
@ 2012-07-04  7:39   ` Pavel Emelyanov
  2012-07-04  7:50     ` Cyrill Gorcunov
  2012-07-05 17:44     ` Cyrill Gorcunov
  0 siblings, 2 replies; 17+ messages in thread
From: Pavel Emelyanov @ 2012-07-04  7:39 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: linux-kernel, linux-fsdevel, Al Viro, Alexey Dobriyan,
	Andrew Morton, James Bottomley

On 06/27/2012 03:01 PM, Cyrill Gorcunov wrote:
> This patch brings ability to plugin auxiliary fdinfo providers.
> For example in further patches eventfd, evenpoll and fsnotify
> will print out information associated with files.
> 
> This feature is CONFIG_CHECKPOINT_RESTORE guarded to eliminate
> overhead for those who don't need it at all (this
> unfortunately makes patch bigger than I wanted).
> 
> Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
> CC: Al Viro <viro@ZenIV.linux.org.uk>
> CC: Alexey Dobriyan <adobriyan@gmail.com>
> CC: Andrew Morton <akpm@linux-foundation.org>
> CC: Pavel Emelyanov <xemul@parallels.com>
> CC: James Bottomley <jbottomley@parallels.com>
> ---
>  fs/proc/fd.c            |  208 ++++++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/proc_fs.h |   28 ++++++
>  2 files changed, 236 insertions(+)

How about introducing one more file_operations member and call it in the
seq_show() (if exists)? Only inotify, eventfd and eventpoll will implement
one. This will allow to avoid all this complexity with drivers.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [rfc 3/7] procfs: Add ability to plugin auxiliary fdinfo providers
  2012-07-04  7:39   ` Pavel Emelyanov
@ 2012-07-04  7:50     ` Cyrill Gorcunov
  2012-07-05 17:44     ` Cyrill Gorcunov
  1 sibling, 0 replies; 17+ messages in thread
From: Cyrill Gorcunov @ 2012-07-04  7:50 UTC (permalink / raw)
  To: Pavel Emelyanov
  Cc: linux-kernel, linux-fsdevel, Al Viro, Alexey Dobriyan,
	Andrew Morton, James Bottomley

On Wed, Jul 04, 2012 at 11:39:30AM +0400, Pavel Emelyanov wrote:
> 
> How about introducing one more file_operations member and call it in the
> seq_show() (if exists)? Only inotify, eventfd and eventpoll will implement
> one. This will allow to avoid all this complexity with drivers.

Hmm. Thanks for idea, I need to estimate which impact it will have. Will ping
back once it's done.

	Cyrill

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [rfc 2/7] procfs: Convert /proc/pid/fdinfo/ handling routines to seq-file
  2012-07-04  7:37   ` Pavel Emelyanov
@ 2012-07-04  8:03     ` Cyrill Gorcunov
  2012-07-05 17:37     ` Cyrill Gorcunov
  1 sibling, 0 replies; 17+ messages in thread
From: Cyrill Gorcunov @ 2012-07-04  8:03 UTC (permalink / raw)
  To: Pavel Emelyanov
  Cc: linux-kernel, linux-fsdevel, Al Viro, Alexey Dobriyan,
	Andrew Morton, James Bottomley

On Wed, Jul 04, 2012 at 11:37:26AM +0400, Pavel Emelyanov wrote:
> >  
> > -static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
> > -				size_t len, loff_t *ppos)
> > -{
> > -	char tmp[PROC_FDINFO_MAX];
> > -	int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp);
> > -	if (!err)
> > -		err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp));
> > -	return err;
> > -}
> > -
> 
> I believe we can still have the proc_fdinfo_read and proc_fd_link code non-splitted.
> Just push a callback pointer ino the proc_fd_info (as usual -- we an opaque void *argument).

Thanks! Will try.

	Cyrill

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [rfc 2/7] procfs: Convert /proc/pid/fdinfo/ handling routines to seq-file
  2012-07-04  7:37   ` Pavel Emelyanov
  2012-07-04  8:03     ` Cyrill Gorcunov
@ 2012-07-05 17:37     ` Cyrill Gorcunov
  1 sibling, 0 replies; 17+ messages in thread
From: Cyrill Gorcunov @ 2012-07-05 17:37 UTC (permalink / raw)
  To: Pavel Emelyanov
  Cc: linux-kernel, linux-fsdevel, Al Viro, Alexey Dobriyan,
	Andrew Morton, James Bottomley

On Wed, Jul 04, 2012 at 11:37:26AM +0400, Pavel Emelyanov wrote:
...
> 
> I believe we can still have the proc_fdinfo_read and proc_fd_link code non-splitted.
> Just push a callback pointer ino the proc_fd_info (as usual -- we an opaque void *argument).

Hi Pavel, sorry for the delay (was a bit busy). It seems this won't work.
Look, previously we have had

static int proc_fd_info(struct inode *inode, struct path *path, char *info)
{
	struct task_struct *task = get_proc_task(inode);
	...

	if (info) {
		...
	}

	this makes info argument optional
}

static int proc_fd_link(struct dentry *dentry, struct path *path)
{
	return proc_fd_info(dentry->d_inode, path, NULL);
}

it's possible because we didn't use the seq-files engine. Once we switch to
seq-files the old proc_fd_info becomes a part of

static const struct file_operations proc_fdinfo_file_operations = {
	.open		= seq_fdinfo_open,

i.e. the declaration of seq_fdinfo_open is restricted to the file_operations::open
method signature, so I can't add an opaque void *argument here.

Sure, I can add one more wrapper function, but I guess this would increase code
complexity, which is what I tried to avoid.

	Cyrill

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [rfc 3/7] procfs: Add ability to plugin auxiliary fdinfo providers
  2012-07-04  7:39   ` Pavel Emelyanov
  2012-07-04  7:50     ` Cyrill Gorcunov
@ 2012-07-05 17:44     ` Cyrill Gorcunov
  1 sibling, 0 replies; 17+ messages in thread
From: Cyrill Gorcunov @ 2012-07-05 17:44 UTC (permalink / raw)
  To: Pavel Emelyanov
  Cc: linux-kernel, linux-fsdevel, Al Viro, Alexey Dobriyan,
	Andrew Morton, James Bottomley

On Wed, Jul 04, 2012 at 11:39:30AM +0400, Pavel Emelyanov wrote:
> On 06/27/2012 03:01 PM, Cyrill Gorcunov wrote:
> > This patch brings ability to plugin auxiliary fdinfo providers.
> > For example in further patches eventfd, evenpoll and fsnotify
> > will print out information associated with files.
> > 
> > This feature is CONFIG_CHECKPOINT_RESTORE guarded to eliminate
> > overhead for those who don't need it at all (this
> > unfortunately makes patch bigger than I wanted).
> 
> How about introducing one more file_operations member and call it in the
> seq_show() (if exists)? Only inotify, eventfd and eventpoll will implement
> one. This will allow to avoid all this complexity with drivers.

Hi Pavel, yes, this might be an option. If people agree with this approach
I'll redo my patches (this will require remaking all the remaining patches, so
I would like to know people's opinions before I start doing that). Anyone?

	Cyrill

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [rfc 5/7] fs, epoll: Add procfs fdinfo helper
  2012-06-27 11:01 ` [rfc 5/7] fs, epoll: " Cyrill Gorcunov
@ 2012-07-19 14:52   ` Matthew Helsley
  2012-07-19 15:03     ` Cyrill Gorcunov
  0 siblings, 1 reply; 17+ messages in thread
From: Matthew Helsley @ 2012-07-19 14:52 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: linux-kernel, linux-fsdevel, Al Viro, Alexey Dobriyan,
	Andrew Morton, Pavel Emelyanov, James Bottomley

On Wed, Jun 27, 2012 at 4:01 AM, Cyrill Gorcunov <gorcunov@openvz.org> wrote:
> This allow us to print out eventpoll target file descriptor,
> events and data, the /proc/pid/fdinfo/fd consists of
>
>  | pos: 0
>  | flags:       02
>  | tfd:        5 events:       1d data: ffffffffffffffff
>
> This feature is CONFIG_CHECKPOINT_RESTORE only.
>
> Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
> CC: Al Viro <viro@ZenIV.linux.org.uk>
> CC: Alexey Dobriyan <adobriyan@gmail.com>
> CC: Andrew Morton <akpm@linux-foundation.org>
> CC: Pavel Emelyanov <xemul@parallels.com>
> CC: James Bottomley <jbottomley@parallels.com>
> ---
>  fs/eventpoll.c |   81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 81 insertions(+)
>
> Index: linux-2.6.git/fs/eventpoll.c
> ===================================================================
> --- linux-2.6.git.orig/fs/eventpoll.c
> +++ linux-2.6.git/fs/eventpoll.c
> @@ -38,6 +38,8 @@
>  #include <asm/io.h>
>  #include <asm/mman.h>
>  #include <linux/atomic.h>
> +#include <linux/proc_fs.h>
> +#include <linux/seq_file.h>
>
>  /*
>   * LOCKING:
> @@ -1897,6 +1899,83 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd,
>         return error;
>  }
>
> +#if defined(CONFIG_PROC_FS) && defined(CONFIG_CHECKPOINT_RESTORE)
> +
> +struct epitem_fdinfo {
> +       struct epoll_event      ev;
> +       int                     fd;
> +};
> +
> +static struct epitem_fdinfo *
> +seq_lookup_fdinfo(struct proc_fdinfo_extra *extra, struct eventpoll *ep, loff_t num)
> +{
> +       struct epitem_fdinfo *fdinfo = extra->priv;
> +       struct epitem *epi = NULL;
> +       struct rb_node *rbp;
> +
> +       mutex_lock(&ep->mtx);
> +       for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
> +               if (num-- == 0) {
> +                       epi = rb_entry(rbp, struct epitem, rbn);
> +                       fdinfo->fd = epi->ffd.fd;
> +                       fdinfo->ev = epi->event;
> +                       break;

This will be incredibly slow. epoll was designed to scale to tens of
thousands of file descriptors. This algorithm is O(N^2) because each
time we show a new epoll item we walk through the whole rb tree again
(we're not doing a search so it isn't O(NlogN)).

Also, we could miss one or more later items if one of the earlier
items is removed from the epoll set in between "seq_lookup_fdinfo"
calls. This isn't a problem for checkpoint because we assume the task
(and everything with this eventpoll file in its fd table) is frozen.
However it means the file will be worse than useless for almost any
other purpose because they are unlikely to realize they need to freeze
all the task(s) to get consistent data.

Cheers,
    -Matt Helsley

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [rfc 5/7] fs, epoll: Add procfs fdinfo helper
  2012-07-19 14:52   ` Matthew Helsley
@ 2012-07-19 15:03     ` Cyrill Gorcunov
  0 siblings, 0 replies; 17+ messages in thread
From: Cyrill Gorcunov @ 2012-07-19 15:03 UTC (permalink / raw)
  To: Matthew Helsley
  Cc: linux-kernel, linux-fsdevel, Al Viro, Alexey Dobriyan,
	Andrew Morton, Pavel Emelyanov, James Bottomley

On Thu, Jul 19, 2012 at 07:52:41AM -0700, Matthew Helsley wrote:
> On Wed, Jun 27, 2012 at 4:01 AM, Cyrill Gorcunov <gorcunov@openvz.org> wrote:
> > This allow us to print out eventpoll target file descriptor,
> > events and data, the /proc/pid/fdinfo/fd consists of
> >
> >  | pos: 0
> >  | flags:       02
> >  | tfd:        5 events:       1d data: ffffffffffffffff
> >
> > +#if defined(CONFIG_PROC_FS) && defined(CONFIG_CHECKPOINT_RESTORE)
> > +
> > +struct epitem_fdinfo {
> > +       struct epoll_event      ev;
> > +       int                     fd;
> > +};
> > +
> > +static struct epitem_fdinfo *
> > +seq_lookup_fdinfo(struct proc_fdinfo_extra *extra, struct eventpoll *ep, loff_t num)
> > +{
> > +       struct epitem_fdinfo *fdinfo = extra->priv;
> > +       struct epitem *epi = NULL;
> > +       struct rb_node *rbp;
> > +
> > +       mutex_lock(&ep->mtx);
> > +       for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
> > +               if (num-- == 0) {
> > +                       epi = rb_entry(rbp, struct epitem, rbn);
> > +                       fdinfo->fd = epi->ffd.fd;
> > +                       fdinfo->ev = epi->event;
> > +                       break;
> 
> This will be incredibly slow. epoll was designed to scale to tens of
> thousands of file descriptors. This algorithm is O(N^2) because each
> time we show a new epoll item we walk through the whole rb tree again
> (we're not doing a search so it isn't O(NlogN)).

Yeah, I know, it's quadratic. I'll be reworking this series to use
immediate seq-printf and print out the whole tree once the appropriate
fdinfo file gets read.

> Also, we could miss one or more later items if one of the earlier
> items is removed from the epoll set in between "seq_lookup_fdinfo"
> calls. This isn't a problem for checkpoint because we assume the task
> (and everything with this eventpoll file in its fd table) is frozen.
> However it means the file will be worse than useless for almost any
> other purpose because they are unlikely to realize they need to freeze
> all the task(s) to get consistent data.

Well, a bunch of data read from proc is consistent only at the moment of
reading.

	Cyrill

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2012-07-19 15:03 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-06-27 11:01 [rfc 0/7] [rfc] procfs, fdinfo seqfile providers Cyrill Gorcunov
2012-06-27 11:01 ` [rfc 1/7] procfs: Move /proc/pid/fd[info] handling code to fd.[ch] Cyrill Gorcunov
2012-06-27 11:01 ` [rfc 2/7] procfs: Convert /proc/pid/fdinfo/ handling routines to seq-file Cyrill Gorcunov
2012-07-04  7:37   ` Pavel Emelyanov
2012-07-04  8:03     ` Cyrill Gorcunov
2012-07-05 17:37     ` Cyrill Gorcunov
2012-06-27 11:01 ` [rfc 3/7] procfs: Add ability to plugin auxiliary fdinfo providers Cyrill Gorcunov
2012-07-04  7:39   ` Pavel Emelyanov
2012-07-04  7:50     ` Cyrill Gorcunov
2012-07-05 17:44     ` Cyrill Gorcunov
2012-06-27 11:01 ` [rfc 4/7] fs, eventfd: Add procfs fdinfo helper Cyrill Gorcunov
2012-06-27 11:01 ` [rfc 5/7] fs, epoll: " Cyrill Gorcunov
2012-07-19 14:52   ` Matthew Helsley
2012-07-19 15:03     ` Cyrill Gorcunov
2012-06-27 11:01 ` [rfc 6/7] fs, exportfs: Add export_encode_inode_fh helper Cyrill Gorcunov
2012-06-27 11:01 ` [rfc 7/7] fs, notify: Add procfs fdinfo helper Cyrill Gorcunov
2012-06-29 10:30   ` Cyrill Gorcunov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.