linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Dave Hansen <dave@linux.vnet.ibm.com>
To: Oren Laadan <orenl@cs.columbia.edu>
Cc: arnd@arndb.de, jeremy@goop.org, linux-kernel@vger.kernel.org,
	containers@lists.linux-foundation.org
Subject: Re: [RFC v3][PATCH 8/9] File descriprtors (dump)
Date: Thu, 04 Sep 2008 11:41:00 -0700	[thread overview]
Message-ID: <1220553660.23386.60.camel@nimitz> (raw)
In-Reply-To: <Pine.LNX.4.64.0809040405250.5982@takamine.ncl.cs.columbia.edu>

On Thu, 2008-09-04 at 04:05 -0400, Oren Laadan wrote:
> +/**
> + * cr_scan_fds - scan file table and construct array of open fds
> + * @files: files_struct pointer
> + * @fdtable: (output) array of open fds
> + * @return: the number of open fds found
> + *
> + * Allocates the file descriptors array (*fdtable), caller should free
> + */
> +int cr_scan_fds(struct files_struct *files, int **fdtable)
> +{
> +	struct fdtable *fdt;
> +	int *fdlist;
> +	int i, n, max;
> +
> +	max = CR_DEFAULT_FDTABLE;
> +
> + repeat:
> +	n = 0;
> +	fdlist = kmalloc(max * sizeof(*fdlist), GFP_KERNEL);
> +	if (!fdlist)
> +		return -ENOMEM;
> +
> +	spin_lock(&files->file_lock);
> +	fdt = files_fdtable(files);
> +	for (i = 0; i < fdt->max_fds; i++) {
> +		if (fcheck_files(files, i)) {
> +			if (n == max) {
> +				spin_unlock(&files->file_lock);
> +				kfree(fdlist);
> +				max *= 2;
> +				if (max < 0) {	/* overflow ? */
> +					n = -EMFILE;
> +					break;
> +				}
> +				goto repeat;
> +			}
> +			fdlist[n++] = i;
> +		}
> +	}
> +	spin_unlock(&files->file_lock);
> +
> +	*fdtable = fdlist;
> +	return n;
> +}

That loop needs some love.  At least save us from one level of
indenting:

> +	for (i = 0; i < fdt->max_fds; i++) {
> +		if (!fcheck_files(files, i))
> 			continue;
> 		if (n == max) {
> +			spin_unlock(&files->file_lock);
> +			kfree(fdlist);
> +			max *= 2;
> +			if (max < 0) {	/* overflow ? */
> +				n = -EMFILE;
> +				break;
> +			}
> +			goto repeat;
> +		}
> +		fdlist[n++] = i;
> +	}

My gut also says that there has to be a better way to find a good size
for fdlist than growing it this way.

Why do we even have a fixed size for this?

+#define CR_DEFAULT_FDTABLE  256

> +/* cr_write_fd_data - dump the state of a given file pointer */
> +static int cr_write_fd_data(struct cr_ctx *ctx, struct file *file, int parent)
> +{
> +	struct cr_hdr h;
> +	struct cr_hdr_fd_data *hh = cr_hbuf_get(ctx, sizeof(*hh));
> +	struct dentry *dent = file->f_dentry;
> +	struct inode *inode = dent->d_inode;
> +	enum fd_type fd_type;
> +	int ret;
> +
> +	h.type = CR_HDR_FD_DATA;
> +	h.len = sizeof(*hh);
> +	h.parent = parent;
> +
> +	BUG_ON(!inode);

Why a BUG_ON()?  We'll deref it in just a sec anyway.  We prefer to just
get the NULL dereference rather than an explicit BUG_ON().

> +	hh->f_flags = file->f_flags;
> +	hh->f_mode = file->f_mode;
> +	hh->f_pos = file->f_pos;
> +	hh->f_uid = file->f_uid;
> +	hh->f_gid = file->f_gid;

Is there a plan to save off the 'struct user' here instead?  Nested user
namespaces in one checkpoint image might get confused otherwise.

> +	hh->f_version = file->f_version;
> +	/* FIX: need also file->f_owner */
> +
> +	switch (inode->i_mode & S_IFMT) {
> +	case S_IFREG:
> +		fd_type = CR_FD_FILE;
> +		break;
> +	case S_IFDIR:
> +		fd_type = CR_FD_DIR;
> +		break;
> +	case S_IFLNK:
> +		fd_type = CR_FD_LINK;
> +		break;
> +	default:
> +		return -EBADF;
> +	}

Why don't we just store (and use) (inode->i_mode & S_IFMT) in fd_type
instead of making our own types?

> +	/* FIX: check if the file/dir/link is unlinked */
> +	hh->fd_type = fd_type;
> +
> +	ret = cr_write_obj(ctx, &h, hh);
> +	cr_hbuf_put(ctx, sizeof(*hh));
> +	if (ret < 0)
> +		return ret;
> +
> +	return cr_write_fname(ctx, &file->f_path, ctx->vfsroot);
> +}
> +
> +/**
> + * cr_write_fd_ent - dump the state of a given file descriptor
> + * @ctx: checkpoint context
> + * @files: files_struct pointer
> + * @fd: file descriptor
> + *
> + * Save the state of the file descriptor; look up the actual file pointer
> + * in the hash table, and if found save the matching objref, otherwise call
> + * cr_write_fd_data to dump the file pointer too.
> + */
> +static int
> +cr_write_fd_ent(struct cr_ctx *ctx, struct files_struct *files, int fd)
> +{
> +	struct cr_hdr h;
> +	struct cr_hdr_fd_ent *hh = cr_hbuf_get(ctx, sizeof(*hh));
> +	struct file *file = NULL;
> +	struct fdtable *fdt;
> +	int coe, objref, ret;
> +
> +	/* make sure hh->fd (that is of type __u16) doesn't overflow */
> +	if (fd > USHORT_MAX) {
> +		pr_warning("CR: open files table too big (%d)\n", USHORT_MAX);
> +		return -EMFILE;
> +	}

Since the kernel always seems to make fds integers, it would make sense
to me to store them as integers in the checkpoint image.  Why bother to
shrink them down to a 16-bit type?

> +	rcu_read_lock();
> +	fdt = files_fdtable(files);
> +	file = fcheck_files(files, fd);
> +	if (file) {
> +		coe = FD_ISSET(fd, fdt->close_on_exec);
> +		get_file(file);
> +	}
> +	rcu_read_unlock();
> +
> +	/* sanity check (although this shouldn't happen) */
> +	if (!file)
> +		return -EBADF;
> +
> +	ret = cr_obj_add_ptr(ctx, (void *) file, &objref, CR_OBJ_FILE, 0);
> +	cr_debug("fd %d objref %d file %p c-o-e %d)\n", fd, objref, file, coe);
> +
> +	if (ret >= 0) {
> +		int new = ret;
> +
> +		h.type = CR_HDR_FD_ENT;
> +		h.len = sizeof(*hh);
> +		h.parent = 0;
> +
> +		hh->objref = objref;
> +		hh->fd = fd;
> +		hh->close_on_exec = coe;
> +
> +		ret = cr_write_obj(ctx, &h, hh);
> +		cr_hbuf_put(ctx, sizeof(*hh));
> +		if (ret < 0)
> +			return ret;
> +
> +		/* new==1 if-and-only-if file was new and added to hash */
> +		if (new)
> +			ret = cr_write_fd_data(ctx, file, objref);
> +	}

This if() block is in the normal flow path of the function and should go
at the top indentation level.  You can just do this:

	  if (ret < 0)
		goto out;
  	  // if block contents here...

   out:
> +	fput(file);
> +	return ret;
> +}
-- Dave


  parent reply	other threads:[~2008-09-04 18:45 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-09-04  7:57 [RFC v3][PATCH 0/9] Kernel based checkpoint/restart Oren Laadan
2008-09-04  8:02 ` [RFC v3][PATCH 1/9] Create syscalls: sys_checkpoint, sys_restart Oren Laadan
2008-09-04  8:37   ` Cedric Le Goater
2008-09-04 14:42   ` Serge E. Hallyn
2008-09-04 17:32     ` Oren Laadan
2008-09-04 20:37       ` Serge E. Hallyn
2008-09-04 21:05         ` Oren Laadan
2008-09-04 22:03           ` Serge E. Hallyn
2008-09-08 15:02     ` [Devel] " Andrey Mirkin
2008-09-08 16:07       ` Cedric Le Goater
2008-09-04  8:02 ` [RFC v3][PATCH 2/9] General infrastructure for checkpoint restart Oren Laadan
2008-09-04  9:12   ` Louis Rilling
2008-09-04 16:00     ` Serge E. Hallyn
2008-09-04 16:03   ` Serge E. Hallyn
2008-09-04 16:09     ` Dave Hansen
2008-09-04  8:03 ` [RFC v3][PATCH 3/9] x86 support for checkpoint/restart Oren Laadan
2008-09-04  8:03 ` [RFC v3][PATCH 4/9] Memory management (dump) Oren Laadan
2008-09-04 18:25   ` Dave Hansen
2008-09-07  1:54     ` Oren Laadan
2008-09-08 15:55       ` Dave Hansen
2008-09-04  8:04 ` [RFC v3][PATCH 5/9] Memory managemnet (restore) Oren Laadan
2008-09-04 18:08   ` Dave Hansen
2008-09-07  3:09     ` Oren Laadan
2008-09-08 16:49       ` Dave Hansen
2008-09-09  6:01         ` Oren Laadan
2008-09-10 21:42           ` Dave Hansen
2008-09-10 22:00             ` Cleanups for: [PATCH " Dave Hansen
2008-09-11  7:37             ` [RFC v3][PATCH " Oren Laadan
2008-09-11 15:38               ` Serge E. Hallyn
2008-09-12 16:34               ` Dave Hansen
2008-09-04  8:04 ` [RFC v3][PATCH 6/9] Checkpoint/restart: initial documentation Oren Laadan
2008-09-04  8:05 ` [RFC v3][PATCH 7/9] Infrastructure for shared objects Oren Laadan
2008-09-04  9:38   ` Louis Rilling
2008-09-04 14:23     ` Oren Laadan
2008-09-04 18:14   ` Dave Hansen
2008-09-04  8:05 ` [RFC v3][PATCH 8/9] File descriprtors (dump) Oren Laadan
2008-09-04  9:47   ` Louis Rilling
2008-09-04 14:43     ` Oren Laadan
2008-09-04 15:01   ` Dave Hansen
2008-09-04 18:41   ` Dave Hansen [this message]
2008-09-07  4:52     ` Oren Laadan
2008-09-08 16:57       ` Dave Hansen
2008-09-04  8:06 ` [RFC v3][PATCH 9/9] File descriprtors (restore) Oren Laadan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1220553660.23386.60.camel@nimitz \
    --to=dave@linux.vnet.ibm.com \
    --cc=arnd@arndb.de \
    --cc=containers@lists.linux-foundation.org \
    --cc=jeremy@goop.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=orenl@cs.columbia.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).