From: Andrew Morton <akpm@linux-foundation.org>
To: Alexander Shishkin <virtuoso@slind.org>
Cc: Valdis.Kletnieks@vt.edu, linux-fsdevel@vger.kernel.org,
viro@zeniv.linux.org.uk
Subject: Re: [RFC][PATCHv2] List per-process file descriptor consumption when hitting file-max
Date: Wed, 13 Jan 2010 14:12:39 -0800 [thread overview]
Message-ID: <20100113141239.b79aa7c3.akpm@linux-foundation.org> (raw)
In-Reply-To: <1263141242-23440-1-git-send-email-virtuoso@slind.org>
On Sun, 10 Jan 2010 18:34:02 +0200
Alexander Shishkin <virtuoso@slind.org> wrote:
> When a file descriptor limit is hit, display the top consumers of
> descriptors so that it is possible to identify and fix those which
> leak them.
>
> Two new sysctl tunables are introduced:
> * file-max-consumers -- number of processes to display (defaults
> to 10);
> * file-max-rate-limit -- time interval between subsequent dumps
> (defaults to 10 seconds).
>
> Signed-off-by: Alexander Shishkin <virtuoso@slind.org>
> CC: viro@zeniv.linux.org.uk
> CC: linux-fsdevel@vger.kernel.org
> ---
> fs/file_table.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++-
> include/linux/fs.h | 5 +++
> kernel/sysctl.c | 14 ++++++++
> 3 files changed, 108 insertions(+), 1 deletions(-)
>
Seems to be a lot of code for a fairly obscure requirement. Can we
justify adding it? Can't the operator use lsof(8)?
> +struct fd_consumer
> +{
Please use scripts/checkpatch.pl.
> + struct task_struct *task;
> + int fd_count;
> +};
> +
> +static int cmp_fd_consumers(const void *a, const void *b)
> +{
> + const struct fd_consumer *x = a, *y = b;
> +
> + return y->fd_count - x->fd_count;
> +}
> +
> +static void dump_fd_consumers(void)
> +{
> + struct task_struct *p;
> + struct files_struct *files;
> + struct fdtable *fdt;
> + int proc_limit = files_stat.max_consumers;
> + int i, nproc;
> + struct fd_consumer *procs, *tmp;
Please avoid variables called `tmp'. Think up a meaningful name.
> + if (!files_stat.max_consumers)
> + return;
> +
> + read_lock(&tasklist_lock);
> +
> + /* build an array of per-task file descriptor usage */
> + nproc = nr_processes();
> + procs = kzalloc(nproc * sizeof(struct fd_consumer), GFP_KERNEL);
GFP_KERNEL allocation under tasklist_lock is a bug. A bug which would
have been revealed during runtime testing had you employed the wise
advice in Documentation/SubmitChecklist!
> + if (!procs)
> + goto out;
> +
> + tmp = procs;
> +
> + for_each_process(p) {
> + tmp->task = p;
> +
> + files = get_files_struct(p);
This does task_lock() under tasklist_lock. Is that correct?
> + if (!files)
> + continue;
> +
> + spin_lock(&files->file_lock);
Similarly, is this the correct ranking?
Also, is it a _new_ ranking? Is there any place where we
simultaneously take tasklist_lock and file_lock? If so, that's a bit
undesirable.
> + fdt = files_fdtable(files);
> +
> + /* we have to actually *count* the fds */
> + for (tmp->fd_count = i = 0; i < fdt->max_fds; i++)
> + tmp->fd_count += !!fcheck_files(files, i);
> +
> + spin_unlock(&files->file_lock);
> + put_files_struct(files);
> +
> + tmp++;
> + }
> +
> + /* sort by number of used descriptor in descending order */
> + sort(procs, nproc, sizeof(struct fd_consumer), cmp_fd_consumers, NULL);
This code can hold tasklist lock for quite a long time.
> + if (proc_limit > nproc)
> + proc_limit = nproc;
> +
> + /* output the 'proc_limit' first entries */
> + for (i = 0, tmp = procs; i < proc_limit; i++, tmp++)
> + printk(KERN_INFO "=> %s [%d]: open=%d\n", tmp->task->comm,
> + tmp->task->pid, tmp->fd_count);
> +
> + kfree(procs);
> +
> +out:
> + read_unlock(&tasklist_lock);
> +}
> +
> /* Find an unused file structure and return a pointer to it.
> * Returns NULL, if there are no more free file structures or
> * we run out of memory.
> @@ -105,6 +182,7 @@ struct file *get_empty_filp(void)
> const struct cred *cred = current_cred();
> static int old_max;
> struct file * f;
> + static unsigned long next_dump = 0;
>
> /*
> * Privileged users can go above max_files
> @@ -140,6 +218,14 @@ over:
> if (get_nr_files() > old_max) {
> printk(KERN_INFO "VFS: file-max limit %d reached\n",
> get_max_files());
> +
> + /* dump the biggest file descriptor users */
> + if (!next_dump || time_after(jiffies, next_dump)) {
> + next_dump = jiffies + files_stat.rate_limit;
> +
> + dump_fd_consumers();
> + }
Use of __ratelimit() would be appropriate here.
> old_max = get_nr_files();
> }
> goto fail;
> @@ -425,6 +511,8 @@ void __init files_init(unsigned long mempages)
> files_stat.max_files = n;
> if (files_stat.max_files < NR_FILE)
> files_stat.max_files = NR_FILE;
> +
> + files_stat.rate_limit = DUMP_RATE_LIMIT;
> files_defer_init();
> percpu_counter_init(&nr_files, 0);
> }
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 9147ca8..291beb3 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -36,6 +36,8 @@ struct files_stat_struct {
> int nr_files; /* read only */
> int nr_free_files; /* read only */
> int max_files; /* tunable */
> + int max_consumers; /* tunable */
> + unsigned long rate_limit; /* tunable */
> };
>
> struct inodes_stat_t {
> @@ -46,6 +48,9 @@ struct inodes_stat_t {
>
>
> #define NR_FILE 8192 /* this can well be larger on a larger system */
> +#define NR_CONSUMERS 10 /* dump this many tasks when file-max is hit */
> +#define DUMP_RATE_LIMIT msecs_to_jiffies(10000) /* wait this long between
> + dumps */
>
> #define MAY_EXEC 1
> #define MAY_WRITE 2
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 8a68b24..dfb08fc 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -1325,6 +1325,20 @@ static struct ctl_table fs_table[] = {
> .proc_handler = proc_dointvec,
> },
> {
> + .procname = "file-max-consumers",
> + .data = &files_stat.max_consumers,
> + .maxlen = sizeof(int),
> + .mode = 0644,
> + .proc_handler = proc_dointvec,
> + },
> + {
> + .procname = "file-max-rate-limit",
> + .data = &files_stat.rate_limit,
> + .maxlen = sizeof(unsigned long),
> + .mode = 0644,
> + .proc_handler = proc_doulongvec_ms_jiffies_minmax,
> + },
> + {
> .procname = "nr_open",
> .data = &sysctl_nr_open,
> .maxlen = sizeof(int),
next prev parent reply other threads:[~2010-01-13 22:13 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-06-08 11:38 [PATCH] [RFC] List per-process file descriptor consumption when hitting file-max alexander.shishckin
2009-07-29 16:17 ` Alexander Shishkin
2009-07-30 12:40 ` Valdis.Kletnieks
2009-10-11 12:17 ` Alexander Shishkin
2010-01-10 16:34 ` [RFC][PATCHv2] " Alexander Shishkin
2010-01-13 22:12 ` Andrew Morton [this message]
2010-01-11 9:38 ` [RFC][PATCHv3] " Alexander Shishkin
2010-01-11 12:40 ` Andreas Dilger
2010-01-13 22:06 ` Andi Kleen
2010-01-13 22:44 ` Al Viro
2010-01-13 22:57 ` Al Viro
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100113141239.b79aa7c3.akpm@linux-foundation.org \
--to=akpm@linux-foundation.org \
--cc=Valdis.Kletnieks@vt.edu \
--cc=linux-fsdevel@vger.kernel.org \
--cc=viro@zeniv.linux.org.uk \
--cc=virtuoso@slind.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).