All of lore.kernel.org
 help / color / mirror / Atom feed
From: bchociej@gmail.com
To: chris.mason@oracle.com, linux-btrfs@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org, cmm@us.ibm.com,
	bcchocie@us.ibm.com, mrlupfer@us.ibm.com, crscott@us.ibm.com,
	linux-kernel@vger.kernel.org
Subject: [RFC PATCH 4/5] Btrfs: Add debugfs interface for hot data stats
Date: Tue, 27 Jul 2010 17:00:22 -0500	[thread overview]
Message-ID: <1280268023-18408-5-git-send-email-bchociej@gmail.com> (raw)
In-Reply-To: <1280268023-18408-1-git-send-email-bchociej@gmail.com>

From: Ben Chociej <bcchocie@us.ibm.com>

Adds a ./btrfs_data/<device_name>/ directory in the debugfs directory
for each volume. The directory contains two files. The first,
`inode_data', contains the heat information for inodes that have been
brought into the hot data map structures. The second, `range_data',
contains similar information about subfile ranges.

Signed-off-by: Ben Chociej <bcchocie@us.ibm.com>
Signed-off-by: Matt Lupfer <mrlupfer@us.ibm.com>
Signed-off-by: Conor Scott <crscott@us.ibm.com>
Reviewed-by: Mingming Cao <cmm@us.ibm.com>
Reviewed-by: Steve French <sfrench@us.ibm.com>
---
 fs/btrfs/debugfs.c |  500 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/debugfs.h |   57 ++++++
 2 files changed, 557 insertions(+), 0 deletions(-)
 create mode 100644 fs/btrfs/debugfs.c
 create mode 100644 fs/btrfs/debugfs.h

diff --git a/fs/btrfs/debugfs.c b/fs/btrfs/debugfs.c
new file mode 100644
index 0000000..a0e7bb7
--- /dev/null
+++ b/fs/btrfs/debugfs.c
@@ -0,0 +1,500 @@
+#include <linux/debugfs.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/vmalloc.h>
+#include <linux/limits.h>
+#include "ctree.h"
+#include "hotdata_map.h"
+#include "hotdata_hash.h"
+#include "debugfs.h"
+
+/*
+ * debugfs.c contains the code to interface with the btrfs debugfs.
+ * The debugfs outputs range- and file-level access frequency
+ * statistics for each mounted volume.
+ */
+
+/*
+ * Append the first len bytes of msg to the volume's log, growing the log
+ * buffer first when the message would not fit.
+ *
+ * Called from debugfs_log() with data->log_lock held.
+ *
+ * Returns len on success. Returns 0 if len is not positive, or if the
+ * buffer could not be grown; in the latter case a short "No more memory!"
+ * note is appended instead when there is still room for it.
+ *
+ * NOTE(review): vmalloc()/vfree() are reached here while the caller holds
+ * a spinlock; both may sleep, so the locking scheme should be revisited
+ * (e.g. a mutex) -- that cannot be fixed from inside this helper alone.
+ */
+static int copy_msg_to_log(struct debugfs_vol_data *data, const char *msg,
+			   int len)
+{
+	static const char oom_msg[] = "No more memory!\n";
+	struct lstring *debugfs_log = data->debugfs_log;
+	unsigned long room;
+	unsigned long copied;
+	uint new_log_alloc_size;
+	char *new_log;
+
+	if (len <= 0)
+		return 0;
+
+	room = data->log_alloc_size - debugfs_log->len;
+
+	if ((unsigned long) len >= room) {
+		/* Not enough room in the log buffer for the new message. */
+		/* Grow in LOG_PAGE_SIZE steps until it fits with a byte */
+		/* to spare. */
+		new_log_alloc_size = data->log_alloc_size;
+		do {
+			new_log_alloc_size += LOG_PAGE_SIZE;
+		} while (new_log_alloc_size - debugfs_log->len <=
+			 (unsigned long) len);
+
+		new_log = vmalloc(new_log_alloc_size);
+		if (!new_log) {
+			WARN_ON(1);
+			if (room > 1) {
+				/* do not count the terminating NUL */
+				copied = min_t(unsigned long,
+					sizeof(oom_msg) - 1, room - 1);
+				memcpy(debugfs_log->str + debugfs_log->len,
+					oom_msg, copied);
+				debugfs_log->len += copied;
+			}
+			return 0;
+		}
+
+		memcpy(new_log, debugfs_log->str, debugfs_log->len);
+		memset(new_log + debugfs_log->len, 0,
+			new_log_alloc_size - debugfs_log->len);
+		vfree(debugfs_log->str);
+		debugfs_log->str = new_log;
+		data->log_alloc_size = new_log_alloc_size;
+	}
+
+	/* copy from the caller's message, not from the work buffer */
+	memcpy(debugfs_log->str + debugfs_log->len, msg, len);
+	debugfs_log->len += (unsigned long) len;
+
+	return len;
+}
+
+/*
+ * Format a message into the per-volume work buffer and append it to the
+ * volume's log.
+ *
+ * Returns the number of bytes written to the log (0 if the log could not
+ * take the message), or -1 if the log has no buffer.
+ */
+static int debugfs_log(struct debugfs_vol_data *data, const char *fmt, ...)
+{
+	struct lstring *debugfs_log = data->debugfs_log;
+	va_list args;
+	int len;
+
+	if (debugfs_log->str == NULL)
+		return -1;
+
+	spin_lock(&data->log_lock);
+
+	va_start(args, fmt);
+	len = vsnprintf(data->log_work_buff, sizeof(data->log_work_buff), fmt,
+		args);
+	va_end(args);
+
+	if (len >= (int) sizeof(data->log_work_buff)) {
+		#define truncate_msg "The next message has been truncated.\n"
+		/* sizeof() counts the NUL, which must not land in the log */
+		copy_msg_to_log(data, truncate_msg, sizeof(truncate_msg) - 1);
+		/*
+		 * vsnprintf() reported the length the full message would
+		 * have had; only sizeof(log_work_buff) - 1 bytes are really
+		 * in the work buffer, so clamp before copying out of it.
+		 */
+		len = sizeof(data->log_work_buff) - 1;
+	}
+
+	len = copy_msg_to_log(data, data->log_work_buff, len);
+	spin_unlock(&data->log_lock);
+
+	return len;
+}
+
+/*
+ * Allocate and zero the initial log buffer for a btrfs volume.
+ *
+ * data->debugfs_log must already point at a struct lstring.
+ * Returns 0 on success or -ENOMEM if the buffer could not be allocated
+ * (in which case the log's str is left NULL).
+ */
+static int debugfs_log_init(struct debugfs_vol_data *data)
+{
+	struct lstring *debugfs_log = data->debugfs_log;
+	char *buf;
+
+	/* vmalloc() may sleep, so allocate before taking the spinlock */
+	buf = vmalloc(INIT_LOG_ALLOC_SIZE);
+	if (buf)
+		memset(buf, 0, INIT_LOG_ALLOC_SIZE);
+
+	spin_lock(&data->log_lock);
+	debugfs_log->str = buf;
+	if (buf)
+		data->log_alloc_size = INIT_LOG_ALLOC_SIZE;
+	spin_unlock(&data->log_lock);
+
+	return buf ? 0 : -ENOMEM;
+}
+
+/*
+ * Free the log buffer of a btrfs volume and reset the log to empty.
+ *
+ * data->debugfs_log must be non-NULL; the struct lstring itself is not
+ * freed here.
+ */
+static void debugfs_log_exit(struct debugfs_vol_data *data)
+{
+	struct lstring *debugfs_log = data->debugfs_log;
+	char *old_str;
+
+	/* detach the buffer under the lock ... */
+	spin_lock(&data->log_lock);
+	old_str = debugfs_log->str;
+	debugfs_log->str = NULL;
+	debugfs_log->len = 0;
+	spin_unlock(&data->log_lock);
+
+	/* ... and release it afterwards: vfree() may sleep */
+	vfree(old_str);
+}
+
+/* file operations behind each volume's `range_data' debugfs file */
+static const struct file_operations btrfs_debugfs_range_fops = {
+	.open	= __btrfs_debugfs_open,
+	.read	= __btrfs_debugfs_range_read,
+};
+
+/* file operations behind each volume's `inode_data' debugfs file */
+static const struct file_operations btrfs_debugfs_inode_fops = {
+	.open	= __btrfs_debugfs_open,
+	.read	= __btrfs_debugfs_inode_read,
+};
+
+/*
+ * Module-init hook: create the top-level btrfs debugfs directory and set
+ * up the list (and its lock) that tracks per-volume debugfs data.
+ *
+ * Returns 0 on success, -EIO if the directory could not be created.
+ */
+int btrfs_init_debugfs(void)
+{
+	debugfs_root_dentry = debugfs_create_dir(DEBUGFS_ROOT_NAME, NULL);
+
+	/* the list and lock are initialized even when the directory failed */
+	INIT_LIST_HEAD(&debugfs_vol_data_list);
+	spin_lock_init(&data_list_lock);
+
+	return debugfs_root_dentry ? 0 : -EIO;
+}
+
+/*
+ * on each volume mount, initialize the debugfs dentries and associated
+ * structures (debugfs_vol_data; the debugfs_log is allocated lazily by
+ * the read handlers)
+ *
+ * @uuid: name of the mounted device; despite the parameter name it is
+ *        treated as a device path whose first DEV_NAME_CHOP characters
+ *        are stripped (e.g. /dev/sda -> sda)
+ * @sb:   superblock of the volume being mounted
+ *
+ * Returns 0 on success, -ENOMEM if the per-volume data could not be
+ * allocated, and -EIO for any other failure. On failure nothing is left
+ * behind in debugfs or on debugfs_vol_data_list.
+ */
+int btrfs_init_debugfs_volume(const char *uuid, struct super_block *sb)
+{
+	struct dentry *debugfs_volume_entry = NULL;
+	struct debugfs_vol_data *range_data = NULL;
+	struct debugfs_vol_data *inode_data = NULL;
+	size_t dev_name_length;
+	char dev[NAME_MAX];
+	int err = -EIO;
+
+	if (!debugfs_root_dentry || !uuid)
+		return -EIO;
+
+	/*
+	 * the chopped name (plus its NUL) must fit in dev[], and there
+	 * must be something left after chopping
+	 */
+	dev_name_length = strlen(uuid);
+	if (dev_name_length <= DEV_NAME_CHOP ||
+	    dev_name_length - DEV_NAME_CHOP >= sizeof(dev))
+		return -EIO;
+
+	/* create debugfs folder for this volume by mounted dev name */
+	memcpy(dev, uuid + DEV_NAME_CHOP, dev_name_length -
+		DEV_NAME_CHOP + 1);
+	debugfs_volume_entry = debugfs_create_dir(dev, debugfs_root_dentry);
+
+	if (!debugfs_volume_entry)
+		return -EIO;
+
+	/*
+	 * allocate zeroed debugfs_vol_data for range_data and inode_data;
+	 * GFP_NOFS keeps the allocator from re-entering the filesystem
+	 */
+	range_data = kzalloc(sizeof(*range_data), GFP_NOFS);
+	inode_data = kzalloc(sizeof(*inode_data), GFP_NOFS);
+	if (!range_data || !inode_data) {
+		err = -ENOMEM;
+		goto debugfs_error;
+	}
+
+	range_data->sb = sb;
+	range_data->de = debugfs_volume_entry;
+	spin_lock_init(&range_data->log_lock);
+
+	inode_data->sb = sb;
+	inode_data->de = debugfs_volume_entry;
+	spin_lock_init(&inode_data->log_lock);
+
+	/* create debugfs range_data file */
+	if (!debugfs_create_file("range_data",
+			   S_IFREG | S_IRUSR | S_IWUSR |
+			   S_IRUGO,
+			   debugfs_volume_entry,
+			   (void *) range_data,
+			   &btrfs_debugfs_range_fops))
+		goto debugfs_error;
+
+	/* create debugfs inode_data file */
+	if (!debugfs_create_file("inode_data",
+			   S_IFREG | S_IRUSR | S_IWUSR |
+			   S_IRUGO,
+			   debugfs_volume_entry,
+			   (void *) inode_data,
+			   &btrfs_debugfs_inode_fops))
+		goto debugfs_error;
+
+	/*
+	 * add debugfs_vol_data for inode data and range data to the list
+	 * only once nothing can fail any more, so the error path never
+	 * frees an entry that is still linked
+	 */
+	spin_lock(&data_list_lock);
+	list_add(&range_data->node, &debugfs_vol_data_list);
+	list_add(&inode_data->node, &debugfs_vol_data_list);
+	spin_unlock(&data_list_lock);
+
+	return 0;
+
+debugfs_error:
+	/* drop the directory (and any file in it) before freeing the data */
+	debugfs_remove_recursive(debugfs_volume_entry);
+	kfree(range_data);
+	kfree(inode_data);
+
+	return err;
+}
+
+/*
+ * find the debugfs data of the volume being unmounted (match by
+ * superblock), remove its debugfs dentries and free all memory
+ * associated with it
+ *
+ * btrfs_init_debugfs_volume() links two entries per superblock (range
+ * data and inode data) that share one directory dentry; both are
+ * released here.
+ */
+void btrfs_exit_debugfs_volume(struct super_block *sb)
+{
+	struct debugfs_vol_data *data;
+	struct list_head *pos;
+	struct list_head *next;
+	struct dentry *removed_de = NULL;
+	LIST_HEAD(doomed);
+
+	/* unlink every entry of this superblock while holding the lock */
+	spin_lock(&data_list_lock);
+	list_for_each_safe(pos, next, &debugfs_vol_data_list) {
+		data = list_entry(pos, struct debugfs_vol_data, node);
+		if (data->sb == sb)
+			list_move(pos, &doomed);
+	}
+	spin_unlock(&data_list_lock);
+
+	/*
+	 * tear down outside the spinlock: debugfs_remove_recursive() and
+	 * vfree() may sleep
+	 */
+	list_for_each_safe(pos, next, &doomed) {
+		data = list_entry(pos, struct debugfs_vol_data, node);
+		list_del(pos);
+
+		/* the shared directory only needs to be removed once */
+		if (data->de != removed_de) {
+			debugfs_remove_recursive(data->de);
+			removed_de = data->de;
+		}
+
+		/* the log only exists if a reader left it behind */
+		if (data->debugfs_log) {
+			debugfs_log_exit(data);
+			kfree(data->debugfs_log);
+		}
+		kfree(data);
+	}
+}
+
+/*
+ * Module-exit hook: remove all debugfs dentries and free whatever
+ * per-volume data is still on debugfs_vol_data_list.
+ */
+void btrfs_exit_debugfs(void)
+{
+	struct debugfs_vol_data *data;
+	struct list_head *pos;
+	struct list_head *next;
+	LIST_HEAD(doomed);
+
+	/*
+	 * remove all debugfs entries recursively from the root first, so
+	 * no file still points at data that is about to be freed
+	 */
+	debugfs_remove_recursive(debugfs_root_dentry);
+
+	/* take the whole list private, leaving the global list empty */
+	spin_lock(&data_list_lock);
+	list_splice_init(&debugfs_vol_data_list, &doomed);
+	spin_unlock(&data_list_lock);
+
+	/* free outside the spinlock: vfree() may sleep */
+	list_for_each_safe(pos, next, &doomed) {
+		data = list_entry(pos, struct debugfs_vol_data, node);
+		list_del(pos);
+
+		/* the log only exists if a reader left it behind */
+		if (data->debugfs_log) {
+			debugfs_log_exit(data);
+			kfree(data->debugfs_log);
+		}
+		kfree(data);
+	}
+}
+
+/*
+ * debugfs open handler from the fops tables: pass the inode's private
+ * pointer on to the opened file, if there is one. Always returns 0.
+ */
+int __btrfs_debugfs_open(struct inode *inode, struct file *file)
+{
+	void *vol_data = inode->i_private;
+
+	if (vol_data != NULL)
+		file->private_data = vol_data;
+
+	return 0;
+}
+
+/*
+ * debugfs read handler for the `range_data' file.
+ *
+ * A read at offset 0 walks every hot inode, logs the frequency data of
+ * each of its ranges into the volume's log, and returns the start of
+ * that log. Reads at a later offset continue from the buffered log
+ * without walking the tree again. A read at or past the end of the log
+ * frees the log and returns 0.
+ *
+ * Returns the number of bytes copied to user space, 0 at end of file,
+ * or a negative errno (-ENOMEM if the log could not be allocated).
+ */
+ssize_t __btrfs_debugfs_range_read(struct file *file, char __user *user,
+			     size_t count, loff_t *ppos)
+{
+	ssize_t err = 0;
+	struct super_block *sb;
+	struct btrfs_root *root;
+	struct btrfs_root *fs_root;
+	struct hot_inode_item *current_hot_inode;
+	struct debugfs_vol_data *data;
+	struct lstring *debugfs_log;
+	u64 next_ino;
+
+	data = (struct debugfs_vol_data *) file->private_data;
+	sb = data->sb;
+	root = btrfs_sb(sb);
+	fs_root = (struct btrfs_root *) root->fs_info->fs_root;
+
+	if (!data->debugfs_log) {
+		/* initialize debugfs log corresponding to this volume */
+		debugfs_log = kmalloc(sizeof(*debugfs_log), GFP_NOFS);
+		if (!debugfs_log)
+			return -ENOMEM;
+
+		debugfs_log->str = NULL;
+		debugfs_log->len = 0;
+		data->debugfs_log = debugfs_log;
+
+		if (debugfs_log_init(data)) {
+			data->debugfs_log = NULL;
+			kfree(debugfs_log);
+			return -ENOMEM;
+		}
+	}
+
+	if ((unsigned long) *ppos > 0) {
+		/* caller is continuing a previous read, don't walk tree */
+		if ((unsigned long) *ppos >= data->debugfs_log->len)
+			goto clean_up;
+
+		goto print_to_user;
+	}
+
+	/*
+	 * a read from offset 0 starts a new snapshot; drop whatever an
+	 * earlier reader that never reached end of file left in the log,
+	 * otherwise the new walk would be appended after the stale data
+	 */
+	spin_lock(&data->log_lock);
+	data->debugfs_log->len = 0;
+	spin_unlock(&data->log_lock);
+
+	/* walk the inode tree */
+
+	current_hot_inode = find_next_hot_inode(fs_root, 0);
+
+	while (current_hot_inode) {
+		/* walk ranges, print data to debugfs log */
+		__walk_range_tree(current_hot_inode, data);
+
+		/*
+		 * fetch the inode number before releasing the item; it
+		 * must not be touched after free_hot_inode_item()
+		 */
+		next_ino = (u64) current_hot_inode->i_ino + 1;
+		free_hot_inode_item(current_hot_inode);
+		current_hot_inode = find_next_hot_inode(fs_root, next_ino);
+	}
+
+print_to_user:
+
+	if (data->debugfs_log->len) {
+		err = simple_read_from_buffer(user, count, ppos,
+				      data->debugfs_log->str,
+				      data->debugfs_log->len);
+	}
+
+	return err;
+
+clean_up:
+
+	/* reader has finished the file */
+	/* clean up */
+
+	debugfs_log_exit(data);
+	kfree(data->debugfs_log);
+	data->debugfs_log = NULL;
+
+	return 0;
+}
+
+/*
+ * debugfs read handler for the `inode_data' file.
+ *
+ * A read at offset 0 walks every hot inode, logs its frequency data
+ * into the volume's log, and returns the start of that log. Reads at a
+ * later offset continue from the buffered log without walking the tree
+ * again. A read at or past the end of the log frees the log and
+ * returns 0.
+ *
+ * Returns the number of bytes copied to user space, 0 at end of file,
+ * or a negative errno (-ENOMEM if the log could not be allocated).
+ */
+ssize_t __btrfs_debugfs_inode_read(struct file *file, char __user *user,
+			     size_t count, loff_t *ppos)
+{
+	ssize_t err = 0;
+	struct super_block *sb;
+	struct btrfs_root *root;
+	struct btrfs_root *fs_root;
+	struct hot_inode_item *current_hot_inode;
+	struct debugfs_vol_data *data;
+	struct lstring *debugfs_log;
+	u64 next_ino;
+
+	data = (struct debugfs_vol_data *) file->private_data;
+	sb = data->sb;
+	root = btrfs_sb(sb);
+	fs_root = (struct btrfs_root *) root->fs_info->fs_root;
+
+	if (!data->debugfs_log) {
+		/* initialize debugfs log corresponding to this volume */
+		debugfs_log = kmalloc(sizeof(*debugfs_log), GFP_NOFS);
+		if (!debugfs_log)
+			return -ENOMEM;
+
+		debugfs_log->str = NULL;
+		debugfs_log->len = 0;
+		data->debugfs_log = debugfs_log;
+
+		if (debugfs_log_init(data)) {
+			data->debugfs_log = NULL;
+			kfree(debugfs_log);
+			return -ENOMEM;
+		}
+	}
+
+	if ((unsigned long) *ppos > 0) {
+		/* caller is continuing a previous read, don't walk tree */
+		if ((unsigned long) *ppos >= data->debugfs_log->len)
+			goto clean_up;
+
+		goto print_to_user;
+	}
+
+	/*
+	 * a read from offset 0 starts a new snapshot; drop whatever an
+	 * earlier reader that never reached end of file left in the log,
+	 * otherwise the new walk would be appended after the stale data
+	 */
+	spin_lock(&data->log_lock);
+	data->debugfs_log->len = 0;
+	spin_unlock(&data->log_lock);
+
+	/* walk the inode tree */
+
+	current_hot_inode = find_next_hot_inode(fs_root, 0);
+
+	while (current_hot_inode) {
+		/* print this inode's data to the debugfs log */
+		__print_inode_freq_data(current_hot_inode, data);
+
+		/*
+		 * fetch the inode number before releasing the item; it
+		 * must not be touched after free_hot_inode_item()
+		 */
+		next_ino = (u64) current_hot_inode->i_ino + 1;
+		free_hot_inode_item(current_hot_inode);
+		current_hot_inode = find_next_hot_inode(fs_root, next_ino);
+	}
+
+print_to_user:
+
+	if (data->debugfs_log->len) {
+		err = simple_read_from_buffer(user, count, ppos,
+				      data->debugfs_log->str,
+				      data->debugfs_log->len);
+	}
+
+	return err;
+
+clean_up:
+
+	/* reader has finished the file */
+	/* clean up */
+	debugfs_log_exit(data);
+	kfree(data->debugfs_log);
+	data->debugfs_log = NULL;
+
+	return 0;
+}
+
+/*
+ * Visit every range item in the inode's hot_range_tree, in rb-tree
+ * order, and log each one. The tree's lock is held for reading during
+ * the whole walk.
+ */
+void __walk_range_tree(struct hot_inode_item *hot_inode,
+		       struct debugfs_vol_data *data)
+{
+	struct hot_range_tree *tree = &hot_inode->hot_range_tree;
+	struct rb_node *cursor;
+
+	read_lock(&tree->lock);
+
+	for (cursor = rb_first(&tree->map); cursor; cursor = rb_next(cursor))
+		__print_range_freq_data(hot_inode,
+			rb_entry(cursor, struct hot_range_item, rb_node),
+			data);
+
+	read_unlock(&tree->lock);
+}
+
+/*
+ * Write one log line for a range: owning inode number, range start and
+ * length, read and write counts, and the temperature of the heat list
+ * the range currently sits on.
+ */
+void __print_range_freq_data(struct hot_inode_item *hot_inode,
+			     struct hot_range_item *hot_range,
+			     struct debugfs_vol_data *data)
+{
+	struct btrfs_freq_data *stats = &hot_range->freq_data;
+	int heat;
+
+	/* sample the temperature under the heat list's own lock */
+	read_lock(&hot_range->heat_node->hlist->rwlock);
+	heat = hot_range->heat_node->hlist->temperature;
+	read_unlock(&hot_range->heat_node->hlist->rwlock);
+
+	/* lock order: hot_inode_item first, then hot_range_item */
+	spin_lock(&hot_inode->lock);
+	spin_lock(&hot_range->lock);
+
+	debugfs_log(data, "inode #%lu, range start "
+			"%llu (range len %llu) reads %u, writes %u, temp %u\n",
+			hot_inode->i_ino, hot_range->start, hot_range->len,
+			stats->nr_reads, stats->nr_writes, heat);
+
+	spin_unlock(&hot_range->lock);
+	spin_unlock(&hot_inode->lock);
+}
+
+/*
+ * Write one log line for an inode: inode number, read and write counts,
+ * and the temperature of the heat list the inode currently sits on.
+ */
+void __print_inode_freq_data(struct hot_inode_item *hot_inode,
+			     struct debugfs_vol_data *data)
+{
+	struct btrfs_freq_data *stats = &hot_inode->freq_data;
+	int heat;
+
+	/* sample the temperature under the heat list's own lock */
+	read_lock(&hot_inode->heat_node->hlist->rwlock);
+	heat = hot_inode->heat_node->hlist->temperature;
+	read_unlock(&hot_inode->heat_node->hlist->rwlock);
+
+	spin_lock(&hot_inode->lock);
+	debugfs_log(data, "inode #%lu, reads %u, writes %u, temp %u\n",
+			hot_inode->i_ino, stats->nr_reads, stats->nr_writes,
+			heat);
+	spin_unlock(&hot_inode->lock);
+}
+
diff --git a/fs/btrfs/debugfs.h b/fs/btrfs/debugfs.h
new file mode 100644
index 0000000..bdd4938
--- /dev/null
+++ b/fs/btrfs/debugfs.h
@@ -0,0 +1,57 @@
+#ifndef __BTRFS_DEBUGFS__
+#define __BTRFS_DEBUGFS__
+
+/* size of log to vmalloc */
+#define INIT_LOG_ALLOC_SIZE (PAGE_SIZE * 10)
+#define LOG_PAGE_SIZE (PAGE_SIZE * 10)
+
+/* number of leading chars to chop off the device name when making the
+ * debugfs folder, e.g. /dev/sda -> sda */
+#define DEV_NAME_CHOP 5
+
+/* list to keep track of each mounted volume's debugfs_vol_data */
+static struct list_head debugfs_vol_data_list;
+/* lock for debugfs_vol_data_list */
+static spinlock_t data_list_lock;
+
+/*
+ * Name for BTRFS data in debugfs directory
+ * e.g. /sys/kernel/debug/btrfs_data
+ */
+#define DEBUGFS_ROOT_NAME "btrfs_data"
+/* pointer to top level debugfs dentry */
+static struct dentry *debugfs_root_dentry;
+
+/*
+ * log to output to userspace in debugfs files: a length-counted buffer
+ * that the debugfs read handlers copy out to the reader
+ */
+struct lstring {
+	char		*str;	/* vmalloc'ed log buffer, NULL when not allocated */
+	unsigned long	len;	/* number of bytes of str currently in use */
+};
+
+/*
+ * debugfs_vol_data is a struct of items that is passed to the debugfs
+ * files as their private data. Each mounted volume gets two of these,
+ * one for its range_data file and one for its inode_data file.
+ */
+struct debugfs_vol_data {
+	struct list_head node; /* protected by data_list_lock */
+	/* allocated on first read, freed when a reader hits end of file */
+	struct lstring *debugfs_log;
+	struct super_block *sb; /* volume this entry belongs to */
+	struct dentry *de; /* the volume's debugfs directory */
+	spinlock_t log_lock; /* protects debugfs_log */
+	char log_work_buff[1024]; /* scratch space for formatting one message */
+	uint log_alloc_size; /* bytes currently allocated for debugfs_log->str */
+};
+
+/* read handlers for the range_data and inode_data debugfs files */
+ssize_t __btrfs_debugfs_range_read(struct file *file, char __user *user,
+	size_t size, loff_t *len);
+ssize_t __btrfs_debugfs_inode_read(struct file *file, char __user *user,
+	size_t size, loff_t *len);
+/* open handler shared by both files */
+int __btrfs_debugfs_open(struct inode *inode, struct file *file);
+/* log every range of one hot inode */
+void __walk_range_tree(struct hot_inode_item *hot_inode,
+			struct debugfs_vol_data *data);
+/* log the frequency data of a single range */
+void __print_range_freq_data(struct hot_inode_item *hot_inode,
+		       struct hot_range_item *hot_range,
+		       struct debugfs_vol_data *data);
+/* log the frequency data of a single inode */
+void __print_inode_freq_data(struct hot_inode_item *hot_inode,
+		       struct debugfs_vol_data *data);
+
+#endif
-- 
1.7.1


  parent reply	other threads:[~2010-07-27 22:00 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-07-27 22:00 [RFC PATCH 0/5] Btrfs: Add hot data tracking functionality bchociej
2010-07-27 22:00 ` [RFC PATCH 1/5] Btrfs: Add experimental hot data hash list index bchociej
2010-07-27 22:00 ` [RFC PATCH 2/5] Btrfs: Add data structures for hot data tracking bchociej
2010-07-27 22:00 ` [RFC PATCH 3/5] Btrfs: 3 new ioctls related to hot data features bchociej
2010-07-27 22:00 ` bchociej [this message]
2010-07-27 22:00 ` [RFC PATCH 5/5] Btrfs: Add hooks to enable hot data tracking bchociej
2010-07-27 22:29 ` [RFC PATCH 0/5] Btrfs: Add hot data tracking functionality Tracy Reed
2010-07-28 21:22   ` Mingming Cao
2010-07-27 23:10 ` Diego Calleja
2010-07-27 23:10   ` Diego Calleja
2010-07-27 23:10   ` Diego Calleja
2010-07-27 23:18   ` Ben Chociej
2010-07-27 23:18     ` Ben Chociej
2010-07-28 12:28     ` Chris Samuel
2010-07-27 23:38 ` Christian Stroetmann
2010-07-28 22:00   ` Mingming Cao
2010-07-29 12:17     ` Dave Chinner
2010-07-29 13:17       ` Christian Stroetmann
2010-08-04 17:40       ` Mingming Cao
2010-08-04 18:44         ` Christian Stroetmann

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1280268023-18408-5-git-send-email-bchociej@gmail.com \
    --to=bchociej@gmail.com \
    --cc=bcchocie@us.ibm.com \
    --cc=chris.mason@oracle.com \
    --cc=cmm@us.ibm.com \
    --cc=crscott@us.ibm.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mrlupfer@us.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.