linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC 0/4] BloodTest: kernel status
@ 2017-10-13  8:56 Hui Zhu
  2017-10-13  8:56 ` [RFC 1/4] " Hui Zhu
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Hui Zhu @ 2017-10-13  8:56 UTC (permalink / raw)
  To: linux-kernel; +Cc: teawater, Hui Zhu

BloodTest: an interface to call other analysing tools

The Linux kernel has a lot of analysis tools: perf, ftrace, systemtap,
KGTP and so on.
The kernel also exposes a lot of internal values through procfs and
sysfs that can be used to analyse performance.

Sometimes a user needs to get performance information quickly, with low
overhead and full coverage.
BloodTest is for that.
It is an interface that can access the functions of other analysis tools
and record their output to an internal buffer that a user or application
can access very quickly (mmap).

For now, BloodTest only supports recording cpu, perf and task
information over a period of one second.

Hui Zhu (4):
BloodTest: kernel status
BloodTest: perf
Module: add /proc/modules_update_version
BloodTest: task

 fs/proc/stat.c                 |    8 
 include/linux/bloodtest.h      |   10 
 include/linux/kernel_stat.h    |    3 
 init/Kconfig                   |    3 
 kernel/Makefile                |    2 
 kernel/bloodtest/Makefile      |    3 
 kernel/bloodtest/core.c        |  132 +++++++++
 kernel/bloodtest/internal.h    |   61 ++++
 kernel/bloodtest/kernel_stat.c |   62 ++++
 kernel/bloodtest/pages.c       |  266 ++++++++++++++++++
 kernel/bloodtest/perf.c        |  576 +++++++++++++++++++++++++++++++++++++++++
 kernel/bloodtest/task.c        |  447 +++++++++++++++++++++++++++++++
 kernel/exit.c                  |    4 
 kernel/module.c                |   19 +
 14 files changed, 1592 insertions(+), 4 deletions(-)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [RFC 1/4] BloodTest: kernel status
  2017-10-13  8:56 [RFC 0/4] BloodTest: kernel status Hui Zhu
@ 2017-10-13  8:56 ` Hui Zhu
  2017-10-13  8:56 ` [RFC 2/4] BloodTest: perf Hui Zhu
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Hui Zhu @ 2017-10-13  8:56 UTC (permalink / raw)
  To: linux-kernel; +Cc: teawater, Hui Zhu

This patch adds the base framework of BloodTest and the function that
collects the kernel status.

The interface is in "/sys/kernel/debug/bloodtest".
Reading "test" calls bt_insert, which calls every start-record function,
and starts an hrtimer that calls bt_pullout to stop recording.

bt_insert and bt_pullout call the analysis tools.

Signed-off-by: Hui Zhu <zhuhui@xiaomi.com>
---
 fs/proc/stat.c                 |   8 +--
 include/linux/kernel_stat.h    |   3 ++
 init/Kconfig                   |   3 ++
 kernel/Makefile                |   2 +
 kernel/bloodtest/Makefile      |   1 +
 kernel/bloodtest/core.c        | 117 +++++++++++++++++++++++++++++++++++++++++
 kernel/bloodtest/internal.h    |  19 +++++++
 kernel/bloodtest/kernel_stat.c |  62 ++++++++++++++++++++++
 8 files changed, 211 insertions(+), 4 deletions(-)
 create mode 100644 kernel/bloodtest/Makefile
 create mode 100644 kernel/bloodtest/core.c
 create mode 100644 kernel/bloodtest/internal.h
 create mode 100644 kernel/bloodtest/kernel_stat.c

diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index bd4e55f..c6f4fd4 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -22,7 +22,7 @@
 
 #ifdef arch_idle_time
 
-static u64 get_idle_time(int cpu)
+u64 get_idle_time(int cpu)
 {
 	u64 idle;
 
@@ -32,7 +32,7 @@ static u64 get_idle_time(int cpu)
 	return idle;
 }
 
-static u64 get_iowait_time(int cpu)
+u64 get_iowait_time(int cpu)
 {
 	u64 iowait;
 
@@ -44,7 +44,7 @@ static u64 get_iowait_time(int cpu)
 
 #else
 
-static u64 get_idle_time(int cpu)
+u64 get_idle_time(int cpu)
 {
 	u64 idle, idle_usecs = -1ULL;
 
@@ -60,7 +60,7 @@ static u64 get_idle_time(int cpu)
 	return idle;
 }
 
-static u64 get_iowait_time(int cpu)
+u64 get_iowait_time(int cpu)
 {
 	u64 iowait, iowait_usecs = -1ULL;
 
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 66be8b6..bf8d3f0 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -96,4 +96,7 @@ static inline void account_process_tick(struct task_struct *tsk, int user)
 
 extern void account_idle_ticks(unsigned long ticks);
 
+extern u64 get_idle_time(int cpu);
+extern u64 get_iowait_time(int cpu);
+
 #endif /* _LINUX_KERNEL_STAT_H */
diff --git a/init/Kconfig b/init/Kconfig
index 78cb246..f63550c 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1909,3 +1909,6 @@ config ASN1
 	  functions to call on what tags.
 
 source "kernel/Kconfig.locks"
+
+config BLOODTEST
+	bool "Blood test"
diff --git a/kernel/Makefile b/kernel/Makefile
index ed470aa..2a04e42 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -103,6 +103,8 @@ obj-$(CONFIG_BPF) += bpf/
 
 obj-$(CONFIG_PERF_EVENTS) += events/
 
+obj-$(CONFIG_BLOODTEST) += bloodtest/
+
 obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
 obj-$(CONFIG_PADATA) += padata.o
 obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
diff --git a/kernel/bloodtest/Makefile b/kernel/bloodtest/Makefile
new file mode 100644
index 0000000..7f289af
--- /dev/null
+++ b/kernel/bloodtest/Makefile
@@ -0,0 +1 @@
+obj-y	= core.o kernel_stat.o
diff --git a/kernel/bloodtest/core.c b/kernel/bloodtest/core.c
new file mode 100644
index 0000000..7b39cbb
--- /dev/null
+++ b/kernel/bloodtest/core.c
@@ -0,0 +1,117 @@
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/hrtimer.h>
+#include <linux/debugfs.h>
+#include <linux/fs.h>
+
+#include "internal.h"
+
+enum bt_stat_enum bt_stat;
+DEFINE_SPINLOCK(bt_lock);
+
+static DECLARE_WAIT_QUEUE_HEAD(bt_wq);
+static struct hrtimer bt_timer;
+static ktime_t bt_ktime;
+
+static bool is_bt_stat(enum bt_stat_enum stat)
+{
+	unsigned long flags;
+	bool ret = false;
+
+	spin_lock_irqsave(&bt_lock, flags);
+	if (bt_stat == stat)
+		ret = true;
+	spin_unlock_irqrestore(&bt_lock, flags);
+
+	return ret;
+}
+
+/* This function must be called under the protection of bt_lock.  */
+static void bt_insert(void)
+{
+	bt_stat = bt_running;
+
+	bt_insert_kernel_stat();
+}
+
+/* This function must be called under the protection of bt_lock.  */
+static void bt_pullout(void)
+{
+	bt_pullout_kernel_stat();
+
+	bt_stat = bt_done;
+}
+
+/* This function must be called under the protection of bt_lock.  */
+static void bt_report(struct seq_file *p)
+{
+	bt_report_kernel_stat(p);
+}
+
+static enum hrtimer_restart bt_timer_fn(struct hrtimer *data)
+{
+	spin_lock(&bt_lock);
+	bt_pullout();
+	spin_unlock(&bt_lock);
+
+	wake_up_interruptible_all(&bt_wq);
+
+	return HRTIMER_NORESTART;
+}
+
+static int test_show(struct seq_file *p, void *v)
+{
+	int ret = 0;
+
+	spin_lock(&bt_lock);
+	if (bt_stat == bt_running)
+		goto wait;
+
+	hrtimer_start(&bt_timer, bt_ktime, HRTIMER_MODE_REL);
+	bt_insert();
+
+wait:
+	spin_unlock(&bt_lock);
+	ret = wait_event_interruptible(bt_wq, is_bt_stat(bt_done));
+	if (ret)
+		goto out;
+
+	spin_lock(&bt_lock);
+	bt_report(p);
+	spin_unlock(&bt_lock);
+
+out:
+	return ret;
+}
+
+static int test_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, test_show, NULL);
+}
+
+static const struct file_operations test_fops = {
+	.open		= test_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int __init bt_init(void)
+{
+	struct dentry *d, *t;
+
+	d = debugfs_create_dir("bloodtest", NULL);
+	if (!d)
+		return -ENOMEM;
+	t = debugfs_create_file("test", S_IRUSR, d, NULL, &test_fops);
+	if (!t)
+		return -ENOMEM;
+
+	hrtimer_init(&bt_timer, CLOCK_REALTIME, HRTIMER_MODE_REL);
+	bt_timer.function = bt_timer_fn;
+	bt_ktime = ktime_set(1, 0);
+
+	return 0;
+}
+
+core_initcall(bt_init);
diff --git a/kernel/bloodtest/internal.h b/kernel/bloodtest/internal.h
new file mode 100644
index 0000000..48faf4d
--- /dev/null
+++ b/kernel/bloodtest/internal.h
@@ -0,0 +1,19 @@
+#ifndef _KERNEL_BLOODTEST_INTERNAL_H
+#define _KERNEL_BLOODTEST_INTERNAL_H
+
+#include <linux/seq_file.h>
+
+enum bt_stat_enum {
+	bt_empty = 0,
+	bt_done,
+	bt_running,
+};
+
+extern enum bt_stat_enum bt_stat;
+extern spinlock_t bt_lock;
+
+extern void bt_insert_kernel_stat(void);
+extern void bt_pullout_kernel_stat(void);
+extern void bt_report_kernel_stat(struct seq_file *p);
+
+#endif /* _KERNEL_BLOODTEST_INTERNAL_H */
diff --git a/kernel/bloodtest/kernel_stat.c b/kernel/bloodtest/kernel_stat.c
new file mode 100644
index 0000000..7c37403
--- /dev/null
+++ b/kernel/bloodtest/kernel_stat.c
@@ -0,0 +1,62 @@
+#include <linux/kernel_stat.h>
+
+#include "internal.h"
+
+struct kernel_cpustat_rec {
+	u64 rec[2][NR_STATS];
+};
+static DEFINE_PER_CPU(struct kernel_cpustat_rec, cpustat_rec);
+
+#define kstat(cpu) (per_cpu(cpustat_rec, cpu).rec)
+
+static void record_kernel_stat(int rec)
+{
+	int cpu;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	for_each_possible_cpu(cpu) {
+		enum cpu_usage_stat j;
+
+		for (j = CPUTIME_USER; j < NR_STATS; j++) {
+			if (j == CPUTIME_IDLE)
+				kstat(cpu)[rec][j] = get_idle_time(cpu);
+			else if (j == CPUTIME_IOWAIT)
+				kstat(cpu)[rec][j] = get_iowait_time(cpu);
+			else
+				kstat(cpu)[rec][j]
+					= kcpustat_cpu(cpu).cpustat[j];
+		}
+	}
+	local_irq_restore(flags);
+}
+
+static unsigned long long
+get_kernel_stat(int cpu, enum cpu_usage_stat stat)
+{
+	return (unsigned long long)(kstat(cpu)[1][stat] - kstat(cpu)[0][stat]);
+}
+
+void bt_insert_kernel_stat(void)
+{
+	record_kernel_stat(0);
+}
+
+void bt_pullout_kernel_stat(void)
+{
+	record_kernel_stat(1);
+}
+
+void bt_report_kernel_stat(struct seq_file *p)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		enum cpu_usage_stat j;
+
+		seq_printf(p, "cpu%d", cpu);
+		for (j = CPUTIME_USER; j < NR_STATS; j++)
+			seq_put_decimal_ull(p, " ", get_kernel_stat(cpu, j));
+		seq_putc(p, '\n');
+	}
+}
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [RFC 2/4] BloodTest: perf
  2017-10-13  8:56 [RFC 0/4] BloodTest: kernel status Hui Zhu
  2017-10-13  8:56 ` [RFC 1/4] " Hui Zhu
@ 2017-10-13  8:56 ` Hui Zhu
  2017-10-13  8:56 ` [RFC 3/4] module: add /proc/modules_update_version Hui Zhu
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Hui Zhu @ 2017-10-13  8:56 UTC (permalink / raw)
  To: linux-kernel; +Cc: teawater, Hui Zhu

This patch adds the code that calls perf, and bt_pages, which records
the data obtained from perf.

The interface is in "/sys/kernel/debug/bloodtest/perf".
"on" is the switch.  When it is set to 1, reading "test" will call perf.
"perf_config", "perf_freq", "perf_period" and "perf_type" set the
options of perf.
After recording, reading "str" returns the recorded data as a string.
Reading "cpu0/bin" returns the recorded data in binary; its format is
described in "bin_format".

Signed-off-by: Hui Zhu <zhuhui@xiaomi.com>
---
 kernel/bloodtest/Makefile   |   4 +-
 kernel/bloodtest/core.c     |  76 +++---
 kernel/bloodtest/internal.h |  43 +++-
 kernel/bloodtest/pages.c    | 266 ++++++++++++++++++++
 kernel/bloodtest/perf.c     | 591 ++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 931 insertions(+), 49 deletions(-)
 create mode 100644 kernel/bloodtest/pages.c
 create mode 100644 kernel/bloodtest/perf.c

diff --git a/kernel/bloodtest/Makefile b/kernel/bloodtest/Makefile
index 7f289af..79b7ea0 100644
--- a/kernel/bloodtest/Makefile
+++ b/kernel/bloodtest/Makefile
@@ -1 +1,3 @@
-obj-y	= core.o kernel_stat.o
+obj-y	= core.o pages.o kernel_stat.o
+
+obj-$(CONFIG_PERF_EVENTS) += perf.o
diff --git a/kernel/bloodtest/core.c b/kernel/bloodtest/core.c
index 7b39cbb..5ba800c 100644
--- a/kernel/bloodtest/core.c
+++ b/kernel/bloodtest/core.c
@@ -6,31 +6,17 @@
 
 #include "internal.h"
 
-enum bt_stat_enum bt_stat;
-DEFINE_SPINLOCK(bt_lock);
+DECLARE_RWSEM(bt_lock);
 
 static DECLARE_WAIT_QUEUE_HEAD(bt_wq);
 static struct hrtimer bt_timer;
 static ktime_t bt_ktime;
-
-static bool is_bt_stat(enum bt_stat_enum stat)
-{
-	unsigned long flags;
-	bool ret = false;
-
-	spin_lock_irqsave(&bt_lock, flags);
-	if (bt_stat == stat)
-		ret = true;
-	spin_unlock_irqrestore(&bt_lock, flags);
-
-	return ret;
-}
+static bool bt_timer_stop;
 
 /* This function must be called under the protection of bt_lock.  */
 static void bt_insert(void)
 {
-	bt_stat = bt_running;
-
+	bt_insert_perf();
 	bt_insert_kernel_stat();
 }
 
@@ -38,8 +24,13 @@ static void bt_insert(void)
 static void bt_pullout(void)
 {
 	bt_pullout_kernel_stat();
+	bt_pullout_perf();
+}
 
-	bt_stat = bt_done;
+/* This function must be called under the protection of bt_lock.  */
+static void bt_task_pullout(void)
+{
+	bt_task_pullout_perf();
 }
 
 /* This function must be called under the protection of bt_lock.  */
@@ -50,38 +41,33 @@ static void bt_report(struct seq_file *p)
 
 static enum hrtimer_restart bt_timer_fn(struct hrtimer *data)
 {
-	spin_lock(&bt_lock);
 	bt_pullout();
-	spin_unlock(&bt_lock);
 
-	wake_up_interruptible_all(&bt_wq);
+	bt_timer_stop = true;
+	wake_up_all(&bt_wq);
 
 	return HRTIMER_NORESTART;
 }
 
-static int test_show(struct seq_file *p, void *v)
+static int test_show(struct seq_file *p, void *unused)
 {
-	int ret = 0;
+	down_write(&bt_lock);
 
-	spin_lock(&bt_lock);
-	if (bt_stat == bt_running)
-		goto wait;
+	bt_timer_stop = false;
 
-	hrtimer_start(&bt_timer, bt_ktime, HRTIMER_MODE_REL);
 	bt_insert();
+	hrtimer_start(&bt_timer, bt_ktime, HRTIMER_MODE_REL);
 
-wait:
-	spin_unlock(&bt_lock);
-	ret = wait_event_interruptible(bt_wq, is_bt_stat(bt_done));
-	if (ret)
-		goto out;
+	wait_event(bt_wq, bt_timer_stop);
 
-	spin_lock(&bt_lock);
-	bt_report(p);
-	spin_unlock(&bt_lock);
+	bt_task_pullout();
+	up_write(&bt_lock);
 
-out:
-	return ret;
+	down_read(&bt_lock);
+	bt_report(p);
+	up_read(&bt_lock);
+	
+	return 0;
 }
 
 static int test_open(struct inode *inode, struct file *file)
@@ -98,20 +84,28 @@ static int test_open(struct inode *inode, struct file *file)
 
 static int __init bt_init(void)
 {
-	struct dentry *d, *t;
+	int ret = -ENOMEM;
+	struct dentry *d = NULL, *t = NULL;
 
 	d = debugfs_create_dir("bloodtest", NULL);
 	if (!d)
-		return -ENOMEM;
+		goto out;
 	t = debugfs_create_file("test", S_IRUSR, d, NULL, &test_fops);
 	if (!t)
-		return -ENOMEM;
+		goto out;
 
 	hrtimer_init(&bt_timer, CLOCK_REALTIME, HRTIMER_MODE_REL);
 	bt_timer.function = bt_timer_fn;
 	bt_ktime = ktime_set(1, 0);
 
-	return 0;
+	ret = bt_perf_init(d);
+
+out:
+	if (ret != 0) {
+		debugfs_remove(t);
+		debugfs_remove(d);
+	}
+	return ret;
 }
 
 core_initcall(bt_init);
diff --git a/kernel/bloodtest/internal.h b/kernel/bloodtest/internal.h
index 48faf4d..f6befc4 100644
--- a/kernel/bloodtest/internal.h
+++ b/kernel/bloodtest/internal.h
@@ -3,17 +3,46 @@
 
 #include <linux/seq_file.h>
 
-enum bt_stat_enum {
-	bt_empty = 0,
-	bt_done,
-	bt_running,
-};
+extern struct rw_semaphore bt_lock;
+
+struct bt_pages {
+	struct page **pages;
+	unsigned int pages_num;
+
+	int node;
 
-extern enum bt_stat_enum bt_stat;
-extern spinlock_t bt_lock;
+	unsigned int entry_size;
+	unsigned int entry_max_per_page;
+
+	void *entry_next;
+	unsigned int entry_count_in_page;
+	unsigned int index;
+};
+extern const struct file_operations bt_pages_bin_fops;
+extern int bt_pages_entry_num_get(void *data, u64 *val);
+extern int bt_pages_page_num_get(void *data, u64 *val);
+extern void bt_pages_clear(struct bt_pages *pages);
+extern int bt_pages_setup(struct bt_pages *pages, unsigned int entry_size,
+			  unsigned int entry_max, int cpu);
+extern void bt_pages_release(struct bt_pages *pages);
+extern void *bt_pages_alloc_entry(struct bt_pages *pages);
+extern void *bt_pages_get_entry(struct bt_pages *pages, unsigned int *index,
+				void *prev_entry);
 
 extern void bt_insert_kernel_stat(void);
 extern void bt_pullout_kernel_stat(void);
 extern void bt_report_kernel_stat(struct seq_file *p);
 
+#ifdef CONFIG_PERF_EVENTS
+extern void bt_insert_perf(void);
+extern void bt_pullout_perf(void);
+extern void bt_task_pullout_perf(void);
+extern int bt_perf_init(struct dentry *d);
+#else
+static inline void bt_insert_perf(void)			{ }
+static inline void bt_pullout_perf(void)		{ }
+static inline void bt_task_pullout_perf(void)		{ }
+static inline int bt_perf_init(struct dentry *d)	{ return 0; }
+#endif
+
 #endif /* _KERNEL_BLOODTEST_INTERNAL_H */
diff --git a/kernel/bloodtest/pages.c b/kernel/bloodtest/pages.c
new file mode 100644
index 0000000..077b7b8
--- /dev/null
+++ b/kernel/bloodtest/pages.c
@@ -0,0 +1,266 @@
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/fs.h>
+
+#include "internal.h"
+
+static unsigned int
+bt_pages_entry_num(struct bt_pages *pages)
+{
+	return pages->index * pages->entry_max_per_page
+		+ pages->entry_count_in_page;
+}
+
+static inline unsigned int
+bt_pages_page_num(struct bt_pages *pages)
+{
+	return pages->index + (pages->entry_count_in_page ? 1 : 0);
+}
+
+int bt_pages_entry_num_get(void *data, u64 *val)
+{
+	struct bt_pages *pages = data;
+
+	down_read(&bt_lock);
+
+	*val = (u64)bt_pages_entry_num(pages);
+
+	up_read(&bt_lock);
+
+	return 0;
+}
+
+int bt_pages_page_num_get(void *data, u64 *val)
+{
+	struct bt_pages *pages = data;
+
+	down_read(&bt_lock);
+
+	*val = (u64)bt_pages_page_num(pages);
+
+	up_read(&bt_lock);
+
+	return 0;
+}
+
+void bt_pages_clear(struct bt_pages *pages)
+{
+	pages->entry_count_in_page = 0;
+	pages->index = 0;
+	pages->entry_next = page_address(pages->pages[0]);
+}
+
+/* pages must be memset to zero before call this function.  */
+
+int bt_pages_setup(struct bt_pages *pages,
+		   unsigned int entry_size, unsigned int entry_max,
+		   int cpu)
+{
+	int i;
+
+	pages->entry_size = entry_size;
+	pages->entry_max_per_page = PAGE_SIZE / entry_size;
+
+	if (cpu >= 0)
+		pages->node = cpu_to_node(cpu);
+	else
+		pages->node = NUMA_NO_NODE;
+
+	pages->pages_num = entry_max / pages->entry_max_per_page;
+	if (entry_max % pages->entry_max_per_page)
+		pages->pages_num++;
+	pages->pages = kmalloc_node(sizeof(struct page *) * pages->pages_num,
+				    GFP_KERNEL | __GFP_ZERO,
+				    pages->node);
+	if (!pages->pages)
+		return -ENOMEM;
+
+	for (i = 0; i < pages->pages_num; i++) {
+		pages->pages[i] = alloc_pages_node(pages->node,
+						   GFP_KERNEL | __GFP_ZERO,
+						   0);
+		if (!pages->pages[i])
+			return -ENOMEM;
+		clear_page(page_address(pages->pages[i]));
+	}
+
+	bt_pages_clear(pages);
+
+	return 0;
+}
+
+void bt_pages_release(struct bt_pages *pages)
+{
+	int i;
+
+	if (!pages->pages)
+		return;
+
+	for (i = 0; i < pages->pages_num; i++) {
+		if (pages->pages[i])
+			__free_page(pages->pages[i]);
+	}
+
+	kfree(pages->pages);
+
+	memset(pages, 0, sizeof(struct bt_pages));
+}
+
+void *
+bt_pages_alloc_entry(struct bt_pages *pages)
+{
+	void *ret = pages->entry_next;
+
+	if (!ret)
+		goto out;
+
+	pages->entry_count_in_page ++;
+
+	if (pages->entry_count_in_page >= pages->entry_max_per_page) {
+		/* Goto next page.  */
+		pages->index ++;
+		pages->entry_count_in_page = 0;
+		if (pages->index >= pages->pages_num) {
+			/* Pages is full.  */
+			pages->entry_next = NULL;
+		} else
+			pages->entry_next
+				= page_address(pages->pages[pages->index]);
+	} else
+		pages->entry_next += pages->entry_size;
+
+out:
+	return ret;
+}
+
+void *
+bt_pages_get_entry(struct bt_pages *pages, unsigned int *index, void *prev_entry)
+{
+	unsigned int max_size;
+	void *last_entry;
+
+get_entry:
+	if (*index > pages->index)
+		return NULL;
+
+	if (*index == pages->index)
+		max_size = pages->entry_count_in_page * pages->entry_size;
+	else
+		max_size = pages->entry_max_per_page * pages->entry_size;
+
+	if (max_size == 0)
+		return NULL;
+
+	if (!prev_entry)
+		return page_address(pages->pages[*index]);
+
+	last_entry = (void *)(((unsigned long)prev_entry & PAGE_MASK) +
+			      max_size - pages->entry_size);
+
+	if (prev_entry >= last_entry) {
+		/* Goto use next page.  */
+		(*index)++;
+		prev_entry = NULL;
+		goto get_entry;
+	}
+
+	return prev_entry + pages->entry_size;
+}
+
+static int bt_pages_bin_open(struct inode *inode, struct file *file)
+{
+	down_read(&bt_lock);
+
+	file->private_data = inode->i_private;
+
+	return 0;
+}
+
+static int bt_pages_bin_release(struct inode *inode, struct file *file)
+{
+	up_read(&bt_lock);
+
+	return 0;
+}
+
+ssize_t bt_pages_bin_read(struct file *file, char __user *buf,
+			  size_t len, loff_t *ppos)
+{
+	pgoff_t index;
+	loff_t offset;
+	size_t copied = 0;
+	struct bt_pages *pages;
+
+	if (!access_ok(VERIFY_WRITE, buf, len))
+		return -EFAULT;
+
+	pages = file->private_data;
+	offset = *ppos;
+	index = offset >> PAGE_SHIFT;
+	offset &= ~PAGE_MASK;
+
+	for (; index <= pages->index; index++) {
+		size_t copy_size;
+
+		if (len == 0)
+			break;
+
+		/* Get copy_size.  */
+		if (index == pages->index) {
+			copy_size = pages->entry_count_in_page *
+				    pages->entry_size;
+			if (copy_size == 0 || copy_size <= offset)
+				break;
+			copy_size -= offset;
+		} else
+			copy_size = PAGE_SIZE - offset;
+		if (copy_size > len)
+			copy_size = len;
+
+		if (__copy_to_user(buf,
+				   page_address(pages->pages[index]) + offset,
+				   copy_size))
+			return -EFAULT;
+
+		buf += copy_size;
+		len -= copy_size;
+		copied += copy_size;
+		offset = 0;
+	}
+
+	*ppos += copied;
+	return copied;
+}
+
+static int bt_pages_bin_fault(struct vm_fault *vmf)
+{
+	struct file *file = vmf->vma->vm_file;
+	struct bt_pages *pages = file->private_data;
+
+	if (vmf->pgoff >= bt_pages_page_num(pages))
+		return VM_FAULT_SIGBUS;
+
+	vmf->page = pages->pages[vmf->pgoff];
+	return 0;
+}
+
+static const struct vm_operations_struct bt_pages_bin_vm_ops = {
+	.fault		= bt_pages_bin_fault,
+};
+
+int bt_pages_bin_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
+		return -EINVAL;
+
+	vma->vm_ops = &bt_pages_bin_vm_ops;
+	return 0;
+}
+
+const struct file_operations bt_pages_bin_fops = {
+	.open		= bt_pages_bin_open,
+	.release	= bt_pages_bin_release,
+	.read		= bt_pages_bin_read,
+	.mmap		= bt_pages_bin_mmap,
+};
diff --git a/kernel/bloodtest/perf.c b/kernel/bloodtest/perf.c
new file mode 100644
index 0000000..cf23844
--- /dev/null
+++ b/kernel/bloodtest/perf.c
@@ -0,0 +1,591 @@
+#include <linux/perf_event.h>
+#include <linux/debugfs.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/stddef.h>
+
+#include "internal.h"
+
+struct perf_entry {
+	u64 pc;
+	u8 is_user;
+	s16 oom_score_adj;
+	char comm[TASK_COMM_LEN];
+};
+
+static bool perf_on;
+static bool perf_use_freq = true;
+static unsigned int perf_type = PERF_TYPE_SOFTWARE;
+static unsigned int perf_config = PERF_COUNT_SW_CPU_CLOCK;
+static unsigned int perf_period = 1000;
+static unsigned int perf_freq = 200;
+static unsigned int rec_max = 210;
+struct perf_rec {
+	struct perf_event *event;
+
+	atomic_t is_running;
+
+	/* Record entry.  */
+	struct bt_pages pages;
+	unsigned int drop;
+
+	struct dentry *dir;
+	struct dentry *number_fs;
+	struct dentry *page_fs;
+	struct dentry *drop_fs;
+	struct dentry *bin_fs;
+} __percpu *percpu_rec;
+
+struct dentry *perf_dir;
+struct dentry *perf_str_dir;
+
+static int perf_number_get(void *data, u64 *val)
+{
+	unsigned int *number_point = data;
+
+	down_read(&bt_lock);
+
+	*val = (u64)*number_point;
+
+	up_read(&bt_lock);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(perf_number_fops, perf_number_get, NULL, "%llu\n");
+
+static void perf_overflow_handler(struct perf_event *event,
+		struct perf_sample_data *data,
+		struct pt_regs *regs)
+{
+	struct perf_rec *rec = this_cpu_ptr(percpu_rec);
+	struct perf_entry *entry;
+
+	if (!atomic_read(&rec->is_running))
+		return;
+
+	entry = bt_pages_alloc_entry(&rec->pages);
+	if (entry) {
+		struct task_struct *p;
+
+		entry->pc = instruction_pointer(regs);
+		entry->is_user = user_mode(regs);
+
+		rcu_read_lock();
+		if (thread_group_leader(current))
+			p = current;
+		else
+			p = current->group_leader;
+		strncpy(entry->comm, p->comm, sizeof(p->comm));
+		entry->oom_score_adj = p->signal->oom_score_adj;
+		rcu_read_unlock();
+	} else
+		rec->drop++;
+}
+
+static int perf_event_creat(void)
+{
+	int ret = 0;
+	struct perf_event_attr attr;
+	int cpu;
+
+	/* Setup attr.  */
+	memset(&attr, 0, sizeof(attr));
+	attr.type = perf_type;
+	attr.config = perf_config;
+	attr.disabled = true;
+	attr.pinned = true;
+	attr.freq = perf_use_freq;
+	if (attr.freq) {
+		attr.sample_freq = perf_freq;
+		pr_info("bloodtest: perf freq %llu\n",
+			(unsigned long long) attr.sample_freq);
+	} else {
+		attr.sample_period = perf_period;
+		pr_info("bloodtest: perf period %llu\n",
+			(unsigned long long) attr.sample_period);
+	}
+	attr.size = sizeof(attr);
+
+	for_each_possible_cpu(cpu) {
+		struct perf_rec *rec = per_cpu_ptr(percpu_rec, cpu);
+
+		/* Set event.  */
+		rec->event = perf_event_create_kernel_counter(&attr, cpu, NULL,
+							perf_overflow_handler,
+							NULL);
+		if (IS_ERR(rec->event)) {
+			ret = PTR_ERR(rec->event);
+			rec->event = NULL;
+			break;
+		}
+	}
+
+	return ret;
+}
+
+static void perf_event_release(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct perf_rec *rec = per_cpu_ptr(percpu_rec, cpu);
+
+		if (rec->event) {
+			perf_event_release_kernel(rec->event);
+			rec->event = NULL;
+		}
+	}
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(perf_entry_num_fops, bt_pages_entry_num_get, NULL,
+			"%llu\n");
+
+DEFINE_SIMPLE_ATTRIBUTE(perf_page_num_fops, bt_pages_page_num_get, NULL,
+			"%llu\n");
+
+static int perf_str_show(struct seq_file *p, void *unused)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct perf_rec *rec = per_cpu_ptr(percpu_rec, cpu);
+		struct perf_entry *entry = NULL;
+		unsigned int index = 0;
+
+		seq_printf(p, "cpu%d\n", cpu);
+		while (1) {
+			char buffer[KSYM_SYMBOL_LEN];
+
+			entry = bt_pages_get_entry(&rec->pages, &index,
+						   entry);
+			if (!entry)
+				break;
+
+			if (entry->is_user)
+				buffer[0] = '\0';
+			else
+				sprint_symbol(buffer, (unsigned long)entry->pc);
+
+			seq_printf(p, "%c %s\t\t%d\t%s[0x%llx]\n",
+				   entry->is_user ? 'u' : 'k',
+				   entry->comm,
+				   (int)entry->oom_score_adj,
+				   buffer,
+				   (unsigned long long)entry->pc);
+		}
+		seq_puts(p, "\n");
+	}
+
+	return 0;
+}
+
+static int perf_str_open(struct inode *inode, struct file *file)
+{
+	down_read(&bt_lock);
+
+	return single_open(file, perf_str_show, NULL);
+}
+
+static int perf_str_release(struct inode *inode, struct file *file)
+{
+	int ret = single_release(inode, file);
+
+	up_read(&bt_lock);
+	return ret;
+}
+
+static const struct file_operations perf_str_fops = {
+	.open		= perf_str_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= perf_str_release,
+};
+
+static int
+perf_pages_alloc(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		int ret;
+		struct perf_rec *rec = per_cpu_ptr(percpu_rec, cpu);
+
+		ret = bt_pages_setup(&rec->pages,
+				     sizeof(struct perf_entry),
+				     rec_max, cpu);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static void
+perf_pages_release(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct perf_rec *rec = per_cpu_ptr(percpu_rec, cpu);
+
+		bt_pages_release(&rec->pages);
+	}
+}
+
+static int perf_alloc(void)
+{
+	int cpu, ret;
+
+	percpu_rec = alloc_percpu(struct perf_rec);
+	if (!percpu_rec)
+		return -ENOMEM;
+
+	/* Init rec.  */
+	for_each_possible_cpu(cpu) {
+		struct perf_rec *rec = per_cpu_ptr(percpu_rec, cpu);
+
+		memset(rec, 0, sizeof(struct perf_rec));
+	}
+
+	ret = perf_pages_alloc();
+	if (ret)
+		return ret;
+
+	for_each_possible_cpu(cpu) {
+		struct perf_rec *rec = per_cpu_ptr(percpu_rec, cpu);
+		char name[10];
+
+		snprintf(name, 10, "cpu%d", cpu);
+		rec->dir = debugfs_create_dir(name, perf_dir);
+		if (!rec->dir)
+			return -ENOMEM;
+
+		rec->number_fs = debugfs_create_file("number",
+						     S_IRUSR,
+						     rec->dir,
+						     &rec->pages,
+						     &perf_entry_num_fops);
+		if (!rec->number_fs)
+			return -ENOMEM;
+
+		rec->page_fs = debugfs_create_file("page",
+						   S_IRUSR,
+						   rec->dir,
+						   &rec->pages,
+						   &perf_page_num_fops);
+		if (!rec->page_fs)
+			return -ENOMEM;
+
+		rec->drop_fs = debugfs_create_file("drop",
+						   S_IRUSR,
+						   rec->dir,
+						   &rec->drop,
+						   &perf_number_fops);
+		if (!rec->drop_fs)
+			return -ENOMEM;
+
+		rec->bin_fs = debugfs_create_file("bin",
+						  S_IRUSR,
+						  rec->dir,
+						  &rec->pages,
+						  &bt_pages_bin_fops);
+		if (!rec->bin_fs)
+			return -ENOMEM;
+	}
+
+	perf_str_dir = debugfs_create_file("str",
+					   S_IRUSR,
+					   perf_dir,
+					   NULL,
+					   &perf_str_fops);
+
+	return perf_event_creat();
+}
+
+static void perf_release(void)
+{
+	int cpu;
+
+	if (!percpu_rec)
+		return;
+
+	debugfs_remove(perf_str_dir);
+	perf_str_dir = NULL;
+
+	for_each_possible_cpu(cpu) {
+		struct perf_rec *rec = per_cpu_ptr(percpu_rec, cpu);
+
+		debugfs_remove(rec->number_fs);
+		debugfs_remove(rec->page_fs);
+		debugfs_remove(rec->drop_fs);
+		debugfs_remove(rec->bin_fs);
+		debugfs_remove(rec->dir);
+	}
+
+	perf_pages_release();
+
+	perf_event_release();
+
+	free_percpu(percpu_rec);
+	percpu_rec = NULL;
+}
+
+static int perf_on_set(void *data, u64 val)
+{
+	int ret = 0;
+
+	down_write(&bt_lock);
+
+	if (!perf_on && val) {
+		ret = perf_alloc();
+		if (ret) {
+			perf_release();
+			goto out;
+		}
+
+		perf_on = true;
+	} else if (perf_on && !val) {
+		perf_release();
+
+		perf_on = false;
+	}
+
+out:
+	up_write(&bt_lock);
+	return ret;
+}
+
+static int perf_on_get(void *data, u64 *val)
+{
+	down_read(&bt_lock);
+
+	if (perf_on)
+		*val = 1;
+	else
+		*val = 0;
+
+	up_read(&bt_lock);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(perf_on_fops, perf_on_get, perf_on_set, "%llu\n");
+
+static int perf_event_set(void *data, u64 val)
+{
+	unsigned int *number_point = data;
+	int ret = 0;
+
+	down_write(&bt_lock);
+
+	*number_point = (unsigned int)val;
+
+	if (number_point == &perf_freq)
+		perf_use_freq = true;
+	else if (number_point == &perf_period)
+		perf_use_freq = false;
+
+	if (perf_on) {
+		perf_event_release();
+		ret = perf_event_creat();
+		if (ret) {
+			pr_err("bloodtest: alloc perf get error %d\n", ret);
+			perf_release();
+			perf_on = false;
+		}
+	}
+
+	up_write(&bt_lock);
+	return ret;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(perf_event_fops,
+			perf_number_get,
+			perf_event_set, "%llu\n");
+
+static int perf_bin_format_show(struct seq_file *p, void *unused)
+{
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	seq_puts(p, "big-endian\n");
+#else
+	seq_puts(p, "little-endian\n");
+#endif
+	seq_printf(p, "size:%lu\n", sizeof(struct perf_entry));
+
+	seq_printf(p, "pc format:u64 unsigned offset:%lu size:%lu\n",
+		   offsetof(struct perf_entry, pc), sizeof(u64));
+	seq_printf(p, "is_user format:u8 unsigned offset:%lu size:%lu\n",
+		   offsetof(struct perf_entry, is_user), sizeof(u8));
+	seq_printf(p, "oom_score_adj format:s16 signed offset:%lu size:%lu\n",
+		   offsetof(struct perf_entry, oom_score_adj), sizeof(s16));
+	seq_printf(p, "comm format:char[] signed offset:%lu size:%d\n",
+		   offsetof(struct perf_entry, comm), TASK_COMM_LEN);
+
+	return 0;
+}
+
+static int perf_bin_format_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, perf_bin_format_show, NULL);
+}
+
+static const struct file_operations perf_bin_format_fops = {
+	.open		= perf_bin_format_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int rec_max_set(void *data, u64 val)
+{
+	int ret = 0;
+
+	down_write(&bt_lock);
+	if (rec_max == val)
+		goto out;
+
+	rec_max = val;
+
+	if (!perf_on)
+		goto out;
+
+	perf_pages_release();
+	ret = perf_pages_alloc();
+	if (ret) {
+		perf_release();
+		perf_on = false;
+		goto out;
+	}
+
+out:
+	up_write(&bt_lock);
+	return ret;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(rec_max_fops,
+			perf_number_get,
+			rec_max_set, "%llu\n");
+
+void
+bt_insert_perf(void)
+{
+	int cpu;
+
+	if (!perf_on)
+		return;
+
+	for_each_possible_cpu(cpu) {
+		struct perf_rec *rec = per_cpu_ptr(percpu_rec, cpu);
+
+		/* The perf is not running.  So doesn't lock.  */
+		bt_pages_clear(&rec->pages);
+		rec->drop = 0;
+		atomic_set(&rec->is_running, 1);
+		perf_event_enable(rec->event);
+	}
+}
+
+void
+bt_pullout_perf(void)
+{
+	int cpu;
+
+	if (!perf_on)
+		return;
+
+	for_each_possible_cpu(cpu) {
+		struct perf_rec *rec = per_cpu_ptr(percpu_rec, cpu);
+
+		/* This function is called by hrtimer that irq is disabled.
+		   Function perf_event_disable will call
+		   smp_call_function_single that should not run when irq is
+		   disabled.
+		   So call perf_event_disable later.  */
+		atomic_set(&rec->is_running, 0);
+	}
+}
+
+void
+bt_task_pullout_perf(void)
+{
+	int cpu;
+
+	if (!perf_on)
+		return;
+
+	for_each_possible_cpu(cpu) {
+		struct perf_rec *rec = per_cpu_ptr(percpu_rec, cpu);
+
+		perf_event_disable(rec->event);
+	}
+}
+
+
+
+int __init bt_perf_init(struct dentry *f)
+{
+	int ret = -ENOMEM;
+	struct dentry *on = NULL, *format = NULL, *period = NULL,
+		      *freq = NULL, *max = NULL, *type = NULL, *config = NULL;
+
+	perf_dir = debugfs_create_dir("perf", f);
+	if (!perf_dir)
+		goto out;
+
+	on = debugfs_create_file("on", S_IRUSR | S_IWUSR, perf_dir, NULL,
+				 &perf_on_fops);
+	if (!on)
+		goto out;
+
+	format = debugfs_create_file("bin_format", S_IRUSR,
+				     perf_dir, NULL,
+				     &perf_bin_format_fops);
+	if (!format)
+		goto out;
+
+	period = debugfs_create_file("perf_period", S_IRUSR | S_IWUSR, perf_dir,
+				     &perf_period,
+				     &perf_event_fops);
+	if (!period)
+		goto out;
+
+	freq = debugfs_create_file("perf_freq", S_IRUSR | S_IWUSR, perf_dir,
+				   &perf_freq,
+				   &perf_event_fops);
+	if (!freq)
+		goto out;
+
+	type = debugfs_create_file("perf_type", S_IRUSR | S_IWUSR, perf_dir,
+				   &perf_type,
+				   &perf_event_fops);
+	if (!type)
+		goto out;
+
+	config = debugfs_create_file("perf_config", S_IRUSR | S_IWUSR, perf_dir,
+				     &perf_config,
+				     &perf_event_fops);
+	if (!config)
+		goto out;
+
+	max = debugfs_create_file("rec_max", S_IRUSR | S_IWUSR, perf_dir,
+				   &rec_max,
+				   &rec_max_fops);
+	if (!max)
+		goto out;
+
+	ret = 0;
+out:
+	if (ret) {
+		debugfs_remove(on);
+		debugfs_remove(format);
+		debugfs_remove(period);
+		debugfs_remove(freq);
+		debugfs_remove(max);
+		debugfs_remove(type);
+		debugfs_remove(config);
+		debugfs_remove(perf_dir);
+	}
+	return ret;
+}
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [RFC 3/4] module: add /proc/modules_update_version
  2017-10-13  8:56 [RFC 0/4] BloodTest: kernel status Hui Zhu
  2017-10-13  8:56 ` [RFC 1/4] " Hui Zhu
  2017-10-13  8:56 ` [RFC 2/4] BloodTest: perf Hui Zhu
@ 2017-10-13  8:56 ` Hui Zhu
  2017-10-13  8:56 ` [RFC 4/4] BloodTest: task Hui Zhu
  2017-10-23  9:30 ` [RFC 0/4] BloodTest: kernel status Pavel Machek
  4 siblings, 0 replies; 6+ messages in thread
From: Hui Zhu @ 2017-10-13  8:56 UTC (permalink / raw)
  To: linux-kernel; +Cc: teawater, Hui Zhu

With "BloodTest: perf", we can get kernel addresses from "cpu0/page",
but without symbols.
The application that calls BloodTest needs to translate each address to
a symbol by itself.  For ordinary kernel addresses, vmlinux alone is
enough to get the right symbol.  But for addresses inside kernel modules,
it also needs the module load addresses from /proc/modules.

Adding /proc/modules_update_version helps the application find out
whether the kernel module addresses have changed or not.

Signed-off-by: Hui Zhu <zhuhui@xiaomi.com>
---
 kernel/module.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/kernel/module.c b/kernel/module.c
index de66ec8..ed6f370 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -317,6 +317,8 @@ struct load_info {
 	} index;
 };
 
+static atomic_t modules_update_version = ATOMIC_INIT(0);
+
 /*
  * We require a truly strong try_module_get(): 0 means success.
  * Otherwise an error is returned due to ongoing or failed
@@ -1020,6 +1022,9 @@ int module_refcount(struct module *mod)
 	strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module));
 
 	free_module(mod);
+
+	atomic_inc(&modules_update_version);
+
 	return 0;
 out:
 	mutex_unlock(&module_mutex);
@@ -3183,6 +3188,8 @@ static int move_module(struct module *mod, struct load_info *info)
 			 (long)shdr->sh_addr, info->secstrings + shdr->sh_name);
 	}
 
+	atomic_inc(&modules_update_version);
+
 	return 0;
 }
 
@@ -4196,9 +4203,21 @@ static int modules_open(struct inode *inode, struct file *file)
 	.release	= seq_release,
 };
 
+/* Simple-attribute getter: report the current modules_update_version. */
+static int modules_update_version_get(void *data, u64 *val)
+{
+	int version = atomic_read(&modules_update_version);
+
+	*val = (u64)version;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(proc_modules_update_version_operations,
+			modules_update_version_get, NULL, "%llu\n");
+
 static int __init proc_modules_init(void)
 {
 	proc_create("modules", 0, NULL, &proc_modules_operations);
+	proc_create("modules_update_version", 0, NULL,
+		    &proc_modules_update_version_operations);
 	return 0;
 }
 module_init(proc_modules_init);
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [RFC 4/4] BloodTest: task
  2017-10-13  8:56 [RFC 0/4] BloodTest: kernel status Hui Zhu
                   ` (2 preceding siblings ...)
  2017-10-13  8:56 ` [RFC 3/4] module: add /proc/modules_update_version Hui Zhu
@ 2017-10-13  8:56 ` Hui Zhu
  2017-10-23  9:30 ` [RFC 0/4] BloodTest: kernel status Pavel Machek
  4 siblings, 0 replies; 6+ messages in thread
From: Hui Zhu @ 2017-10-13  8:56 UTC (permalink / raw)
  To: linux-kernel; +Cc: teawater, Hui Zhu

This patch adds a function that records how much of the system's
resources each task uses, for example CPU time, read_bytes and write_bytes.
The interface is in "/sys/kernel/debug/bloodtest/task".
"on" is the switch.  When it is set to 1, accessing "test" will record
task information.
After recording, reading "str" returns the recorded data as a string.
Reading "page" returns the recorded data in binary; its format is
described in "bin_format".

Signed-off-by: Hui Zhu <zhuhui@xiaomi.com>
---
 include/linux/bloodtest.h   |  10 +
 kernel/bloodtest/Makefile   |   2 +-
 kernel/bloodtest/core.c     |  21 +++
 kernel/bloodtest/internal.h |  13 ++
 kernel/bloodtest/perf.c     |  33 +---
 kernel/bloodtest/task.c     | 447 ++++++++++++++++++++++++++++++++++++++++++++
 kernel/exit.c               |   4 +
 7 files changed, 505 insertions(+), 25 deletions(-)
 create mode 100644 include/linux/bloodtest.h
 create mode 100644 kernel/bloodtest/task.c

diff --git a/include/linux/bloodtest.h b/include/linux/bloodtest.h
new file mode 100644
index 0000000..55f4ebc
--- /dev/null
+++ b/include/linux/bloodtest.h
@@ -0,0 +1,10 @@
+#ifndef __LINUX_BLOODTEST_H
+#define __LINUX_BLOODTEST_H
+
+/*
+ * Only a pointer to the task is used here, so a forward declaration keeps
+ * this header self-contained without pulling in <linux/sched.h>.
+ */
+struct task_struct;
+
+#ifdef CONFIG_BLOODTEST
+/* Hook called from do_exit() to record exiting task @p. */
+extern void bt_task_exit_record(struct task_struct *p);
+#else
+/* BloodTest disabled: the exit hook is an empty inline. */
+static inline void bt_task_exit_record(struct task_struct *p)	{ }
+#endif
+
+#endif /* __LINUX_BLOODTEST_H */
diff --git a/kernel/bloodtest/Makefile b/kernel/bloodtest/Makefile
index 79b7ea0..a6f1a7a 100644
--- a/kernel/bloodtest/Makefile
+++ b/kernel/bloodtest/Makefile
@@ -1,3 +1,3 @@
-obj-y	= core.o pages.o kernel_stat.o
+obj-y	= core.o pages.o kernel_stat.o task.o
 
 obj-$(CONFIG_PERF_EVENTS) += perf.o
diff --git a/kernel/bloodtest/core.c b/kernel/bloodtest/core.c
index 5ba800c..6cfcdf2 100644
--- a/kernel/bloodtest/core.c
+++ b/kernel/bloodtest/core.c
@@ -16,6 +16,7 @@
 /* This function must be called under the protection of bt_lock.  */
 static void bt_insert(void)
 {
+	bt_insert_task();
 	bt_insert_perf();
 	bt_insert_kernel_stat();
 }
@@ -25,6 +26,7 @@ static void bt_pullout(void)
 {
 	bt_pullout_kernel_stat();
 	bt_pullout_perf();
+	bt_pullout_task();
 }
 
 /* This function must be called under the protection of bt_lock.  */
@@ -99,13 +101,32 @@ static int __init bt_init(void)
 	bt_ktime = ktime_set(1, 0);
 
 	ret = bt_perf_init(d);
+	if (ret < 0)
+		goto out;
+
+	ret = bt_task_init(d);
 
 out:
 	if (ret != 0) {
 		debugfs_remove(t);
 		debugfs_remove(d);
+		pr_err("bloodtest: init get error %d\n", ret);
 	}
 	return ret;
 }
 
 core_initcall(bt_init);
+
+/*
+ * Simple-attribute getter shared by the BloodTest files: @data points at an
+ * unsigned int, which is read while holding bt_lock for reading and
+ * returned through @val.  Always returns 0.
+ */
+int bt_number_get(void *data, u64 *val)
+{
+	const unsigned int *src = data;
+	unsigned int snapshot;
+
+	down_read(&bt_lock);
+	snapshot = *src;
+	up_read(&bt_lock);
+
+	*val = (u64)snapshot;
+	return 0;
+}
+
diff --git a/kernel/bloodtest/internal.h b/kernel/bloodtest/internal.h
index f6befc4..5aacf37 100644
--- a/kernel/bloodtest/internal.h
+++ b/kernel/bloodtest/internal.h
@@ -3,6 +3,13 @@
 
 #include <linux/seq_file.h>
 
+/*
+ * SHOW_FORMAT_1() prints one "bin_format" line to seq_file @p describing
+ * member @entry of struct type @s: its name, the C type name @type, the
+ * signedness string @sign, its byte offset inside @s and its size @size.
+ * offsetof() yields a size_t, so both numbers are cast to unsigned long to
+ * match the %lu conversions on every architecture.
+ *
+ * SHOW_FORMAT() is the common case where the size is simply sizeof(@type).
+ */
+#define SHOW_FORMAT_1(p, s, entry, type, sign, size) \
+	seq_printf(p, "%s format:%s %s offset:%lu size:%lu\n", \
+		   #entry, #type, sign, \
+		   (unsigned long)offsetof(s, entry), \
+		   (unsigned long)(size))
+#define SHOW_FORMAT(p, s, entry, type, sign) \
+	SHOW_FORMAT_1(p, s, entry, type, sign, sizeof(type))
+
 extern struct rw_semaphore bt_lock;
 
 struct bt_pages {
@@ -45,4 +52,10 @@ static inline void bt_task_pullout_perf(void)		{ }
 static inline int bt_perf_init(struct dentry *d)	{ return 0; }
 #endif
 
+extern void bt_insert_task(void);
+extern void bt_pullout_task(void);
+extern int bt_task_init(struct dentry *d);
+
+extern int bt_number_get(void *data, u64 *val);
+
 #endif /* _KERNEL_BLOODTEST_INTERNAL_H */
diff --git a/kernel/bloodtest/perf.c b/kernel/bloodtest/perf.c
index cf23844..d495258 100644
--- a/kernel/bloodtest/perf.c
+++ b/kernel/bloodtest/perf.c
@@ -40,20 +40,7 @@ struct perf_rec {
 struct dentry *perf_dir;
 struct dentry *perf_str_dir;
 
-static int perf_number_get(void *data, u64 *val)
-{
-	unsigned int *number_point = data;
-
-	down_read(&bt_lock);
-
-	*val = (u64)*number_point;
-
-	up_read(&bt_lock);
-
-	return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(perf_number_fops, perf_number_get, NULL, "%llu\n");
+DEFINE_SIMPLE_ATTRIBUTE(perf_number_fops, bt_number_get, NULL, "%llu\n");
 
 static void perf_overflow_handler(struct perf_event *event,
 		struct perf_sample_data *data,
@@ -402,7 +389,7 @@ static int perf_event_set(void *data, u64 val)
 }
 
 DEFINE_SIMPLE_ATTRIBUTE(perf_event_fops,
-			perf_number_get,
+			bt_number_get,
 			perf_event_set, "%llu\n");
 
 static int perf_bin_format_show(struct seq_file *p, void *unused)
@@ -412,16 +399,14 @@ static int perf_bin_format_show(struct seq_file *p, void *unused)
 #else
 	seq_puts(p, "little-endian\n");
 #endif
+	seq_printf(p, "page_size:%lu\n", PAGE_SIZE);
 	seq_printf(p, "size:%lu\n", sizeof(struct perf_entry));
 
-	seq_printf(p, "pc format:u64 unsigned offset:%lu size:%lu\n",
-		   offsetof(struct perf_entry, pc), sizeof(u64));
-	seq_printf(p, "is_user format:u8 unsigned offset:%lu size:%lu\n",
-		   offsetof(struct perf_entry, is_user), sizeof(u8));
-	seq_printf(p, "oom_score_adj format:s16 signed offset:%lu size:%lu\n",
-		   offsetof(struct perf_entry, oom_score_adj), sizeof(s16));
-	seq_printf(p, "comm format:char[] signed offset:%lu size:%d\n",
-		   offsetof(struct perf_entry, comm), TASK_COMM_LEN);
+	SHOW_FORMAT(p, struct perf_entry, pc, u64, "unsigned");
+	SHOW_FORMAT(p, struct perf_entry, is_user, u8, "unsigned");
+	SHOW_FORMAT(p, struct perf_entry, oom_score_adj, s16, "signed");
+	SHOW_FORMAT_1(p, struct perf_entry, comm, char[], "signed",
+		      TASK_COMM_LEN);
 
 	return 0;
 }
@@ -465,7 +450,7 @@ static int rec_max_set(void *data, u64 val)
 }
 
 DEFINE_SIMPLE_ATTRIBUTE(rec_max_fops,
-			perf_number_get,
+			bt_number_get,
 			rec_max_set, "%llu\n");
 
 void
diff --git a/kernel/bloodtest/task.c b/kernel/bloodtest/task.c
new file mode 100644
index 0000000..b44c892
--- /dev/null
+++ b/kernel/bloodtest/task.c
@@ -0,0 +1,447 @@
+#include <linux/debugfs.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/cputime.h>
+
+#include "internal.h"
+
+#define PROCESS_ERROR	1	/* lock_task_sighand() failed; counters are zero */
+#define PROCESS_INSERT	2	/* entry written by bt_insert_task() */
+#define PROCESS_PULLOUT	4	/* entry written by bt_pullout_task() */
+#define PROCESS_EXIT	8	/* entry written by bt_task_exit_record() */
+/*
+ * One record in rec_pages.  The layout is reported to user space by the
+ * "bin_format" file, which prints the offset and size of every member.
+ */
+struct process_entry {
+	u8 status;			/* PROCESS_* flags above */
+	pid_t pid;			/* copied from task->pid */
+	char comm[TASK_COMM_LEN];	/* copied from task->comm */
+
+	u64 utime;	/* filled by thread_group_cputime_adjusted() */
+	u64 stime;	/* filled by thread_group_cputime_adjusted() */
+
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+	u64 read_bytes;			/* summed from task_io_accounting */
+	u64 write_bytes;		/* summed from task_io_accounting */
+	u64 cancelled_write_bytes;	/* summed from task_io_accounting */
+#endif
+};
+
+static bool rec_on;			/* toggled through the "on" file */
+static unsigned int rec_max = 1000;	/* passed to bt_pages_setup(); set via "rec_max" */
+static DEFINE_SPINLOCK(rec_lock);	/* taken by insert, pullout and exit recording */
+static bool rec_running;		/* true between bt_insert_task() and bt_pullout_task() */
+static struct bt_pages rec_pages;	/* storage for struct process_entry records */
+static unsigned int rec_drop;		/* bumped when bt_pages_alloc_entry() returns NULL */
+/* debugfs entries; despite the *_dir names, all but task_dir are files. */
+static struct dentry *task_dir;		/* "task" directory */
+static struct dentry *bin_dir;		/* "bin" */
+static struct dentry *str_dir;		/* "str" */
+static struct dentry *number_dir;	/* "number" */
+static struct dentry *page_dir;		/* "page" */
+static struct dentry *drop_dir;		/* "drop" */
+
+/*
+ * seq_file show callback for "bin_format": describe the binary layout of
+ * struct process_entry (byte order, page size, record size, then one line
+ * per member) so user space can decode the binary record pages.
+ * Always returns 0.
+ */
+static int task_bin_format_show(struct seq_file *p, void *unused)
+{
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	seq_puts(p, "big-endian\n");
+#else
+	seq_puts(p, "little-endian\n");
+#endif
+	seq_printf(p, "page_size:%lu\n", PAGE_SIZE);
+	/* sizeof yields a size_t, which needs %zu rather than %lu. */
+	seq_printf(p, "size:%zu\n", sizeof(struct process_entry));
+
+	SHOW_FORMAT(p, struct process_entry, status, u8, "unsigned");
+	SHOW_FORMAT(p, struct process_entry, pid, pid_t, "signed");
+	SHOW_FORMAT_1(p, struct process_entry, comm, char[], "signed",
+		      TASK_COMM_LEN);
+
+	SHOW_FORMAT(p, struct process_entry, utime, u64, "unsigned");
+	SHOW_FORMAT(p, struct process_entry, stime, u64, "unsigned");
+
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+	SHOW_FORMAT(p, struct process_entry, read_bytes, u64, "unsigned");
+	SHOW_FORMAT(p, struct process_entry, write_bytes, u64, "unsigned");
+	SHOW_FORMAT(p, struct process_entry, cancelled_write_bytes, u64,
+		    "unsigned");
+#endif
+
+	return 0;
+}
+
+/* open() for "bin_format": bind the file to task_bin_format_show(). */
+static int task_bin_format_open(struct inode *inode, struct file *file)
+{
+	int ret = single_open(file, task_bin_format_show, NULL);
+
+	return ret;
+}
+
+static const struct file_operations task_bin_format_fops = {
+	.open		= task_bin_format_open,	/* single_open() on task_bin_format_show() */
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,	/* pairs with single_open() */
+};
+
+/*
+ * seq_file show callback for "str": walk every record in rec_pages and
+ * print it as text (comm, pid, status flags, CPU times and, when
+ * CONFIG_TASK_IO_ACCOUNTING is set, the I/O byte counters).
+ * Always returns 0.
+ */
+static int task_str_show(struct seq_file *p, void *unused)
+{
+	struct process_entry *entry = NULL;
+	unsigned int index = 0;
+
+	while (1) {
+		/* Returns the entry after @entry, or NULL once all are seen. */
+		entry = bt_pages_get_entry(&rec_pages, &index,
+					   entry);
+		if (!entry)
+			break;
+
+		seq_printf(p, "comm:%s pid:%d\n", entry->comm, entry->pid);
+		seq_puts(p, "status:");
+		if (entry->status & PROCESS_ERROR)
+			seq_puts(p, "PROCESS_ERROR | ");
+		if (entry->status & PROCESS_INSERT)
+			seq_puts(p, "PROCESS_INSERT");
+		if (entry->status & PROCESS_PULLOUT)
+			seq_puts(p, "PROCESS_PULLOUT");
+		if (entry->status & PROCESS_EXIT)
+			seq_puts(p, "PROCESS_EXIT");
+		seq_puts(p, "\n");
+
+		/* The counters are unsigned, so print them with %llu. */
+		seq_printf(p, "utime:%llu stime:%llu\n",
+			   (unsigned long long)entry->utime,
+			   (unsigned long long)entry->stime);
+
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+		seq_printf(p, "read_bytes:%llu",
+			   (unsigned long long)entry->read_bytes);
+		seq_printf(p, " write_bytes:%llu",
+			   (unsigned long long)entry->write_bytes);
+		seq_printf(p, " cancelled_write_bytes:%llu\n",
+			   (unsigned long long)entry->cancelled_write_bytes);
+#endif
+		seq_puts(p, "\n");
+	}
+
+	return 0;
+}
+
+/*
+ * open() for "str": take bt_lock for reading and bind the file to
+ * task_str_show().  On success the lock stays held until
+ * task_str_release(); on failure it is dropped here, because ->release is
+ * not invoked for a file whose open failed.
+ *
+ * Returns 0 on success or the error from single_open().
+ */
+static int task_str_open(struct inode *inode, struct file *file)
+{
+	int ret;
+
+	down_read(&bt_lock);
+
+	ret = single_open(file, task_str_show, NULL);
+	if (ret)
+		up_read(&bt_lock);
+
+	return ret;
+}
+
+/*
+ * release() for "str": tear down the seq_file state, then drop the read
+ * lock on bt_lock that task_str_open() took.
+ */
+static int task_str_release(struct inode *inode, struct file *file)
+{
+	const int status = single_release(inode, file);
+
+	up_read(&bt_lock);
+
+	return status;
+}
+
+static const struct file_operations task_str_fops = {
+	.open		= task_str_open,	/* takes bt_lock for reading */
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= task_str_release,	/* drops bt_lock again */
+};
+
+DEFINE_SIMPLE_ATTRIBUTE(process_entry_num_fops, bt_pages_entry_num_get, NULL,
+			"%llu\n");	/* read-only; backs the "number" file */
+
+DEFINE_SIMPLE_ATTRIBUTE(process_page_num_fops, bt_pages_page_num_get, NULL,
+			"%llu\n");	/* read-only; backs the "page" file */
+
+DEFINE_SIMPLE_ATTRIBUTE(rec_number_fops,
+			bt_number_get,
+			NULL, "%llu\n");	/* read-only; backs the "drop" file */
+
+/*
+ * Set up rec_pages for records of struct process_entry, using the current
+ * rec_max.  Returns whatever bt_pages_setup() returns.
+ */
+static int task_pages_alloc(void)
+{
+	int ret;
+
+	ret = bt_pages_setup(&rec_pages, sizeof(struct process_entry),
+			     rec_max, -1);
+
+	return ret;
+}
+
+/* Hand rec_pages back through bt_pages_release(). */
+static void task_pages_release(void)
+{
+	struct bt_pages *pages = &rec_pages;
+
+	bt_pages_release(pages);
+}
+
+/*
+ * Set up the record pages and create the per-recording debugfs files
+ * ("bin", "str", "number", "page" and "drop") under task_dir.
+ *
+ * Returns 0 on success, the task_pages_alloc() error if page setup fails,
+ * or -ENOMEM if a file cannot be created.  Nothing is undone here on
+ * failure; task_on_set() calls task_release() in that case.
+ */
+static int task_alloc(void)
+{
+	int err = task_pages_alloc();
+
+	if (err)
+		return err;
+
+	bin_dir = debugfs_create_file("bin", S_IRUSR, task_dir,
+				      &rec_pages, &bt_pages_bin_fops);
+	if (!bin_dir)
+		return -ENOMEM;
+
+	str_dir = debugfs_create_file("str", S_IRUSR, task_dir,
+				      NULL, &task_str_fops);
+	if (!str_dir)
+		return -ENOMEM;
+
+	number_dir = debugfs_create_file("number", S_IRUSR, task_dir,
+					 &rec_pages, &process_entry_num_fops);
+	if (!number_dir)
+		return -ENOMEM;
+
+	page_dir = debugfs_create_file("page", S_IRUSR, task_dir,
+				       &rec_pages, &process_page_num_fops);
+	if (!page_dir)
+		return -ENOMEM;
+
+	drop_dir = debugfs_create_file("drop", S_IRUSR, task_dir,
+				       &rec_drop, &rec_number_fops);
+	if (!drop_dir)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/*
+ * Remove the per-recording debugfs files and release the record pages.
+ *
+ * Each dentry pointer is reset to NULL after removal.  task_on_set() calls
+ * this function after a failed task_alloc() as well, so without the reset
+ * a pointer left over from an earlier on/off cycle would be handed to
+ * debugfs_remove() a second time.  debugfs_remove() accepts NULL.
+ */
+static void task_release(void)
+{
+	debugfs_remove(bin_dir);
+	bin_dir = NULL;
+	debugfs_remove(str_dir);
+	str_dir = NULL;
+	debugfs_remove(number_dir);
+	number_dir = NULL;
+	debugfs_remove(page_dir);
+	page_dir = NULL;
+	debugfs_remove(drop_dir);
+	drop_dir = NULL;
+
+	task_pages_release();
+}
+
+/*
+ * Setter for the "on" file.  A non-zero @val switches task recording on
+ * (allocating pages and creating the data files); zero switches it off
+ * and releases them.  Writing the state that is already in effect does
+ * nothing.  Runs with bt_lock held for writing.
+ *
+ * Returns 0 on success or the task_alloc() error when switching on fails.
+ */
+static int task_on_set(void *data, u64 val)
+{
+	const bool want_on = val != 0;
+	int ret = 0;
+
+	down_write(&bt_lock);
+
+	if (want_on == rec_on)
+		goto unlock;
+
+	if (!want_on) {
+		task_release();
+		rec_on = false;
+		goto unlock;
+	}
+
+	ret = task_alloc();
+	if (ret)
+		task_release();
+	else
+		rec_on = true;
+
+unlock:
+	up_write(&bt_lock);
+	return ret;
+}
+
+/*
+ * Getter for the "on" file: report 1 when task recording is enabled and 0
+ * otherwise, sampled under bt_lock.  Always returns 0.
+ */
+static int task_on_get(void *data, u64 *val)
+{
+	bool enabled;
+
+	down_read(&bt_lock);
+	enabled = rec_on;
+	up_read(&bt_lock);
+
+	*val = enabled ? 1 : 0;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(task_on_fops, task_on_get, task_on_set, "%llu\n");
+
+/*
+ * Setter for the "rec_max" file.  Stores @val in rec_max and, if recording
+ * is currently on, re-creates the record pages with the new limit.  If
+ * that fails, recording is switched off.
+ *
+ * Returns 0 on success, -EINVAL if @val does not fit in rec_max, or the
+ * task_pages_alloc() error.
+ */
+static int rec_max_set(void *data, u64 val)
+{
+	int ret = 0;
+
+	/* rec_max is an unsigned int: refuse values that would be truncated. */
+	if (val != (unsigned int)val)
+		return -EINVAL;
+
+	down_write(&bt_lock);
+	if (rec_max == val)
+		goto out;
+
+	rec_max = val;
+
+	if (!rec_on)
+		goto out;
+
+	task_pages_release();
+	ret = task_pages_alloc();
+	if (ret) {
+		task_release();
+		rec_on = false;
+		goto out;
+	}
+
+out:
+	up_write(&bt_lock);
+	return ret;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(rec_max_fops,
+			bt_number_get,
+			rec_max_set, "%llu\n");
+
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+/* Accumulate the byte counters of @io into the matching fields of @entry. */
+static void
+entry_io_accounting_add(struct process_entry *entry,
+			struct task_io_accounting *io)
+{
+	entry->cancelled_write_bytes += io->cancelled_write_bytes;
+	entry->write_bytes += io->write_bytes;
+	entry->read_bytes += io->read_bytes;
+}
+#endif
+
+/*
+ * Append one record for thread group @p to rec_pages, tagged with @status.
+ *
+ * If no entry can be allocated, rec_drop is incremented and nothing is
+ * recorded.  If the sighand lock of @p cannot be taken, the entry is
+ * flagged PROCESS_ERROR and all counters are left at zero.
+ */
+static void
+task_record_1(u8 status, struct task_struct *p)
+{
+	struct process_entry *entry;
+	unsigned long flags;
+
+	entry = bt_pages_alloc_entry(&rec_pages);
+	if (!entry) {
+		rec_drop++;
+		return;
+	}
+
+	entry->status = status;
+	entry->pid = p->pid;
+	/*
+	 * Bound the copy by the destination and terminate it explicitly.
+	 * strncpy() zero-pads, so no stale bytes remain after the name.
+	 */
+	strncpy(entry->comm, p->comm, sizeof(entry->comm));
+	entry->comm[sizeof(entry->comm) - 1] = '\0';
+
+	/* Start from zero so the error path below needs no extra work. */
+	entry->utime = 0;
+	entry->stime = 0;
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+	entry->read_bytes = 0;
+	entry->write_bytes = 0;
+	entry->cancelled_write_bytes = 0;
+#endif
+
+	if (!lock_task_sighand(p, &flags)) {
+		entry->status |= PROCESS_ERROR;
+		return;
+	}
+
+	thread_group_cputime_adjusted(p, &entry->utime, &entry->stime);
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+	{
+		struct task_struct *t = p;
+
+		/*
+		 * Sum the group-wide counters, @p's own counters and those
+		 * of every other thread: the same pattern that
+		 * do_io_accounting() in fs/proc/base.c uses.  Each sibling
+		 * contributes its own t->ioac; the shared signal->ioac must
+		 * be added only once.
+		 */
+		entry_io_accounting_add(entry, &p->ioac);
+		entry_io_accounting_add(entry, &p->signal->ioac);
+		while_each_thread(p, t)
+			entry_io_accounting_add(entry, &t->ioac);
+	}
+#endif
+	unlock_task_sighand(p, &flags);
+}
+
+/* Record every process in the system, tagging each entry with @status. */
+static void
+task_record(u8 status)
+{
+	struct task_struct *proc;
+
+	/* The process list is walked under the RCU read lock. */
+	rcu_read_lock();
+	for_each_process(proc) {
+		task_record_1(status, proc);
+	}
+	rcu_read_unlock();
+}
+
+/*
+ * Start of a test: if task recording is on, clear the old records, reset
+ * the drop counter, mark recording as running and take a PROCESS_INSERT
+ * snapshot of every process.  All of this is done under rec_lock.
+ */
+void bt_insert_task(void)
+{
+	unsigned long irq_flags;
+
+	if (rec_on) {
+		spin_lock_irqsave(&rec_lock, irq_flags);
+
+		bt_pages_clear(&rec_pages);
+		rec_drop = 0;
+		rec_running = true;
+		task_record(PROCESS_INSERT);
+
+		spin_unlock_irqrestore(&rec_lock, irq_flags);
+	}
+}
+
+/*
+ * End of a test: if task recording is on, take a PROCESS_PULLOUT snapshot
+ * of every process and mark recording as no longer running, under
+ * rec_lock.
+ */
+void bt_pullout_task(void)
+{
+	unsigned long irq_flags;
+
+	if (rec_on) {
+		spin_lock_irqsave(&rec_lock, irq_flags);
+
+		task_record(PROCESS_PULLOUT);
+		rec_running = false;
+
+		spin_unlock_irqrestore(&rec_lock, irq_flags);
+	}
+}
+
+/*
+ * Exit hook: while a recording is running, append a PROCESS_EXIT record
+ * for the thread group of @p.  Does nothing when no recording is running.
+ *
+ * The record is always taken on the group leader, derived from @p itself
+ * rather than from current, so the result does not depend on the caller
+ * passing the current task.  A leader's group_leader is the task itself.
+ */
+void bt_task_exit_record(struct task_struct *p)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rec_lock, flags);
+
+	if (rec_running) {
+		rcu_read_lock();
+		task_record_1(PROCESS_EXIT, p->group_leader);
+		rcu_read_unlock();
+	}
+
+	spin_unlock_irqrestore(&rec_lock, flags);
+}
+
+/*
+ * Create the "task" debugfs directory under @f with its permanent control
+ * files "on", "bin_format" and "rec_max", and zero rec_pages.
+ *
+ * Returns 0 on success or -ENOMEM if any entry could not be created, in
+ * which case everything created so far is removed again.
+ */
+int __init bt_task_init(struct dentry *f)
+{
+	int ret = -ENOMEM;
+	struct dentry *on = NULL, *format = NULL, *max = NULL;
+
+	memset(&rec_pages, 0, sizeof(struct bt_pages));
+
+	task_dir = debugfs_create_dir("task", f);
+	if (!task_dir)
+		goto out;
+
+	on = debugfs_create_file("on", S_IRUSR | S_IWUSR, task_dir, NULL,
+				 &task_on_fops);
+	if (!on)
+		goto out;
+
+	format = debugfs_create_file("bin_format", S_IRUSR,
+				     task_dir, NULL,
+				     &task_bin_format_fops);
+	/* Go through the common cleanup so "on" and task_dir are removed. */
+	if (!format)
+		goto out;
+
+	max = debugfs_create_file("rec_max", S_IRUSR | S_IWUSR, task_dir,
+				   &rec_max,
+				   &rec_max_fops);
+	if (!max)
+		goto out;
+
+	ret = 0;
+out:
+	if (ret) {
+		/* debugfs_remove() accepts NULL for entries never created. */
+		debugfs_remove(on);
+		debugfs_remove(format);
+		debugfs_remove(max);
+		debugfs_remove(task_dir);
+	}
+	return ret;
+}
diff --git a/kernel/exit.c b/kernel/exit.c
index f2cd53e..513de91 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -62,6 +62,7 @@
 #include <linux/random.h>
 #include <linux/rcuwait.h>
 #include <linux/compat.h>
+#include <linux/bloodtest.h>
 
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
@@ -855,6 +856,9 @@ void __noreturn do_exit(long code)
 		acct_process();
 	trace_sched_process_exit(tsk);
 
+	if (group_dead)
+		bt_task_exit_record(tsk);
+
 	exit_sem(tsk);
 	exit_shm(tsk);
 	exit_files(tsk);
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [RFC 0/4] BloodTest: kernel status
  2017-10-13  8:56 [RFC 0/4] BloodTest: kernel status Hui Zhu
                   ` (3 preceding siblings ...)
  2017-10-13  8:56 ` [RFC 4/4] BloodTest: task Hui Zhu
@ 2017-10-23  9:30 ` Pavel Machek
  4 siblings, 0 replies; 6+ messages in thread
From: Pavel Machek @ 2017-10-23  9:30 UTC (permalink / raw)
  To: Hui Zhu; +Cc: linux-kernel, teawater

[-- Attachment #1: Type: text/plain, Size: 1208 bytes --]

Hi!

> BloodTest: an interface to call other analysing tools
> 
> Linux kernel has a lot of analysing tools, perf, ftrace, systemtap, KGTP
> and so on.
> And kernel also supplies a lot of internal value from procfs and sysfs
> to analyse the performance.

>  fs/proc/stat.c                 |    8 
>  include/linux/bloodtest.h      |   10 
>  include/linux/kernel_stat.h    |    3 
>  init/Kconfig                   |    3 
>  kernel/Makefile                |    2 
>  kernel/bloodtest/Makefile      |    3 
>  kernel/bloodtest/core.c        |  132 +++++++++
>  kernel/bloodtest/internal.h    |   61 ++++
>  kernel/bloodtest/kernel_stat.c |   62 ++++
>  kernel/bloodtest/pages.c       |  266 ++++++++++++++++++
>  kernel/bloodtest/perf.c        |  576 +++++++++++++++++++++++++++++++++++++++++
>  kernel/bloodtest/task.c        |  447 +++++++++++++++++++++++++++++++
>  kernel/exit.c                  |    4 
>  kernel/module.c                |   19 +
>  14 files changed, 1592 insertions(+), 4 deletions(-)

No documentation?
									Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2017-10-23  9:30 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-13  8:56 [RFC 0/4] BloodTest: kernel status Hui Zhu
2017-10-13  8:56 ` [RFC 1/4] " Hui Zhu
2017-10-13  8:56 ` [RFC 2/4] BloodTest: perf Hui Zhu
2017-10-13  8:56 ` [RFC 3/4] module: add /proc/modules_update_version Hui Zhu
2017-10-13  8:56 ` [RFC 4/4] BloodTest: task Hui Zhu
2017-10-23  9:30 ` [RFC 0/4] BloodTest: kernel status Pavel Machek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).