Linux-Block Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH 0/3] Fix blkparse and iowatcher for kernels >= 4.14
@ 2020-05-06 13:39 Jan Kara
  2020-05-06 13:39 ` [PATCH 1/3] blkparse: Handle cgroup information Jan Kara
                   ` (5 more replies)
  0 siblings, 6 replies; 9+ messages in thread
From: Jan Kara @ 2020-05-06 13:39 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, Jan Kara

I was investigating a performance issue with BFQ IO scheduler and I was
pondering why I'm not seeing informational messages from BFQ. After quite
some debugging I have found out that commit 35fe6d763229 "block: use
standard blktrace API to output cgroup info for debug notes" broke standard
blktrace API - namely the informational messages logged by bfq_log_bfqq()
are no longer displayed by blkparse(8) tool. This is because these messages
have now __BLK_TA_CGROUP bit set and that breaks flags checking in
blkparse(1) and iowatcher(1). This series fixes both tools to be able to
cope with events with __BLK_TA_CGROUP flag set.

								Honza

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 1/3] blkparse: Handle cgroup information
  2020-05-06 13:39 [PATCH 0/3] Fix blkparse and iowatcher for kernels >= 4.14 Jan Kara
@ 2020-05-06 13:39 ` Jan Kara
  2020-05-06 13:39 ` [PATCH 2/3] iowatcher: Use blktrace_api.h Jan Kara
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 9+ messages in thread
From: Jan Kara @ 2020-05-06 13:39 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, Jan Kara

Since Linux kernel commit 35fe6d763229 "block: use standard blktrace API
to output cgroup info for debug notes" the kernel can pass
__BLK_TA_CGROUP flag in the action field of generated events. blkparse
does not count with this and so it will get confused by such events and
either ignore them or misreport them. Teach blkparse how to properly
process events with __BLK_TA_CGROUP flag.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 blkparse.c       | 41 ++++++++++++++++++++++++++---------------
 blkparse_fmt.c   | 15 +++++++++++++++
 blktrace_api.h   | 10 ++++++++++
 doc/blkparse.1   |  4 ++++
 doc/blktrace.tex |  3 +++
 5 files changed, 58 insertions(+), 15 deletions(-)

diff --git a/blkparse.c b/blkparse.c
index 28bdf1543ddb..ae4cb4433944 100644
--- a/blkparse.c
+++ b/blkparse.c
@@ -597,7 +597,7 @@ static void handle_notify(struct blk_io_trace *bit)
 	void	*payload = (caddr_t) bit + sizeof(*bit);
 	__u32	two32[2];
 
-	switch (bit->action) {
+	switch (bit->action & ~__BLK_TN_CGROUP) {
 	case BLK_TN_PROCESS:
 		add_ppm_hash(bit->pid, payload);
 		break;
@@ -623,16 +623,27 @@ static void handle_notify(struct blk_io_trace *bit)
 	case BLK_TN_MESSAGE:
 		if (bit->pdu_len > 0) {
 			char msg[bit->pdu_len+1];
+			int len = bit->pdu_len;
+			char cgidstr[24];
 
-			memcpy(msg, (char *)payload, bit->pdu_len);
-			msg[bit->pdu_len] = '\0';
+			cgidstr[0] = 0;
+			if (bit->action & __BLK_TN_CGROUP) {
+				struct blk_io_cgroup_payload *cgid = payload;
+
+				sprintf(cgidstr, "%x,%x ", cgid->ino,
+					cgid->gen);
+				payload += sizeof(struct blk_io_cgroup_payload);
+				len -= sizeof(struct blk_io_cgroup_payload);
+			}
+			memcpy(msg, (char *)payload, len);
+			msg[len] = '\0';
 
 			fprintf(ofp,
-				"%3d,%-3d %2d %8s %5d.%09lu %5u %2s %3s %s\n",
+				"%3d,%-3d %2d %8s %5d.%09lu %5u %s%2s %3s %s\n",
 				MAJOR(bit->device), MINOR(bit->device),
-				bit->cpu, "0", (int) SECONDS(bit->time),
-				(unsigned long) NANO_SECONDS(bit->time),
-				0, "m", "N", msg);
+				bit->cpu, "0", (int)SECONDS(bit->time),
+				(unsigned long)NANO_SECONDS(bit->time),
+				0, cgidstr, "m", "N", msg);
 		}
 		break;
 
@@ -1511,7 +1522,7 @@ static void dump_trace_pc(struct blk_io_trace *t, struct per_dev_info *pdi,
 			  struct per_cpu_info *pci)
 {
 	int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
-	int act = t->action & 0xffff;
+	int act = (t->action & 0xffff) & ~__BLK_TA_CGROUP;
 
 	switch (act) {
 		case __BLK_TA_QUEUE:
@@ -1560,7 +1571,7 @@ static void dump_trace_fs(struct blk_io_trace *t, struct per_dev_info *pdi,
 			  struct per_cpu_info *pci)
 {
 	int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
-	int act = t->action & 0xffff;
+	int act = (t->action & 0xffff) & ~__BLK_TA_CGROUP;
 
 	switch (act) {
 		case __BLK_TA_QUEUE:
@@ -1643,7 +1654,7 @@ static void dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci,
 		       struct per_dev_info *pdi)
 {
 	if (text_output) {
-		if (t->action == BLK_TN_MESSAGE)
+		if ((t->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE)
 			handle_notify(t);
 		else if (t->action & BLK_TC_ACT(BLK_TC_PC))
 			dump_trace_pc(t, pdi, pci);
@@ -1658,7 +1669,7 @@ static void dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci,
 
 	if (bin_output_msgs ||
 			    !(t->action & BLK_TC_ACT(BLK_TC_NOTIFY) &&
-			      t->action == BLK_TN_MESSAGE))
+			      (t->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE))
 		output_binary(t, sizeof(*t) + t->pdu_len);
 }
 
@@ -2234,7 +2245,7 @@ static void show_entries_rb(int force)
 			break;
 		}
 
-		if (!(bit->action == BLK_TN_MESSAGE) &&
+		if (!((bit->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE) &&
 		    check_sequence(pdi, t, force))
 			break;
 
@@ -2246,7 +2257,7 @@ static void show_entries_rb(int force)
 		if (!pci || pci->cpu != bit->cpu)
 			pci = get_cpu_info(pdi, bit->cpu);
 
-		if (!(bit->action == BLK_TN_MESSAGE))
+		if (!((bit->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE))
 			pci->last_sequence = bit->sequence;
 
 		pci->nelems++;
@@ -2380,7 +2391,7 @@ static int read_events(int fd, int always_block, int *fdblock)
 		/*
 		 * not a real trace, so grab and handle it here
 		 */
-		if (bit->action & BLK_TC_ACT(BLK_TC_NOTIFY) && bit->action != BLK_TN_MESSAGE) {
+		if (bit->action & BLK_TC_ACT(BLK_TC_NOTIFY) && (bit->action & ~__BLK_TN_CGROUP) != BLK_TN_MESSAGE) {
 			handle_notify(bit);
 			output_binary(bit, sizeof(*bit) + bit->pdu_len);
 			continue;
@@ -2529,7 +2540,7 @@ static int ms_prime(struct ms_stream *msp)
 			continue;
 		}
 
-		if (bit->action & BLK_TC_ACT(BLK_TC_NOTIFY) && bit->action != BLK_TN_MESSAGE) {
+		if (bit->action & BLK_TC_ACT(BLK_TC_NOTIFY) && (bit->action & ~__BLK_TN_CGROUP) != BLK_TN_MESSAGE) {
 			handle_notify(bit);
 			output_binary(bit, sizeof(*bit) + bit->pdu_len);
 			bit_free(bit);
diff --git a/blkparse_fmt.c b/blkparse_fmt.c
index c42e6d7b7219..df2f6ce2148a 100644
--- a/blkparse_fmt.c
+++ b/blkparse_fmt.c
@@ -205,6 +205,21 @@ static void print_field(char *act, struct per_cpu_info *pci,
 	case 'e':
 		fprintf(ofp, strcat(format, "d"), t->error);
 		break;
+	case 'g': {
+		char cgidstr[24];
+		u32 ino = 0, gen = 0;
+
+		if (t->action & __BLK_TA_CGROUP) {
+			struct blk_io_cgroup_payload *cgid =
+				(struct blk_io_cgroup_payload *)pdu_buf;
+
+			ino = cgid->ino;
+			gen = cgid->gen;
+		}
+		sprintf(cgidstr, "%x,%x", ino, gen);
+		fprintf(ofp, strcat(format, "s"), cgidstr);
+		break;
+	}
 	case 'M':
 		fprintf(ofp, strcat(format, "d"), MAJOR(t->device));
 		break;
diff --git a/blktrace_api.h b/blktrace_api.h
index b22221828f41..8c760b8dd260 100644
--- a/blktrace_api.h
+++ b/blktrace_api.h
@@ -51,6 +51,7 @@ enum {
 	__BLK_TA_REMAP,			/* bio was remapped */
 	__BLK_TA_ABORT,			/* request aborted */
 	__BLK_TA_DRV_DATA,		/* binary driver data */
+	__BLK_TA_CGROUP = 1 << 8,
 };
 
 /*
@@ -60,6 +61,7 @@ enum blktrace_notify {
 	__BLK_TN_PROCESS = 0,		/* establish pid/name mapping */
 	__BLK_TN_TIMESTAMP,		/* include system clock */
 	__BLK_TN_MESSAGE,               /* Character string message */
+	__BLK_TN_CGROUP = __BLK_TA_CGROUP,
 };
 
 /*
@@ -116,6 +118,14 @@ struct blk_io_trace_remap {
 	__u64 sector_from;
 };
 
+/*
+ * Payload with originating cgroup info
+ */
+struct blk_io_cgroup_payload {
+	__u32 ino;
+	__u32 gen;
+};
+
 /*
  * User setup structure passed with BLKSTARTTRACE
  */
diff --git a/doc/blkparse.1 b/doc/blkparse.1
index e494b6eca223..4c26baffc56c 100644
--- a/doc/blkparse.1
+++ b/doc/blkparse.1
@@ -332,6 +332,10 @@ the event's device (separated by a comma).
 .IP \fBe\fR 4
 Error value
 
+.IP \fBg\fR 4
+Cgroup identifier of the cgroup that generated the IO. Note that this requires
+appropriate kernel support (kernel version at least 4.14).
+
 .IP \fBm\fR 4
 Minor number of event's device.
 
diff --git a/doc/blktrace.tex b/doc/blktrace.tex
index 3647c7502142..836ac4a35f86 100644
--- a/doc/blktrace.tex
+++ b/doc/blktrace.tex
@@ -601,6 +601,9 @@ Specifier & \\ \hline\hline
 the event's device \\
          & (separated by a comma). \\ \hline
 \emph{e} & Error value \\ \hline
+\emph{g} & Cgroup identifier of the cgroup that generated the IO. Note that this requires
+appropriate \\
+         & kernel support (kernel version at least 4.14). \\ \hline
 \emph{m} & Minor number of event's device. \\ \hline
 \emph{M} & Major number of event's device. \\ \hline
 \emph{n} & Number of blocks \\ \hline
-- 
2.16.4


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 2/3] iowatcher: Use blktrace_api.h
  2020-05-06 13:39 [PATCH 0/3] Fix blkparse and iowatcher for kernels >= 4.14 Jan Kara
  2020-05-06 13:39 ` [PATCH 1/3] blkparse: Handle cgroup information Jan Kara
@ 2020-05-06 13:39 ` Jan Kara
  2020-05-06 13:39 ` [PATCH 3/3] iowatcher: Handle cgroup information Jan Kara
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 9+ messages in thread
From: Jan Kara @ 2020-05-06 13:39 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, Jan Kara

Use blktrace_api.h header instead of redefining the constants once more
in blkparse.c.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 iowatcher/blkparse.c | 104 +--------------------------------------------------
 1 file changed, 1 insertion(+), 103 deletions(-)

diff --git a/iowatcher/blkparse.c b/iowatcher/blkparse.c
index 41e20f0c01f9..982de9459826 100644
--- a/iowatcher/blkparse.c
+++ b/iowatcher/blkparse.c
@@ -36,6 +36,7 @@
 #include "blkparse.h"
 #include "list.h"
 #include "tracers.h"
+#include "../blktrace_api.h"
 
 #define IO_HASH_TABLE_BITS  11
 #define IO_HASH_TABLE_SIZE (1 << IO_HASH_TABLE_BITS)
@@ -49,112 +50,9 @@ static struct list_head process_hash_table[PROCESS_HASH_TABLE_SIZE];
 extern int plot_io_action;
 extern int io_per_process;
 
-/*
- * Trace categories
- */
-enum {
-	BLK_TC_READ	= 1 << 0,	/* reads */
-	BLK_TC_WRITE	= 1 << 1,	/* writes */
-	BLK_TC_FLUSH	= 1 << 2,	/* flush */
-	BLK_TC_SYNC	= 1 << 3,	/* sync */
-	BLK_TC_QUEUE	= 1 << 4,	/* queueing/merging */
-	BLK_TC_REQUEUE	= 1 << 5,	/* requeueing */
-	BLK_TC_ISSUE	= 1 << 6,	/* issue */
-	BLK_TC_COMPLETE	= 1 << 7,	/* completions */
-	BLK_TC_FS	= 1 << 8,	/* fs requests */
-	BLK_TC_PC	= 1 << 9,	/* pc requests */
-	BLK_TC_NOTIFY	= 1 << 10,	/* special message */
-	BLK_TC_AHEAD	= 1 << 11,	/* readahead */
-	BLK_TC_META	= 1 << 12,	/* metadata */
-	BLK_TC_DISCARD	= 1 << 13,	/* discard requests */
-	BLK_TC_DRV_DATA	= 1 << 14,	/* binary driver data */
-	BLK_TC_FUA	= 1 << 15,	/* fua requests */
-
-	BLK_TC_END	= 1 << 15,	/* we've run out of bits! */
-};
-
-#define BLK_TC_SHIFT		(16)
-#define BLK_TC_ACT(act)		((act) << BLK_TC_SHIFT)
 #define BLK_DATADIR(a) (((a) >> BLK_TC_SHIFT) & (BLK_TC_READ | BLK_TC_WRITE))
-
-/*
- * Basic trace actions
- */
-enum {
-	__BLK_TA_QUEUE = 1,		/* queued */
-	__BLK_TA_BACKMERGE,		/* back merged to existing rq */
-	__BLK_TA_FRONTMERGE,		/* front merge to existing rq */
-	__BLK_TA_GETRQ,			/* allocated new request */
-	__BLK_TA_SLEEPRQ,		/* sleeping on rq allocation */
-	__BLK_TA_REQUEUE,		/* request requeued */
-	__BLK_TA_ISSUE,			/* sent to driver */
-	__BLK_TA_COMPLETE,		/* completed by driver */
-	__BLK_TA_PLUG,			/* queue was plugged */
-	__BLK_TA_UNPLUG_IO,		/* queue was unplugged by io */
-	__BLK_TA_UNPLUG_TIMER,		/* queue was unplugged by timer */
-	__BLK_TA_INSERT,		/* insert request */
-	__BLK_TA_SPLIT,			/* bio was split */
-	__BLK_TA_BOUNCE,		/* bio was bounced */
-	__BLK_TA_REMAP,			/* bio was remapped */
-	__BLK_TA_ABORT,			/* request aborted */
-	__BLK_TA_DRV_DATA,		/* binary driver data */
-};
-
 #define BLK_TA_MASK ((1 << BLK_TC_SHIFT) - 1)
 
-/*
- * Notify events.
- */
-enum blktrace_notify {
-	__BLK_TN_PROCESS = 0,		/* establish pid/name mapping */
-	__BLK_TN_TIMESTAMP,		/* include system clock */
-	__BLK_TN_MESSAGE,               /* Character string message */
-};
-
-/*
- * Trace actions in full. Additionally, read or write is masked
- */
-#define BLK_TA_QUEUE		(__BLK_TA_QUEUE | BLK_TC_ACT(BLK_TC_QUEUE))
-#define BLK_TA_BACKMERGE	(__BLK_TA_BACKMERGE | BLK_TC_ACT(BLK_TC_QUEUE))
-#define BLK_TA_FRONTMERGE	(__BLK_TA_FRONTMERGE | BLK_TC_ACT(BLK_TC_QUEUE))
-#define	BLK_TA_GETRQ		(__BLK_TA_GETRQ | BLK_TC_ACT(BLK_TC_QUEUE))
-#define	BLK_TA_SLEEPRQ		(__BLK_TA_SLEEPRQ | BLK_TC_ACT(BLK_TC_QUEUE))
-#define	BLK_TA_REQUEUE		(__BLK_TA_REQUEUE | BLK_TC_ACT(BLK_TC_REQUEUE))
-#define BLK_TA_ISSUE		(__BLK_TA_ISSUE | BLK_TC_ACT(BLK_TC_ISSUE))
-#define BLK_TA_COMPLETE		(__BLK_TA_COMPLETE| BLK_TC_ACT(BLK_TC_COMPLETE))
-#define BLK_TA_PLUG		(__BLK_TA_PLUG | BLK_TC_ACT(BLK_TC_QUEUE))
-#define BLK_TA_UNPLUG_IO	(__BLK_TA_UNPLUG_IO | BLK_TC_ACT(BLK_TC_QUEUE))
-#define BLK_TA_UNPLUG_TIMER	(__BLK_TA_UNPLUG_TIMER | BLK_TC_ACT(BLK_TC_QUEUE))
-#define BLK_TA_INSERT		(__BLK_TA_INSERT | BLK_TC_ACT(BLK_TC_QUEUE))
-#define BLK_TA_SPLIT		(__BLK_TA_SPLIT)
-#define BLK_TA_BOUNCE		(__BLK_TA_BOUNCE)
-#define BLK_TA_REMAP		(__BLK_TA_REMAP | BLK_TC_ACT(BLK_TC_QUEUE))
-#define BLK_TA_ABORT		(__BLK_TA_ABORT | BLK_TC_ACT(BLK_TC_QUEUE))
-#define BLK_TA_DRV_DATA		(__BLK_TA_DRV_DATA | BLK_TC_ACT(BLK_TC_DRV_DATA))
-
-#define BLK_TN_PROCESS		(__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY))
-#define BLK_TN_TIMESTAMP	(__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY))
-#define BLK_TN_MESSAGE		(__BLK_TN_MESSAGE | BLK_TC_ACT(BLK_TC_NOTIFY))
-
-#define BLK_IO_TRACE_MAGIC	0x65617400
-#define BLK_IO_TRACE_VERSION	0x07
-/*
- * The trace itself
- */
-struct blk_io_trace {
-	__u32 magic;		/* MAGIC << 8 | version */
-	__u32 sequence;		/* event number */
-	__u64 time;		/* in nanoseconds */
-	__u64 sector;		/* disk offset */
-	__u32 bytes;		/* transfer length */
-	__u32 action;		/* what happened */
-	__u32 pid;		/* who did it */
-	__u32 device;		/* device identifier (dev_t) */
-	__u32 cpu;		/* on what cpu did it happen */
-	__u16 error;		/* completion error */
-	__u16 pdu_len;		/* length of data after this trace */
-};
-
 struct pending_io {
 	/* sector offset of this IO */
 	u64 sector;
-- 
2.16.4


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 3/3] iowatcher: Handle cgroup information
  2020-05-06 13:39 [PATCH 0/3] Fix blkparse and iowatcher for kernels >= 4.14 Jan Kara
  2020-05-06 13:39 ` [PATCH 1/3] blkparse: Handle cgroup information Jan Kara
  2020-05-06 13:39 ` [PATCH 2/3] iowatcher: Use blktrace_api.h Jan Kara
@ 2020-05-06 13:39 ` Jan Kara
  2020-05-06 13:50 ` [PATCH 0/3] Fix blkparse and iowatcher for kernels >= 4.14 Paolo Valente
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 9+ messages in thread
From: Jan Kara @ 2020-05-06 13:39 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, Jan Kara

Since Linux kernel commit 35fe6d763229 "block: use standard blktrace API
to output cgroup info for debug notes" the kernel can pass
__BLK_TA_CGROUP flag in the action field of generated events. Teach
iowatcher to ignore this information.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 iowatcher/blkparse.c | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/iowatcher/blkparse.c b/iowatcher/blkparse.c
index 982de9459826..6203854bd49c 100644
--- a/iowatcher/blkparse.c
+++ b/iowatcher/blkparse.c
@@ -51,7 +51,7 @@ extern int plot_io_action;
 extern int io_per_process;
 
 #define BLK_DATADIR(a) (((a) >> BLK_TC_SHIFT) & (BLK_TC_READ | BLK_TC_WRITE))
-#define BLK_TA_MASK ((1 << BLK_TC_SHIFT) - 1)
+#define BLK_TA_MASK (((1 << BLK_TC_SHIFT) - 1) & ~__BLK_TA_CGROUP)
 
 struct pending_io {
 	/* sector offset of this IO */
@@ -260,18 +260,23 @@ static void handle_notify(struct trace *trace)
 {
 	struct blk_io_trace *io = trace->io;
 	void *payload = (char *)io + sizeof(*io);
+	int pdu_len = io->pdu_len;
 	u32 two32[2];
 
-	if (io->action == BLK_TN_PROCESS) {
+	if (io->action & __BLK_TN_CGROUP) {
+		payload += sizeof(struct blk_io_cgroup_payload);
+		pdu_len -= sizeof(struct blk_io_cgroup_payload);
+	}
+	if ((io->action & ~__BLK_TN_CGROUP) == BLK_TN_PROCESS) {
 		if (io_per_process)
 			process_hash_insert(io->pid, payload);
 		return;
 	}
 
-	if (io->action != BLK_TN_TIMESTAMP)
+	if ((io->action & ~__BLK_TN_CGROUP) != BLK_TN_TIMESTAMP)
 		return;
 
-	if (io->pdu_len != sizeof(two32))
+	if (pdu_len != sizeof(two32))
 		return;
 
 	memcpy(two32, payload, sizeof(two32));
@@ -309,11 +314,16 @@ static int is_io_event(struct blk_io_trace *test)
 	char *message;
 	if (!(test->action & BLK_TC_ACT(BLK_TC_NOTIFY)))
 		return 1;
-	if (test->action == BLK_TN_MESSAGE) {
+	if ((test->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE) {
 		int len = test->pdu_len;
+
+		message = (char *)(test + 1);
+		if (test->action & __BLK_TN_CGROUP) {
+			len -= sizeof(struct blk_io_cgroup_payload);
+			message += sizeof(struct blk_io_cgroup_payload);
+		}
 		if (len < 3)
 			return 0;
-		message = (char *)(test + 1);
 		if (strncmp(message, "fio ", 4) == 0) {
 			return 1;
 		}
@@ -372,13 +382,17 @@ static int parse_fio_bank_message(struct trace *trace, u64 *bank_ret, u64 *offse
 
 	if (!(test->action & BLK_TC_ACT(BLK_TC_NOTIFY)))
 		return -1;
-	if (test->action != BLK_TN_MESSAGE)
+	if ((test->action & ~__BLK_TN_CGROUP) != BLK_TN_MESSAGE)
 		return -1;
 
+	message = (char *)(test + 1);
+	if (test->action & __BLK_TN_CGROUP) {
+		len -= sizeof(struct blk_io_cgroup_payload);
+		message += sizeof(struct blk_io_cgroup_payload);
+	}
 	/* the message is fio rw bank offset num_banks */
 	if (len < 3)
 		return -1;
-	message = (char *)(test + 1);
 	if (strncmp(message, "fio r ", 6) != 0)
 		return -1;
 
-- 
2.16.4


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 0/3] Fix blkparse and iowatcher for kernels >= 4.14
  2020-05-06 13:39 [PATCH 0/3] Fix blkparse and iowatcher for kernels >= 4.14 Jan Kara
                   ` (2 preceding siblings ...)
  2020-05-06 13:39 ` [PATCH 3/3] iowatcher: Handle cgroup information Jan Kara
@ 2020-05-06 13:50 ` Paolo Valente
  2020-05-06 14:12   ` Jan Kara
  2020-05-06 14:42 ` Christoph Hellwig
  2020-05-20 13:38 ` Jens Axboe
  5 siblings, 1 reply; 9+ messages in thread
From: Paolo Valente @ 2020-05-06 13:50 UTC (permalink / raw)
  To: Jan Kara; +Cc: Jens Axboe, linux-block

Does this fix make process names (with PIDS) be shown too in bfq log messages?

> Il giorno 6 mag 2020, alle ore 15:39, Jan Kara <jack@suse.cz> ha scritto:
> 
> I was investigating a performance issue with BFQ IO scheduler and I was
> pondering why I'm not seeing informational messages from BFQ. After quite
> some debugging I have found out that commit 35fe6d763229 "block: use
> standard blktrace API to output cgroup info for debug notes" broke standard
> blktrace API - namely the informational messages logged by bfq_log_bfqq()
> are no longer displayed by blkparse(8) tool. This is because these messages
> have now __BLK_TA_CGROUP bit set and that breaks flags checking in
> blkparse(1) and iowatcher(1). This series fixes both tools to be able to
> cope with events with __BLK_TA_CGROUP flag set.
> 
> 								Honza


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 0/3] Fix blkparse and iowatcher for kernels >= 4.14
  2020-05-06 13:50 ` [PATCH 0/3] Fix blkparse and iowatcher for kernels >= 4.14 Paolo Valente
@ 2020-05-06 14:12   ` Jan Kara
  0 siblings, 0 replies; 9+ messages in thread
From: Jan Kara @ 2020-05-06 14:12 UTC (permalink / raw)
  To: Paolo Valente; +Cc: Jan Kara, Jens Axboe, linux-block

On Wed 06-05-20 15:50:30, Paolo Valente wrote:
> Does this fix make process names (with PIDS) be shown too in bfq log messages?

No, I didn't change how that's handled. Also AFAICS the kernel does not add
PID information to BLK_TN_MESSAGE events (which is how bfq log messages are
passed) so blkparse has nothing to display even if it wanted.

So poor man's solution is to derive pid from the BFQ queue name and then
lookup process name from some other event. Nicer solution would be to
modify blktrace handling in the kernel to add PID information to such
events as well so we get similar info as when using tracing infrastructure
directly.

								Honza

> > Il giorno 6 mag 2020, alle ore 15:39, Jan Kara <jack@suse.cz> ha scritto:
> > 
> > I was investigating a performance issue with BFQ IO scheduler and I was
> > pondering why I'm not seeing informational messages from BFQ. After quite
> > some debugging I have found out that commit 35fe6d763229 "block: use
> > standard blktrace API to output cgroup info for debug notes" broke standard
> > blktrace API - namely the informational messages logged by bfq_log_bfqq()
> > are no longer displayed by blkparse(8) tool. This is because these messages
> > have now __BLK_TA_CGROUP bit set and that breaks flags checking in
> > blkparse(1) and iowatcher(1). This series fixes both tools to be able to
> > cope with events with __BLK_TA_CGROUP flag set.
> > 
> > 								Honza
> 
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 0/3] Fix blkparse and iowatcher for kernels >= 4.14
  2020-05-06 13:39 [PATCH 0/3] Fix blkparse and iowatcher for kernels >= 4.14 Jan Kara
                   ` (3 preceding siblings ...)
  2020-05-06 13:50 ` [PATCH 0/3] Fix blkparse and iowatcher for kernels >= 4.14 Paolo Valente
@ 2020-05-06 14:42 ` Christoph Hellwig
  2020-05-07  9:49   ` Jan Kara
  2020-05-20 13:38 ` Jens Axboe
  5 siblings, 1 reply; 9+ messages in thread
From: Christoph Hellwig @ 2020-05-06 14:42 UTC (permalink / raw)
  To: Jan Kara; +Cc: Jens Axboe, linux-block

On Wed, May 06, 2020 at 03:39:30PM +0200, Jan Kara wrote:
> I was investigating a performance issue with BFQ IO scheduler and I was
> pondering why I'm not seeing informational messages from BFQ. After quite
> some debugging I have found out that commit 35fe6d763229 "block: use
> standard blktrace API to output cgroup info for debug notes" broke standard
> blktrace API - namely the informational messages logged by bfq_log_bfqq()
> are no longer displayed by blkparse(8) tool. This is because these messages
> have now __BLK_TA_CGROUP bit set and that breaks flags checking in
> blkparse(1) and iowatcher(1). This series fixes both tools to be able to
> cope with events with __BLK_TA_CGROUP flag set.

I'd much rather revert a kernel change that breaks frequently used
userspace.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 0/3] Fix blkparse and iowatcher for kernels >= 4.14
  2020-05-06 14:42 ` Christoph Hellwig
@ 2020-05-07  9:49   ` Jan Kara
  0 siblings, 0 replies; 9+ messages in thread
From: Jan Kara @ 2020-05-07  9:49 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Jan Kara, Jens Axboe, linux-block, Tejun Heo

On Wed 06-05-20 07:42:51, Christoph Hellwig wrote:
> On Wed, May 06, 2020 at 03:39:30PM +0200, Jan Kara wrote:
> > I was investigating a performance issue with BFQ IO scheduler and I was
> > pondering why I'm not seeing informational messages from BFQ. After quite
> > some debugging I have found out that commit 35fe6d763229 "block: use
> > standard blktrace API to output cgroup info for debug notes" broke standard
> > blktrace API - namely the informational messages logged by bfq_log_bfqq()
> > are no longer displayed by blkparse(8) tool. This is because these messages
> > have now __BLK_TA_CGROUP bit set and that breaks flags checking in
> > blkparse(1) and iowatcher(1). This series fixes both tools to be able to
> > cope with events with __BLK_TA_CGROUP flag set.
> 
> I'd much rather revert a kernel change that breaks frequently used
> userspace.

We've discussed it [1]. The commit was merged in 4.14 which shows that the
breakage is not that severe as nobody noticed until now. Actually I did
some more digging in history now and until commit 743210386c "cgroup: use
cgrp->kn->id as the cgroup ID" from last November the breakage was only
visible if you had CONFIG_BLK_CGROUP enabled and had bio with bi_blkcg set
or trace note messages for non-trivial cgroups (effectively only BFQ
informational messages). After that commit trace note messages are broken
whenever you have CONFIG_BLK_CGROUP. So the reason why nobody noticed is
probably because until 5.5 the breakage was actually difficult to hit.

So I'm undecided whether at this point it's better to revert the original
commit (as likely there is other tooling that depends on this info), just
fix the fact that since commit 743210386c all trace note messages will have
non-empty cgroup info, or just fixup blkparse and move on...

								Honza

[1] https://lore.kernel.org/r/20200430114711.GA6576@quack2.suse.cz

-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 0/3] Fix blkparse and iowatcher for kernels >= 4.14
  2020-05-06 13:39 [PATCH 0/3] Fix blkparse and iowatcher for kernels >= 4.14 Jan Kara
                   ` (4 preceding siblings ...)
  2020-05-06 14:42 ` Christoph Hellwig
@ 2020-05-20 13:38 ` Jens Axboe
  5 siblings, 0 replies; 9+ messages in thread
From: Jens Axboe @ 2020-05-20 13:38 UTC (permalink / raw)
  To: Jan Kara; +Cc: linux-block

On 5/6/20 7:39 AM, Jan Kara wrote:
> I was investigating a performance issue with BFQ IO scheduler and I was
> pondering why I'm not seeing informational messages from BFQ. After quite
> some debugging I have found out that commit 35fe6d763229 "block: use
> standard blktrace API to output cgroup info for debug notes" broke standard
> blktrace API - namely the informational messages logged by bfq_log_bfqq()
> are no longer displayed by blkparse(8) tool. This is because these messages
> have now __BLK_TA_CGROUP bit set and that breaks flags checking in
> blkparse(1) and iowatcher(1). This series fixes both tools to be able to
> cope with events with __BLK_TA_CGROUP flag set.

Applied, thanks.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, back to index

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-06 13:39 [PATCH 0/3] Fix blkparse and iowatcher for kernels >= 4.14 Jan Kara
2020-05-06 13:39 ` [PATCH 1/3] blkparse: Handle cgroup information Jan Kara
2020-05-06 13:39 ` [PATCH 2/3] iowatcher: Use blktrace_api.h Jan Kara
2020-05-06 13:39 ` [PATCH 3/3] iowatcher: Handle cgroup information Jan Kara
2020-05-06 13:50 ` [PATCH 0/3] Fix blkparse and iowatcher for kernels >= 4.14 Paolo Valente
2020-05-06 14:12   ` Jan Kara
2020-05-06 14:42 ` Christoph Hellwig
2020-05-07  9:49   ` Jan Kara
2020-05-20 13:38 ` Jens Axboe

Linux-Block Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-block/0 linux-block/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-block linux-block/ https://lore.kernel.org/linux-block \
		linux-block@vger.kernel.org
	public-inbox-index linux-block

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-block


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git