* [Cluster-devel] [PATCH dlm-tool 01/14] dlm_tool: add fail functionality if dump failed
@ 2023-03-02 17:14 Alexander Aring
  2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 02/14] dlm_controld: always create logdir Alexander Aring
                   ` (12 more replies)
  0 siblings, 13 replies; 23+ messages in thread
From: Alexander Aring @ 2023-03-02 17:14 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Currently dlm_controld sets an embedded data int value when its dump
functionality fails, e.g. if the lockspace cannot be found. dlm_tool
does not parse this possible error value and exits successfully anyway.
This patch makes dlm_tool fail if dlm_controld sets an embedded data
error value.
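
For example, a caller of the updated library API has to check both the
return value and the embedded data value before using the dump buffer.
A minimal sketch along the lines of the dlm_tool change below (not
meant as exact code):

	char buf[DLMC_DUMP_SIZE];
	int rv, data;

	memset(buf, 0, sizeof(buf));

	rv = dlmc_dump_debug(buf, &data);
	if (rv)
		return rv;	/* query connection/transfer error */
	if (data)
		return data;	/* error reported by dlm_controld, e.g. lockspace not found */

	buf[DLMC_DUMP_SIZE - 1] = '\0';
	do_write(STDOUT_FILENO, buf, strlen(buf));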
---
 dlm_controld/lib.c           | 25 +++++++++-------
 dlm_controld/libdlmcontrol.h | 10 +++----
 dlm_tool/main.c              | 57 +++++++++++++++++++++++++++---------
 3 files changed, 62 insertions(+), 30 deletions(-)

diff --git a/dlm_controld/lib.c b/dlm_controld/lib.c
index 2888ad05..a21150f2 100644
--- a/dlm_controld/lib.c
+++ b/dlm_controld/lib.c
@@ -109,7 +109,7 @@ static void init_header(struct dlmc_header *h, int cmd, char *name,
 
 static char copy_buf[DLMC_DUMP_SIZE];
 
-static int do_dump(int cmd, char *name, char *buf)
+static int do_dump(int cmd, char *name, char *buf, int *data)
 {
 	struct dlmc_header h;
 	int fd, rv, len;
@@ -118,6 +118,8 @@ static int do_dump(int cmd, char *name, char *buf)
 
 	init_header(&h, cmd, name, 0);
 
+	*data = 0;
+
 	fd = do_connect(DLMC_QUERY_SOCK_PATH);
 	if (fd < 0) {
 		rv = fd;
@@ -134,6 +136,7 @@ static int do_dump(int cmd, char *name, char *buf)
 	if (rv < 0)
 		goto out_close;
 
+	*data = h.data;
 	len = h.len - sizeof(h);
 
 	if (len <= 0 || len > DLMC_DUMP_SIZE)
@@ -150,29 +153,29 @@ static int do_dump(int cmd, char *name, char *buf)
 	return rv;
 }
 
-int dlmc_dump_debug(char *buf)
+int dlmc_dump_debug(char *buf, int *data)
 {
-	return do_dump(DLMC_CMD_DUMP_DEBUG, NULL, buf);
+	return do_dump(DLMC_CMD_DUMP_DEBUG, NULL, buf, data);
 }
 
-int dlmc_dump_config(char *buf)
+int dlmc_dump_config(char *buf, int *data)
 {
-	return do_dump(DLMC_CMD_DUMP_CONFIG, NULL, buf);
+	return do_dump(DLMC_CMD_DUMP_CONFIG, NULL, buf, data);
 }
 
-int dlmc_dump_log_plock(char *buf)
+int dlmc_dump_log_plock(char *buf, int *data)
 {
-	return do_dump(DLMC_CMD_DUMP_LOG_PLOCK, NULL, buf);
+	return do_dump(DLMC_CMD_DUMP_LOG_PLOCK, NULL, buf, data);
 }
 
-int dlmc_dump_plocks(char *name, char *buf)
+int dlmc_dump_plocks(char *name, char *buf, int *data)
 {
-	return do_dump(DLMC_CMD_DUMP_PLOCKS, name, buf);
+	return do_dump(DLMC_CMD_DUMP_PLOCKS, name, buf, data);
 }
 
-int dlmc_dump_run(char *buf)
+int dlmc_dump_run(char *buf, int *data)
 {
-	return do_dump(DLMC_CMD_DUMP_RUN, NULL, buf);
+	return do_dump(DLMC_CMD_DUMP_RUN, NULL, buf, data);
 }
 
 int dlmc_reload_config(void)
diff --git a/dlm_controld/libdlmcontrol.h b/dlm_controld/libdlmcontrol.h
index a8654f3e..08f04c39 100644
--- a/dlm_controld/libdlmcontrol.h
+++ b/dlm_controld/libdlmcontrol.h
@@ -80,11 +80,11 @@ struct dlmc_lockspace {
 
 #define DLMC_STATUS_VERBOSE	0x00000001
 
-int dlmc_dump_debug(char *buf);
-int dlmc_dump_config(char *buf);
-int dlmc_dump_run(char *buf);
-int dlmc_dump_log_plock(char *buf);
-int dlmc_dump_plocks(char *name, char *buf);
+int dlmc_dump_debug(char *buf, int *data);
+int dlmc_dump_config(char *buf, int *data);
+int dlmc_dump_run(char *buf, int *data);
+int dlmc_dump_log_plock(char *buf, int *data);
+int dlmc_dump_plocks(char *name, char *buf, int *data);
 int dlmc_lockspace_info(char *lsname, struct dlmc_lockspace *ls);
 int dlmc_node_info(char *lsname, int nodeid, struct dlmc_node *node);
 /* caller need to free *lss */
diff --git a/dlm_tool/main.c b/dlm_tool/main.c
index 50f0cae9..52fd5b89 100644
--- a/dlm_tool/main.c
+++ b/dlm_tool/main.c
@@ -1466,36 +1466,51 @@ static void do_fence_ack(char *name)
 	dlmc_fence_ack(name);
 }
 
-static void do_plocks(char *name)
+static int do_plocks(char *name)
 {
 	char buf[DLMC_DUMP_SIZE];
+	int rv, data;
 
 	memset(buf, 0, sizeof(buf));
 
-	dlmc_dump_plocks(name, buf);
+	rv = dlmc_dump_plocks(name, buf, &data);
+	if (rv)
+		return rv;
+	else if (data)
+		return data;
 
 	buf[DLMC_DUMP_SIZE-1] = '\0';
 
 	do_write(STDOUT_FILENO, buf, strlen(buf));
+
+	return 0;
 }
 
-static void do_dump(int op)
+static int do_dump(int op)
 {
+	int rv = -EOPNOTSUPP, data;
 	char buf[DLMC_DUMP_SIZE];
 
 	memset(buf, 0, sizeof(buf));
 
 	if (op == OP_DUMP)
-		dlmc_dump_debug(buf);
+		rv = dlmc_dump_debug(buf, &data);
 	else if (op == OP_DUMP_CONFIG)
-		dlmc_dump_config(buf);
+		rv = dlmc_dump_config(buf, &data);
 	else if (op == OP_DUMP_RUN)
-		dlmc_dump_run(buf);
+		rv = dlmc_dump_run(buf, &data);
+
+	if (rv)
+		return rv;
+	else if (data)
+		return data;
 
 	buf[DLMC_DUMP_SIZE-1] = '\0';
 
 	do_write(STDOUT_FILENO, buf, strlen(buf));
 	printf("\n");
+
+	return 0;
 }
 
 static void do_reload_config(void)
@@ -1514,18 +1529,25 @@ static void do_set_config(void)
 		printf("set_config done\n");
 }
 
-static void do_log_plock(void)
+static int do_log_plock(void)
 {
 	char buf[DLMC_DUMP_SIZE];
+	int rv, data;
 
 	memset(buf, 0, sizeof(buf));
 
-	dlmc_dump_log_plock(buf);
+	rv = dlmc_dump_log_plock(buf, &data);
+	if (rv)
+		return rv;
+	else if (data)
+		return data;
 
 	buf[DLMC_DUMP_SIZE-1] = '\0';
 
 	do_write(STDOUT_FILENO, buf, strlen(buf));
 	printf("\n");
+
+	return 0;
 }
 
 static int do_run(int op)
@@ -1576,6 +1598,7 @@ int main(int argc, char **argv)
 {
 	prog_name = argv[0];
 	decode_arguments(argc, argv);
+	int rv = 0;
 
 	switch (operation) {
 
@@ -1605,11 +1628,11 @@ int main(int argc, char **argv)
 		break;
 
 	case OP_DUMP:
-		do_dump(operation);
+		rv = do_dump(operation);
 		break;
 
 	case OP_DUMP_CONFIG:
-		do_dump(operation);
+		rv = do_dump(operation);
 		break;
 
 	case OP_RELOAD_CONFIG:
@@ -1621,11 +1644,11 @@ int main(int argc, char **argv)
 		break;
 
 	case OP_LOG_PLOCK:
-		do_log_plock();
+		rv = do_log_plock();
 		break;
 
 	case OP_PLOCKS:
-		do_plocks(lsname);
+		rv = do_plocks(lsname);
 		break;
 
 	case OP_DEADLOCK_CHECK:
@@ -1654,9 +1677,15 @@ int main(int argc, char **argv)
 		break;
 
 	case OP_DUMP_RUN:
-		do_dump(operation);
+		rv = do_dump(operation);
 		break;
 	}
-	return 0;
+
+	if (rv < 0) {
+		fprintf(stderr, "failed: %s\n", strerror(-rv));
+		return EXIT_FAILURE;
+	}
+
+	return EXIT_SUCCESS;
 }
 
-- 
2.31.1



* [Cluster-devel] [PATCH dlm-tool 02/14] dlm_controld: always create logdir
  2023-03-02 17:14 [Cluster-devel] [PATCH dlm-tool 01/14] dlm_tool: add fail functionality if dump failed Alexander Aring
@ 2023-03-02 17:14 ` Alexander Aring
  2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 03/14] dlm_controld: add plock logfile Alexander Aring
                   ` (11 subsequent siblings)
  12 siblings, 0 replies; 23+ messages in thread
From: Alexander Aring @ 2023-03-02 17:14 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Currently the logdir is created only if logfile contains a string. To
allow adding further logfiles, simply create the logdir always on
startup.
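
The repeated "create the directory unless it already exists" steps
could also be expressed as a small helper; this is only a sketch to
illustrate the pattern, not part of this change:

	/* hypothetical helper: create a directory, treat EEXIST as success */
	static int mkdir_if_missing(const char *path, mode_t mode)
	{
		if (mkdir(path, mode) < 0 && errno != EEXIST)
			return -errno;
		return 0;
	}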
---
 dlm_controld/logging.c | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/dlm_controld/logging.c b/dlm_controld/logging.c
index 2c57138c..3298ef99 100644
--- a/dlm_controld/logging.c
+++ b/dlm_controld/logging.c
@@ -38,27 +38,27 @@ void init_logging(void)
 
 	set_logfile_priority();
 
-	if (logfile[0]) {
-		old_umask = umask(0077);
-		rv = mkdir(SYS_VARDIR, 0700);
-		if (rv < 0 && errno != EEXIST) {
-			umask(old_umask);
-			goto skip_logfile;
-		}
+	old_umask = umask(0077);
+	rv = mkdir(SYS_VARDIR, 0700);
+	if (rv < 0 && errno != EEXIST) {
+		umask(old_umask);
+		goto skip_logfile;
+	}
 
-		rv = mkdir(SYS_LOGDIR, 0700);
-		if (rv < 0 && errno != EEXIST) {
-			umask(old_umask);
-			goto skip_logfile;
-		}
+	rv = mkdir(SYS_LOGDIR, 0700);
+	if (rv < 0 && errno != EEXIST) {
+		umask(old_umask);
+		goto skip_logfile;
+	}
 
-		rv = mkdir(LOGDIR, 0700);
-		if (rv < 0 && errno != EEXIST) {
-			umask(old_umask);
-			goto skip_logfile;
-		}
+	rv = mkdir(LOGDIR, 0700);
+	if (rv < 0 && errno != EEXIST) {
 		umask(old_umask);
+		goto skip_logfile;
+	}
+	umask(old_umask);
 
+	if (logfile[0]) {
 		logfile_fp = fopen(logfile, "a+");
 		if (logfile_fp != NULL) {
 			int fd = fileno(logfile_fp);
-- 
2.31.1



* [Cluster-devel] [PATCH dlm-tool 03/14] dlm_controld: add plock logfile
  2023-03-02 17:14 [Cluster-devel] [PATCH dlm-tool 01/14] dlm_tool: add fail functionality if dump failed Alexander Aring
  2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 02/14] dlm_controld: always create logdir Alexander Aring
@ 2023-03-02 17:14 ` Alexander Aring
  2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 04/14] dlm_controld: move processing of saved messages to plock level Alexander Aring
                   ` (10 subsequent siblings)
  12 siblings, 0 replies; 23+ messages in thread
From: Alexander Aring @ 2023-03-02 17:14 UTC (permalink / raw)
  To: cluster-devel.redhat.com

The current plock logging is limited by an in-memory log buffer which
can be dumped via the dlm_tool log_plock functionality. To trace plock
performance issues it is necessary to log plock activity into a bigger
log buffer such as a file. This patch adds functionality to append
plock logging information to a log file.

WARNING: depending on plock activity, the resulting log file can grow
to an enormous size. This option should be used for debugging purposes
only.
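
Assuming the usual dlm.conf key=value syntax, the new option can be
enabled with a line like:

	plock_debug_logfile=1

after which plock messages are appended to the plock.log file in the
daemon's log directory (PLOCK_LOG_FILE_PATH).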
---
 dlm_controld/dlm_daemon.h |  4 ++++
 dlm_controld/logging.c    | 39 +++++++++++++++++++++++++++++++--------
 dlm_controld/main.c       |  5 +++++
 3 files changed, 40 insertions(+), 8 deletions(-)

diff --git a/dlm_controld/dlm_daemon.h b/dlm_controld/dlm_daemon.h
index f0bad90f..c74f684a 100644
--- a/dlm_controld/dlm_daemon.h
+++ b/dlm_controld/dlm_daemon.h
@@ -76,10 +76,12 @@
 
 #define RUN_FILE_NAME            "dlm_controld.pid"
 #define LOG_FILE_NAME            "dlm_controld.log"
+#define PLOCK_LOG_FILE_NAME      "plock.log"
 #define CONF_FILE_NAME           "dlm.conf"
 
 #define RUN_FILE_PATH            RUNDIR "/" RUN_FILE_NAME
 #define LOG_FILE_PATH            LOGDIR "/" LOG_FILE_NAME
+#define PLOCK_LOG_FILE_PATH      LOGDIR "/" PLOCK_LOG_FILE_NAME
 #define CONF_FILE_PATH           CONFDIR "/" CONF_FILE_NAME
 
 #define DEFAULT_LOG_MODE         LOG_MODE_OUTPUT_FILE | LOG_MODE_OUTPUT_SYSLOG
@@ -87,6 +89,7 @@
 #define DEFAULT_SYSLOG_PRIORITY  LOG_INFO
 #define DEFAULT_LOGFILE_PRIORITY LOG_INFO
 #define DEFAULT_LOGFILE          LOG_FILE_PATH
+#define DEFAULT_PLOCK_LOGFILE	 PLOCK_LOG_FILE_PATH
 
 #define DEFAULT_NETLINK_RCVBUF	(2 * 1024 * 1024)
 
@@ -110,6 +113,7 @@ enum {
         enable_fscontrol_ind,
         enable_plock_ind,
         plock_debug_ind,
+        plock_debug_logfile_ind,
         plock_rate_limit_ind,
         plock_ownership_ind,
         drop_resources_time_ind,
diff --git a/dlm_controld/logging.c b/dlm_controld/logging.c
index 3298ef99..83de2da4 100644
--- a/dlm_controld/logging.c
+++ b/dlm_controld/logging.c
@@ -12,7 +12,9 @@ static int syslog_facility;
 static int syslog_priority;
 static int logfile_priority;
 static char logfile[PATH_MAX];
+static char plock_logfile[PATH_MAX];
 static FILE *logfile_fp;
+static FILE *plock_logfile_fp;
 
 /* logfile_priority is the only one of these options that
    can be controlled from command line, environment variable
@@ -35,6 +37,7 @@ void init_logging(void)
 	syslog_priority = DEFAULT_SYSLOG_PRIORITY;
 	logfile_priority = DEFAULT_LOGFILE_PRIORITY;
 	strcpy(logfile, DEFAULT_LOGFILE);
+	strcpy(plock_logfile, DEFAULT_PLOCK_LOGFILE);
 
 	set_logfile_priority();
 
@@ -66,6 +69,15 @@ void init_logging(void)
 		}
 	}
 
+	if (dlm_options[plock_debug_logfile_ind].use_int &&
+	    plock_logfile[0]) {
+		plock_logfile_fp = fopen(plock_logfile, "a+");
+		if (plock_logfile_fp != NULL) {
+			int fd = fileno(plock_logfile_fp);
+			fcntl(fd, F_SETFD, fcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
+		}
+	}
+
 skip_logfile:
 	openlog(DAEMON_NAME, LOG_CONS | LOG_PID, syslog_facility);
 }
@@ -75,6 +87,8 @@ void close_logging(void)
 	closelog();
 	if (logfile_fp)
 		fclose(logfile_fp);
+	if (plock_logfile_fp)
+		fclose(plock_logfile_fp);
 }
 
 #define NAME_ID_SIZE 32
@@ -151,6 +165,16 @@ static void log_save_str(int len, char *log_buf, unsigned int *point,
 	*wrap = w;
 }
 
+static void log_str_to_file(FILE *fp)
+{
+	time_t logtime = time(NULL);
+	char tbuf[64];
+
+	strftime(tbuf, sizeof(tbuf), "%b %d %T", localtime(&logtime));
+	fprintf(fp, "%s %s", tbuf, log_str);
+	fflush(fp);
+}
+
 void log_level(char *name_in, uint32_t level_in, const char *fmt, ...)
 {
 	va_list ap;
@@ -191,19 +215,18 @@ void log_level(char *name_in, uint32_t level_in, const char *fmt, ...)
 
 	if (level < LOG_NONE)
 		log_save_str(pos - 1, log_dump, &log_point, &log_wrap);
-	if (plock)
+	if (plock) {
 		log_save_str(pos - 1, log_dump_plock, &log_point_plock, &log_wrap_plock);
 
+		if (plock_logfile_fp)
+			log_str_to_file(plock_logfile_fp);
+	}
+
 	if (level <= syslog_priority)
 		syslog(level, "%s", log_str);
 
-	if (level <= logfile_priority && logfile_fp) {
-		time_t logtime = time(NULL);
-		char tbuf[64];
-		strftime(tbuf, sizeof(tbuf), "%b %d %T", localtime(&logtime));
-		fprintf(logfile_fp, "%s %s", tbuf, log_str);
-		fflush(logfile_fp);
-	}
+	if (level <= logfile_priority && logfile_fp)
+		log_str_to_file(logfile_fp);
 
 	if (!dlm_options[daemon_debug_ind].use_int)
 		return;
diff --git a/dlm_controld/main.c b/dlm_controld/main.c
index c9d1c5f1..8e8d4038 100644
--- a/dlm_controld/main.c
+++ b/dlm_controld/main.c
@@ -1854,6 +1854,11 @@ static void set_opt_defaults(void)
 			0, NULL, 0, 1,
 			"enable plock debugging");
 
+	set_opt_default(plock_debug_logfile_ind,
+			"plock_debug_logfile", 'O', req_arg_bool,
+			0, NULL, 0, 1,
+			"write plock debugging to log file. Note: resulting log file can take enormous space.");
+
 	set_opt_default(plock_rate_limit_ind,
 			"plock_rate_limit", 'l', req_arg_int,
 			0, NULL, 0, 1,
-- 
2.31.1



* [Cluster-devel] [PATCH dlm-tool 04/14] dlm_controld: move processing of saved messages to plock level
  2023-03-02 17:14 [Cluster-devel] [PATCH dlm-tool 01/14] dlm_tool: add fail functionality if dump failed Alexander Aring
  2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 02/14] dlm_controld: always create logdir Alexander Aring
  2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 03/14] dlm_controld: add plock logfile Alexander Aring
@ 2023-03-02 17:14 ` Alexander Aring
  2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 05/14] dlm_controld: remove ls parameter Alexander Aring
                   ` (9 subsequent siblings)
  12 siblings, 0 replies; 23+ messages in thread
From: Alexander Aring @ 2023-03-02 17:14 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Saving plock messages during a corosync resource membership update is
logged at the plock log level. This patch puts the processing of those
saved messages on the same log level.
---
 dlm_controld/plock.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dlm_controld/plock.c b/dlm_controld/plock.c
index 692787e2..c2c80360 100644
--- a/dlm_controld/plock.c
+++ b/dlm_controld/plock.c
@@ -1612,7 +1612,7 @@ void process_saved_plocks(struct lockspace *ls)
 	struct dlm_header *hd;
 	int count = 0;
 
-	log_dlock(ls, "process_saved_plocks begin");
+	log_plock(ls, "process_saved_plocks begin");
 
 	if (list_empty(&ls->saved_messages))
 		goto out;
@@ -1643,7 +1643,7 @@ void process_saved_plocks(struct lockspace *ls)
 		count++;
 	}
  out:
-	log_dlock(ls, "process_saved_plocks %d done", count);
+	log_plock(ls, "process_saved_plocks %d done", count);
 }
 
 /* locks still marked SYNCING should not go into the ckpt; the new node
-- 
2.31.1



* [Cluster-devel] [PATCH dlm-tool 05/14] dlm_controld: remove ls parameter
  2023-03-02 17:14 [Cluster-devel] [PATCH dlm-tool 01/14] dlm_tool: add fail functionality if dump failed Alexander Aring
                   ` (2 preceding siblings ...)
  2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 04/14] dlm_controld: move processing of saved messages to plock level Alexander Aring
@ 2023-03-02 17:14 ` Alexander Aring
  2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 06/14] dlm_controld: constify timeval of dt_usec() Alexander Aring
                   ` (8 subsequent siblings)
  12 siblings, 0 replies; 23+ messages in thread
From: Alexander Aring @ 2023-03-02 17:14 UTC (permalink / raw)
  To: cluster-devel.redhat.com

The ls parameter in write_result() is not used, so we can remove it.
---
 dlm_controld/plock.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/dlm_controld/plock.c b/dlm_controld/plock.c
index c2c80360..462c9212 100644
--- a/dlm_controld/plock.c
+++ b/dlm_controld/plock.c
@@ -684,8 +684,7 @@ static int add_waiter(struct lockspace *ls, struct resource *r,
 	return 0;
 }
 
-static void write_result(struct lockspace *ls, struct dlm_plock_info *in,
-			 int rv)
+static void write_result(struct dlm_plock_info *in, int rv)
 {
 	int write_rv;
 
@@ -719,7 +718,7 @@ static void do_waiters(struct lockspace *ls, struct resource *r)
 		rv = lock_internal(ls, r, in);
 
 		if (in->nodeid == our_nodeid)
-			write_result(ls, in, rv);
+			write_result(in, rv);
 
 		free(w);
 	}
@@ -744,7 +743,7 @@ static void do_lock(struct lockspace *ls, struct dlm_plock_info *in,
 
  out:
 	if (in->nodeid == our_nodeid && rv != -EINPROGRESS)
-		write_result(ls, in, rv);
+		write_result(in, rv);
 
 	do_waiters(ls, r);
 	put_resource(ls, r);
@@ -768,7 +767,7 @@ static void do_unlock(struct lockspace *ls, struct dlm_plock_info *in,
 	}
 
 	if (in->nodeid == our_nodeid)
-		write_result(ls, in, rv);
+		write_result(in, rv);
 
  skip_result:
 	do_waiters(ls, r);
@@ -787,7 +786,7 @@ static void do_get(struct lockspace *ls, struct dlm_plock_info *in,
 	else
 		rv = 0;
 
-	write_result(ls, in, rv);
+	write_result(in, rv);
 	put_resource(ls, r);
 }
 
@@ -830,7 +829,7 @@ static void __receive_plock(struct lockspace *ls, struct dlm_plock_info *in,
 		log_elock(ls, "receive_plock error from %d optype %d",
 			  from, in->optype);
 		if (from == our_nodeid)
-			write_result(ls, in, -EINVAL);
+			write_result(in, -EINVAL);
 	}
 }
 
-- 
2.31.1



* [Cluster-devel] [PATCH dlm-tool 06/14] dlm_controld: constify timeval of dt_usec()
  2023-03-02 17:14 [Cluster-devel] [PATCH dlm-tool 01/14] dlm_tool: add fail functionality if dump failed Alexander Aring
                   ` (3 preceding siblings ...)
  2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 05/14] dlm_controld: remove ls parameter Alexander Aring
@ 2023-03-02 17:14 ` Alexander Aring
  2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 07/14] dlm_controld: add gcc format printf attribute to log_level Alexander Aring
                   ` (7 subsequent siblings)
  12 siblings, 0 replies; 23+ messages in thread
From: Alexander Aring @ 2023-03-02 17:14 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Those parameters are only used read-only. We don't change any data
that those pointers point to.
---
 dlm_controld/plock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dlm_controld/plock.c b/dlm_controld/plock.c
index 462c9212..8b052a94 100644
--- a/dlm_controld/plock.c
+++ b/dlm_controld/plock.c
@@ -198,7 +198,7 @@ static unsigned long time_diff_ms(struct timeval *begin, struct timeval *end)
 	return (result.tv_sec * 1000) + (result.tv_usec / 1000);
 }
 
-static uint64_t dt_usec(struct timeval *start, struct timeval *stop)
+static uint64_t dt_usec(const struct timeval *start, const struct timeval *stop)
 {
 	uint64_t dt;
 
-- 
2.31.1



* [Cluster-devel] [PATCH dlm-tool 07/14] dlm_controld: add gcc format printf attribute to log_level
  2023-03-02 17:14 [Cluster-devel] [PATCH dlm-tool 01/14] dlm_tool: add fail functionality if dump failed Alexander Aring
                   ` (4 preceding siblings ...)
  2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 06/14] dlm_controld: constify timeval of dt_usec() Alexander Aring
@ 2023-03-02 17:14 ` Alexander Aring
  2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 08/14] dlm_controld: enable nanosec logging Alexander Aring
                   ` (6 subsequent siblings)
  12 siblings, 0 replies; 23+ messages in thread
From: Alexander Aring @ 2023-03-02 17:14 UTC (permalink / raw)
  To: cluster-devel.redhat.com

This patch adds __attribute__ (( format( printf, 3, 4 ) )) to the
log_level() function, which takes a format string as parameter. With
this change the compiler warns when the format string and the applied
parameters do not match. We had a couple of such warnings, which this
patch fixes as well.
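
A minimal illustration of what the attribute catches (a sketch, not
code from this tree):

	__attribute__ (( format( printf, 3, 4 ) ))
	void log_level(char *name_in, uint32_t level_in, const char *fmt, ...);

	/* with -Wformat (part of -Wall) gcc now warns that "%s" expects a
	 * char * but an int is passed */
	log_level(NULL, LOG_DEBUG, "bad len %s", 42);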
---
 dlm_controld/daemon_cpg.c | 2 +-
 dlm_controld/dlm_daemon.h | 1 +
 dlm_controld/plock.c      | 3 ++-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/dlm_controld/daemon_cpg.c b/dlm_controld/daemon_cpg.c
index e6a03a28..2e0634d4 100644
--- a/dlm_controld/daemon_cpg.c
+++ b/dlm_controld/daemon_cpg.c
@@ -2080,7 +2080,7 @@ int receive_run_reply(struct dlm_header *hd, int len)
 		return 0;
 
 	if (len != sizeof(struct run_reply)) {
-		log_debug("receive_run_reply %s bad len %s expect %d",
+		log_debug("receive_run_reply %s bad len %d expect %zu",
 			  rep->uuid, len, sizeof(struct run_reply));
 		run->info.reply_count++;
 		run->info.need_replies--;
diff --git a/dlm_controld/dlm_daemon.h b/dlm_controld/dlm_daemon.h
index c74f684a..2c53e22b 100644
--- a/dlm_controld/dlm_daemon.h
+++ b/dlm_controld/dlm_daemon.h
@@ -220,6 +220,7 @@ EXTERN struct list_head run_ops;
 #define LOG_PLOCK 0x00010000
 #define LOG_NONE  0x00001111
 
+__attribute__ (( format( printf, 3, 4 ) ))
 void log_level(char *name_in, uint32_t level_in, const char *fmt, ...);
 
 #define log_error(fmt, args...) log_level(NULL, LOG_ERR, fmt, ##args)
diff --git a/dlm_controld/plock.c b/dlm_controld/plock.c
index 8b052a94..2f0392c3 100644
--- a/dlm_controld/plock.c
+++ b/dlm_controld/plock.c
@@ -1925,7 +1925,8 @@ void receive_plocks_data(struct lockspace *ls, struct dlm_header *hd, int len)
 		/* no locks should be included for owned resources */
 
 		if (owner && count) {
-			log_elock(ls, "recv_plocks_data %d:%u n %llu o %d bad count %u",
+			log_elock(ls, "recv_plocks_data %d:%u n %llu o %d bad count %" PRIu32,
+				  hd->nodeid, hd->msgdata,
 				  (unsigned long long)num, owner, count);
 			goto fail_free;
 		}
-- 
2.31.1



* [Cluster-devel] [PATCH dlm-tool 08/14] dlm_controld: enable nanosec logging
  2023-03-02 17:14 [Cluster-devel] [PATCH dlm-tool 01/14] dlm_tool: add fail functionality if dump failed Alexander Aring
                   ` (5 preceding siblings ...)
  2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 07/14] dlm_controld: add gcc format printf attribute to log_level Alexander Aring
@ 2023-03-02 17:14 ` Alexander Aring
  2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 09/14] dlm_controld: use write_result() Alexander Aring
                   ` (5 subsequent siblings)
  12 siblings, 0 replies; 23+ messages in thread
From: Alexander Aring @ 2023-03-02 17:14 UTC (permalink / raw)
  To: cluster-devel.redhat.com

The current timestamp resolution of the logging is seconds since
dlm_controld started. This patch adds nanosecond resolution, which is
requested from the OS anyway.
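
For illustration, the conversion folds both timespec fields into a
single 64-bit nanosecond value, e.g. tv_sec=5, tv_nsec=250000000
becomes 5250000000 ns. A minimal sketch (the explicit uint64_t cast is
an assumption here to keep the multiplication in 64 bits):

	#include <stdint.h>
	#include <time.h>

	#define NS_PER_SECOND 1000000000ULL

	static uint64_t monotime_ns(void)
	{
		struct timespec ts;

		clock_gettime(CLOCK_MONOTONIC, &ts);
		return (uint64_t)ts.tv_sec * NS_PER_SECOND + (uint64_t)ts.tv_nsec;
	}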
---
 dlm_controld/main.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/dlm_controld/main.c b/dlm_controld/main.c
index 8e8d4038..fdfebd1f 100644
--- a/dlm_controld/main.c
+++ b/dlm_controld/main.c
@@ -128,11 +128,13 @@ int do_write(int fd, void *buf, size_t count)
 	return 0;
 }
 
+enum { NS_PER_SECOND = 1000000000 };
+
 uint64_t monotime(void)
 {
 	struct timespec ts;
 	clock_gettime(CLOCK_MONOTONIC, &ts);
-	return ts.tv_sec;
+	return (ts.tv_sec * NS_PER_SECOND)  + ts.tv_nsec;
 }
 
 static void client_alloc(void)
-- 
2.31.1



* [Cluster-devel] [PATCH dlm-tool 09/14] dlm_controld: use write_result()
  2023-03-02 17:14 [Cluster-devel] [PATCH dlm-tool 01/14] dlm_tool: add fail functionality if dump failed Alexander Aring
                   ` (6 preceding siblings ...)
  2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 08/14] dlm_controld: enable nanosec logging Alexander Aring
@ 2023-03-02 17:14 ` Alexander Aring
  2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 10/14] dlm_controld: be sure we stop lockspaces before shutdown Alexander Aring
                   ` (4 subsequent siblings)
  12 siblings, 0 replies; 23+ messages in thread
From: Alexander Aring @ 2023-03-02 17:14 UTC (permalink / raw)
  To: cluster-devel.redhat.com

This patch uses write_result() instead of duplicating the code that
write_result() already implements.

Reported-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 dlm_controld/plock.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/dlm_controld/plock.c b/dlm_controld/plock.c
index 2f0392c3..24ad777a 100644
--- a/dlm_controld/plock.c
+++ b/dlm_controld/plock.c
@@ -1600,8 +1600,7 @@ void process_plocks(int ci)
 #else
 	if (!(info.flags & DLM_PLOCK_FL_CLOSE)) {
 #endif
-		info.rv = rv;
-		rv = write(plock_device_fd, &info, sizeof(info));
+		write_result(&info, rv);
 	}
 }
 
-- 
2.31.1



* [Cluster-devel] [PATCH dlm-tool 10/14] dlm_controld: be sure we stop lockspaces before shutdown
  2023-03-02 17:14 [Cluster-devel] [PATCH dlm-tool 01/14] dlm_tool: add fail functionality if dump failed Alexander Aring
                   ` (7 preceding siblings ...)
  2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 09/14] dlm_controld: use write_result() Alexander Aring
@ 2023-03-02 17:14 ` Alexander Aring
  2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 11/14] dlm_controld: constify name_in in log_level() Alexander Aring
                   ` (3 subsequent siblings)
  12 siblings, 0 replies; 23+ messages in thread
From: Alexander Aring @ 2023-03-02 17:14 UTC (permalink / raw)
  To: cluster-devel.redhat.com

The dlm_controld shutdown process drops all dlm configfs entries for
the dlm communication settings regarding the corosync address
configuration. This ends in a socket close in the dlm subsystem for the
connection that belongs to it. Newly introduced kernel warnings check
that the lockspace is stopped before the socket connection is closed.
This is necessary because no new dlm messages should be triggered
afterwards. To be sure dlm_controld stops the lockspaces, make sure it
always does so in close_cpg_daemon(). Currently the handling to stop
all lockspaces is missing when there is no cpg_handle_daemon anymore.
---
 dlm_controld/daemon_cpg.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/dlm_controld/daemon_cpg.c b/dlm_controld/daemon_cpg.c
index 2e0634d4..10c80ddc 100644
--- a/dlm_controld/daemon_cpg.c
+++ b/dlm_controld/daemon_cpg.c
@@ -2527,6 +2527,15 @@ int setup_cpg_daemon(void)
 	return -1;
 }
 
+static void stop_lockspaces(void)
+{
+	struct lockspace *ls;
+
+	list_for_each_entry(ls, &lockspaces, list) {
+		cpg_stop_kernel(ls);
+	}
+}
+
 void close_cpg_daemon(void)
 {
 	struct lockspace *ls;
@@ -2534,8 +2543,11 @@ void close_cpg_daemon(void)
 	struct cpg_name name;
 	int i = 0;
 
-	if (!cpg_handle_daemon)
+	if (!cpg_handle_daemon) {
+		stop_lockspaces();
 		return;
+	}
+
 	if (cluster_down)
 		goto fin;
 
-- 
2.31.1



* [Cluster-devel] [PATCH dlm-tool 11/14] dlm_controld: constify name_in in log_level()
  2023-03-02 17:14 [Cluster-devel] [PATCH dlm-tool 01/14] dlm_tool: add fail functionality if dump failed Alexander Aring
                   ` (8 preceding siblings ...)
  2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 10/14] dlm_controld: be sure we stop lockspaces before shutdown Alexander Aring
@ 2023-03-02 17:14 ` Alexander Aring
  2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 12/14] dlm_controld: plock log waiters state Alexander Aring
                   ` (2 subsequent siblings)
  12 siblings, 0 replies; 23+ messages in thread
From: Alexander Aring @ 2023-03-02 17:14 UTC (permalink / raw)
  To: cluster-devel.redhat.com

This patch constifies the name_in parameter of log_level(), which is
only used as a read-only pointer.
---
 dlm_controld/dlm_daemon.h | 2 +-
 dlm_controld/logging.c    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/dlm_controld/dlm_daemon.h b/dlm_controld/dlm_daemon.h
index 2c53e22b..2a2b0dbf 100644
--- a/dlm_controld/dlm_daemon.h
+++ b/dlm_controld/dlm_daemon.h
@@ -221,7 +221,7 @@ EXTERN struct list_head run_ops;
 #define LOG_NONE  0x00001111
 
 __attribute__ (( format( printf, 3, 4 ) ))
-void log_level(char *name_in, uint32_t level_in, const char *fmt, ...);
+void log_level(const char *name_in, uint32_t level_in, const char *fmt, ...);
 
 #define log_error(fmt, args...) log_level(NULL, LOG_ERR, fmt, ##args)
 #define log_debug(fmt, args...) log_level(NULL, LOG_DEBUG, fmt, ##args)
diff --git a/dlm_controld/logging.c b/dlm_controld/logging.c
index 83de2da4..e71fe52c 100644
--- a/dlm_controld/logging.c
+++ b/dlm_controld/logging.c
@@ -175,7 +175,7 @@ static void log_str_to_file(FILE *fp)
 	fflush(fp);
 }
 
-void log_level(char *name_in, uint32_t level_in, const char *fmt, ...)
+void log_level(const char *name_in, uint32_t level_in, const char *fmt, ...)
 {
 	va_list ap;
 	char name[NAME_ID_SIZE + 2];
-- 
2.31.1



* [Cluster-devel] [PATCH dlm-tool 12/14] dlm_controld: plock log waiters state
  2023-03-02 17:14 [Cluster-devel] [PATCH dlm-tool 01/14] dlm_tool: add fail functionality if dump failed Alexander Aring
                   ` (9 preceding siblings ...)
  2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 11/14] dlm_controld: constify name_in in log_level() Alexander Aring
@ 2023-03-02 17:14 ` Alexander Aring
  2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 13/14] dlm_controld: plock log lock state Alexander Aring
  2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 14/14] python: add posix lockdb plot tool Alexander Aring
  12 siblings, 0 replies; 23+ messages in thread
From: Alexander Aring @ 2023-03-02 17:14 UTC (permalink / raw)
  To: cluster-devel.redhat.com

This patch adds additional log_plock() calls to track the start and end
of waiter states inside the dlm_controld lock database. With this
information we know when a specific lock had contention from the
dlm_controld cluster-wide plock point of view.
---
 dlm_controld/plock.c | 38 ++++++++++++++++++++++++++++----------
 1 file changed, 28 insertions(+), 10 deletions(-)

diff --git a/dlm_controld/plock.c b/dlm_controld/plock.c
index 24ad777a..b93863f7 100644
--- a/dlm_controld/plock.c
+++ b/dlm_controld/plock.c
@@ -208,6 +208,25 @@ static uint64_t dt_usec(const struct timeval *start, const struct timeval *stop)
 	return dt;
 }
 
+static void plock_print_start_waiter(const struct lockspace *ls,
+				     struct lock_waiter *w)
+{
+	log_plock(ls, "state waiter start %llx %p %llx-%llx %d/%u/%llx",
+		  (unsigned long long)w->info.number,
+		  w,
+		  (unsigned long long)w->info.start,
+		  (unsigned long long)w->info.end,
+		  w->info.nodeid, w->info.pid,
+		  (unsigned long long)w->info.owner);
+}
+
+static void plock_print_end_waiter(const struct lockspace *ls,
+				   const struct lock_waiter *w)
+{
+	log_plock(ls, "state waiter end %llx %p",
+		  (unsigned long long)w->info.number, w);
+}
+
 static struct resource * rb_search_plock_resource(struct lockspace *ls, uint64_t number)
 {
 	struct rb_node *n = ls->plock_resources_root.rb_node;
@@ -659,13 +678,7 @@ static void clear_waiters(struct lockspace *ls, struct resource *r,
 			continue;
 
 		list_del(&w->list);
-
-		log_dlock(ls, "clear waiter %llx %llx-%llx %d/%u/%llx",
-			  (unsigned long long)in->number,
-			  (unsigned long long)in->start,
-			  (unsigned long long)in->end,
-			  in->nodeid, in->pid,
-			  (unsigned long long)in->owner);
+		plock_print_end_waiter(ls, w);
 		free(w);
 	}
 }
@@ -680,6 +693,7 @@ static int add_waiter(struct lockspace *ls, struct resource *r,
 	if (!w)
 		return -ENOMEM;
 	memcpy(&w->info, in, sizeof(struct dlm_plock_info));
+	plock_print_start_waiter(ls, w);
 	list_add_tail(&w->list, &r->waiters);
 	return 0;
 }
@@ -708,6 +722,7 @@ static void do_waiters(struct lockspace *ls, struct resource *r)
 			continue;
 
 		list_del(&w->list);
+		plock_print_end_waiter(ls, w);
 
 		/*
 		log_group(ls, "take waiter %llx %llx-%llx %d/%u/%llx",
@@ -1835,7 +1850,7 @@ void send_all_plocks_data(struct lockspace *ls, uint32_t seq, uint32_t *plocks_d
 		  our_nodeid, seq, send_count);
 }
 
-static void free_r_lists(struct resource *r)
+static void free_r_lists(const struct lockspace *ls, struct resource *r)
 {
 	struct posix_lock *po, *po2;
 	struct lock_waiter *w, *w2;
@@ -1847,6 +1862,7 @@ static void free_r_lists(struct resource *r)
 
 	list_for_each_entry_safe(w, w2, &r->waiters, list) {
 		list_del(&w->list);
+		plock_print_end_waiter(ls, w);
 		free(w);
 	}
 }
@@ -1967,6 +1983,7 @@ void receive_plocks_data(struct lockspace *ls, struct dlm_header *hd, int len)
 			w->info.pid	= le32_to_cpu(pp->pid);
 			w->info.nodeid	= le32_to_cpu(pp->nodeid);
 			w->info.ex	= pp->ex;
+			plock_print_start_waiter(ls, w);
 			list_add_tail(&w->list, &r->waiters);
 		}
 		pp++;
@@ -1984,7 +2001,7 @@ void receive_plocks_data(struct lockspace *ls, struct dlm_header *hd, int len)
 
  fail_free:
 	if (!(flags & RD_CONTINUE)) {
-		free_r_lists(r);
+		free_r_lists(ls, r);
 		free(r);
 	}
 	return;
@@ -1999,7 +2016,7 @@ void clear_plocks_data(struct lockspace *ls)
 		return;
 
 	list_for_each_entry_safe(r, r2, &ls->plock_resources, list) {
-		free_r_lists(r);
+		free_r_lists(ls, r);
 		rb_del_plock_resource(ls, r);
 		list_del(&r->list);
 		free(r);
@@ -2038,6 +2055,7 @@ void purge_plocks(struct lockspace *ls, int nodeid, int unmount)
 		list_for_each_entry_safe(w, w2, &r->waiters, list) {
 			if (w->info.nodeid == nodeid || unmount) {
 				list_del(&w->list);
+				plock_print_end_waiter(ls, w);
 				free(w);
 				purged++;
 			}
-- 
2.31.1



* [Cluster-devel] [PATCH dlm-tool 13/14] dlm_controld: plock log lock state
  2023-03-02 17:14 [Cluster-devel] [PATCH dlm-tool 01/14] dlm_tool: add fail functionality if dump failed Alexander Aring
                   ` (10 preceding siblings ...)
  2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 12/14] dlm_controld: plock log waiters state Alexander Aring
@ 2023-03-02 17:14 ` Alexander Aring
  2023-03-03 13:38   ` Andreas Gruenbacher
  2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 14/14] python: add posix lockdb plot tool Alexander Aring
  12 siblings, 1 reply; 23+ messages in thread
From: Alexander Aring @ 2023-03-02 17:14 UTC (permalink / raw)
  To: cluster-devel.redhat.com

This patch adds additional log_plock() calls to track the start and end
of lock states inside the dlm_controld lock database. With this
information we know when a specific lock had a specific lock mode (WR
or RD) in use from the dlm_controld cluster-wide plock point of view.
---
 dlm_controld/plock.c | 82 ++++++++++++++++++++++++++++++++++----------
 1 file changed, 63 insertions(+), 19 deletions(-)

diff --git a/dlm_controld/plock.c b/dlm_controld/plock.c
index b93863f7..39bdd1f6 100644
--- a/dlm_controld/plock.c
+++ b/dlm_controld/plock.c
@@ -227,6 +227,26 @@ static void plock_print_end_waiter(const struct lockspace *ls,
 		  (unsigned long long)w->info.number, w);
 }
 
+static void plock_print_start_plock(const struct lockspace *ls, uint64_t number,
+				    const struct posix_lock *po)
+{
+	log_plock(ls, "state plock start %llx %p %s %llx-%llx %d/%u/%llx",
+		  (unsigned long long)number,
+		  po,
+		  po->ex ? "WR" : "RD",
+		  (unsigned long long)po->start,
+		  (unsigned long long)po->end,
+		  po->nodeid, po->pid,
+		  (unsigned long long)po->owner);
+}
+
+static void plock_print_end_plock(const struct lockspace *ls, uint64_t number,
+				  const struct posix_lock *po)
+{
+	log_plock(ls, "state plock end %llx %p",
+		  (unsigned long long)number, po);
+}
+
 static struct resource * rb_search_plock_resource(struct lockspace *ls, uint64_t number)
 {
 	struct rb_node *n = ls->plock_resources_root.rb_node;
@@ -466,8 +486,9 @@ static int is_conflict(struct resource *r, struct dlm_plock_info *in, int get)
 	return 0;
 }
 
-static int add_lock(struct resource *r, uint32_t nodeid, uint64_t owner,
-		    uint32_t pid, int ex, uint64_t start, uint64_t end)
+static int add_lock(const struct lockspace *ls, struct resource *r,
+		    uint32_t nodeid, uint64_t owner, uint32_t pid,
+		    int ex, uint64_t start, uint64_t end, uint64_t number)
 {
 	struct posix_lock *po;
 
@@ -482,6 +503,7 @@ static int add_lock(struct resource *r, uint32_t nodeid, uint64_t owner,
 	po->owner = owner;
 	po->pid = pid;
 	po->ex = ex;
+	plock_print_start_plock(ls, number, po);
 	list_add_tail(&po->list, &r->locks);
 
 	return 0;
@@ -491,8 +513,8 @@ static int add_lock(struct resource *r, uint32_t nodeid, uint64_t owner,
    1. add new lock for non-overlap area of RE, orig mode
    2. convert RE to RN range and mode */
 
-static int lock_case1(struct posix_lock *po, struct resource *r,
-		      struct dlm_plock_info *in)
+static int lock_case1(const struct lockspace *ls, struct posix_lock *po,
+		      struct resource *r, struct dlm_plock_info *in)
 {
 	uint64_t start2, end2;
 	int rv;
@@ -508,7 +530,8 @@ static int lock_case1(struct posix_lock *po, struct resource *r,
 	po->end = in->end;
 	po->ex = in->ex;
 
-	rv = add_lock(r, in->nodeid, in->owner, in->pid, !in->ex, start2, end2);
+	rv = add_lock(ls, r, in->nodeid, in->owner, in->pid, !in->ex, start2,
+		      end2, in->number);
  out:
 	return rv;
 }
@@ -518,19 +541,20 @@ static int lock_case1(struct posix_lock *po, struct resource *r,
    2. add new lock for back fragment, orig mode
    3. convert RE to RN range and mode */
 			 
-static int lock_case2(struct posix_lock *po, struct resource *r,
-		      struct dlm_plock_info *in)
+static int lock_case2(const struct lockspace *ls, struct posix_lock *po,
+		      struct resource *r, struct dlm_plock_info *in)
 
 {
 	int rv;
 
-	rv = add_lock(r, in->nodeid, in->owner, in->pid,
-		      !in->ex, po->start, in->start - 1);
+	rv = add_lock(ls, r, in->nodeid, in->owner, in->pid,
+		      !in->ex, po->start, in->start - 1,
+		      in->number);
 	if (rv)
 		goto out;
 
-	rv = add_lock(r, in->nodeid, in->owner, in->pid,
-		      !in->ex, in->end + 1, po->end);
+	rv = add_lock(ls, r, in->nodeid, in->owner, in->pid,
+		      !in->ex, in->end + 1, po->end, in->number);
 	if (rv)
 		goto out;
 
@@ -561,34 +585,43 @@ static int lock_internal(struct lockspace *ls, struct resource *r,
 			if (po->ex == in->ex)
 				goto out;
 
+			plock_print_end_plock(ls, in->number, po);
 			/* ranges the same - just update the existing lock */
 			po->ex = in->ex;
+			plock_print_start_plock(ls, in->number, po);
 			goto out;
 
 		case 1:
 			if (po->ex == in->ex)
 				goto out;
 
-			rv = lock_case1(po, r, in);
+			plock_print_end_plock(ls, in->number, po);
+			rv = lock_case1(ls, po, r, in);
+			plock_print_start_plock(ls, in->number, po);
 			goto out;
 
 		case 2:
 			if (po->ex == in->ex)
 				goto out;
 
-			rv = lock_case2(po, r, in);
+			plock_print_end_plock(ls, in->number, po);
+			rv = lock_case2(ls, po, r, in);
+			plock_print_start_plock(ls, in->number, po);
 			goto out;
 
 		case 3:
 			list_del(&po->list);
+			plock_print_end_plock(ls, in->number, po);
 			free(po);
 			break;
 
 		case 4:
+			plock_print_end_plock(ls, in->number, po);
 			if (po->start < in->start)
 				po->end = in->start - 1;
 			else
 				po->start = in->end + 1;
+			plock_print_start_plock(ls, in->number, po);
 			break;
 
 		default:
@@ -597,8 +630,8 @@ static int lock_internal(struct lockspace *ls, struct resource *r,
 		}
 	}
 
-	rv = add_lock(r, in->nodeid, in->owner, in->pid,
-		      in->ex, in->start, in->end);
+	rv = add_lock(ls, r, in->nodeid, in->owner, in->pid,
+		      in->ex, in->start, in->end, in->number);
  out:
 	return rv;
 
@@ -624,6 +657,7 @@ static int unlock_internal(struct lockspace *ls, struct resource *r,
 			/* ranges the same - just remove the existing lock */
 
 			list_del(&po->list);
+			plock_print_end_plock(ls, in->number, po);
 			free(po);
 			goto out;
 
@@ -631,16 +665,20 @@ static int unlock_internal(struct lockspace *ls, struct resource *r,
 			/* RN within RE and starts or ends on RE boundary -
 			 * shrink and update RE */
 
+			plock_print_end_plock(ls, in->number, po);
 			rv = shrink_range(po, in->start, in->end);
+			plock_print_start_plock(ls, in->number, po);
 			goto out;
 
 		case 2:
 			/* RN within RE - shrink and update RE to be front
 			 * fragment, and add a new lock for back fragment */
 
-			rv = add_lock(r, in->nodeid, in->owner, in->pid,
-				      po->ex, in->end + 1, po->end);
+			plock_print_end_plock(ls, in->number, po);
+			rv = add_lock(ls, r, in->nodeid, in->owner, in->pid,
+				      po->ex, in->end + 1, po->end, in->number);
 			po->end = in->start - 1;
+			plock_print_start_plock(ls, in->number, po);
 			goto out;
 
 		case 3:
@@ -648,6 +686,7 @@ static int unlock_internal(struct lockspace *ls, struct resource *r,
 			 * because RN could cover other locks */
 
 			list_del(&po->list);
+			plock_print_end_plock(ls, in->number, po);
 			free(po);
 			continue;
 
@@ -656,7 +695,9 @@ static int unlock_internal(struct lockspace *ls, struct resource *r,
 			 * update RE, then continue because RN could cover
 			 * other locks */
 
+			plock_print_end_plock(ls, in->number, po);
 			rv = shrink_range(po, in->start, in->end);
+			plock_print_start_plock(ls, in->number, po);
 			continue;
 
 		default:
@@ -1346,8 +1387,8 @@ static void _receive_sync(struct lockspace *ls, struct dlm_header *hd, int len)
 	}
 
 	if (hd->type == DLM_MSG_PLOCK_SYNC_LOCK)
-		add_lock(r, info.nodeid, info.owner, info.pid, info.ex, 
-			 info.start, info.end);
+		add_lock(ls, r, info.nodeid, info.owner, info.pid, info.ex,
+			 info.start, info.end, info.number);
 	else if (hd->type == DLM_MSG_PLOCK_SYNC_WAITER)
 		add_waiter(ls, r, &info);
 }
@@ -1857,6 +1898,7 @@ static void free_r_lists(const struct lockspace *ls, struct resource *r)
 
 	list_for_each_entry_safe(po, po2, &r->locks, list) {
 		list_del(&po->list);
+		plock_print_end_plock(ls, r->number, po);
 		free(po);
 	}
 
@@ -1972,6 +2014,7 @@ void receive_plocks_data(struct lockspace *ls, struct dlm_header *hd, int len)
 			po->pid		= le32_to_cpu(pp->pid);
 			po->nodeid	= le32_to_cpu(pp->nodeid);
 			po->ex		= pp->ex;
+			plock_print_start_plock(ls, num, po);
 			list_add_tail(&po->list, &r->locks);
 		} else {
 			w = malloc(sizeof(struct lock_waiter));
@@ -2047,6 +2090,7 @@ void purge_plocks(struct lockspace *ls, int nodeid, int unmount)
 		list_for_each_entry_safe(po, po2, &r->locks, list) {
 			if (po->nodeid == nodeid || unmount) {
 				list_del(&po->list);
+				plock_print_end_plock(ls, r->number, po);
 				free(po);
 				purged++;
 			}
-- 
2.31.1



* [Cluster-devel] [PATCH dlm-tool 14/14] python: add posix lockdb plot tool
  2023-03-02 17:14 [Cluster-devel] [PATCH dlm-tool 01/14] dlm_tool: add fail functionality if dump failed Alexander Aring
                   ` (11 preceding siblings ...)
  2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 13/14] dlm_controld: plock log lock state Alexander Aring
@ 2023-03-02 17:14 ` Alexander Aring
  2023-03-03 14:40   ` Alexander Aring
  12 siblings, 1 reply; 23+ messages in thread
From: Alexander Aring @ 2023-03-02 17:14 UTC (permalink / raw)
  To: cluster-devel.redhat.com

This patch adds a python tool that uses python plotly [0] to plot the
dlm_controld cluster-wide lockdb. It scans dlm_controld plock log files
for the required information about possible contention (waiters) and the
acquired lock modes over time.

Example:

./lockdb_plot /var/log/dlm_controld/plock.log

and plotly will start a webserver where you can see a generated,
interactive plot with the lock ranges on the y axis and time on the x
axis. Depending on usage it is limited and does not scale. There are
arguments like --info to get plock.log timing information, and with
--ts_start/--ts_end you can filter a specific time range to show only a
small part of the plock log, reducing the number of rectangles the
browser needs to draw.
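
For example, one could first query the covered time range and then plot
only a slice of it (the timestamps below are hypothetical, in seconds):

./lockdb_plot --info /var/log/dlm_controld/plock.log
./lockdb_plot --ts_start 120.0 --ts_end 150.0 /var/log/dlm_controld/plock.log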

It shows only lock modes and contention, not lock requests. That means a
trylock is only visible when it was successful. A non-trylock will be
drawn and may have contention.

With this plot it should be easy to figure out on which node a specific
lock range was acquired and resulted in contention.

[0] https://plotly.com/python/
---
 python/tools/lockdb_plot | 429 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 429 insertions(+)
 create mode 100755 python/tools/lockdb_plot

diff --git a/python/tools/lockdb_plot b/python/tools/lockdb_plot
new file mode 100755
index 00000000..761bdea2
--- /dev/null
+++ b/python/tools/lockdb_plot
@@ -0,0 +1,429 @@
+#!/usr/bin/env python3
+
+import plotly.graph_objects as go
+from enum import Enum
+import argparse
+import sys
+
+NSECS_PER_SECS = 1000000000
+
+def nsecs_to_secs(nsecs):
+    return nsecs/NSECS_PER_SECS
+
+def secs_to_nsecs(secs):
+    return secs * NSECS_PER_SECS
+
+# entry point of collecting data
+nodes = dict()
+# will parse log to get our_nodeid
+our_nodeid = None
+# legend groups, tmp set to show legend entry only once
+legend_groups = set()
+
+class EType(Enum):
+    LOCK = 1
+    WAITER = 2
+
+def lookup_lockdb(nodeid, ls, number):
+    if nodeid not in nodes:
+        nodes[nodeid] = dict()
+
+    lockspaces = nodes[nodeid]
+    if ls not in lockspaces:
+        lockspaces[ls] = dict()
+
+    if number not in lockspaces[ls]:
+        lockspaces[ls][number] = LockDb(nodeid, ls, number)
+
+    return lockspaces[ls][number]
+
+def plotly_rect_shape(x0, y0, x1, y1, color, opacity, text, group):
+    # we show it in secs
+    x0 = nsecs_to_secs(x0)
+    x1 = nsecs_to_secs(x1)
+
+    rv = go.Scatter(x=[x0,x0,x1,x1,x0],
+                    y=[y0,y1,y1,y0,y0],
+                    fill="toself",
+                    fillcolor=color,
+                    mode='lines',
+                    name=group,
+                    legendgroup=group,
+                    showlegend=group not in legend_groups,
+                    visible=True,
+                    text=text,
+                    line=dict(color=color,
+                              width=1),
+                    opacity=opacity)
+
+    legend_groups.add(group)
+    return rv
+
+
+class LockDbRect():
+    def __init__(self, **kwargs):
+        self.ls = kwargs["ls"]
+        self.number = kwargs["number"]
+        self.pid = kwargs["pid"]
+        self.nodeid = kwargs["nodeid"]
+        self.owner = kwargs["owner"]
+        self.ts_start = kwargs["ts_start"]
+        self.ts_end = kwargs["ts_end"]
+        # +1 because lock range 1000-1000 will then be 999-1000
+        # exclusive vs inclusive we need to decide how to display
+        # it but we don't set a zero height
+        self.lk_start = kwargs["lk_start"] - 1
+        self.lk_end = kwargs["lk_end"]
+
+    def create_shape(self):
+        return plotly_rect_shape(self.ts_start,
+                                 self.lk_start,
+                                 self.ts_end,
+                                 self.lk_end,
+                                 self.get_color(),
+                                 self.get_opacity(),
+                                 self.get_text(),
+                                 self.get_group())
+
+    def get_duration(self):
+        return self.ts_end - self.ts_start
+
+    def get_range(self):
+        return self.lk_end - self.lk_start
+
+    def __str__(self):
+        return str(self.__dict__)
+
+    def __repr__(self):
+        return str(self)
+
+class LockDbRectWaiter(LockDbRect):
+    def __init__(self, **kwargs):
+        super(LockDbRectWaiter, self).__init__(**kwargs)
+
+    def get_color(self):
+        return "green"
+
+    def get_opacity(self):
+        return 0.2
+
+    def get_text(self):
+        # TODO make it human readable for high numbers?
+        ts_start = nsecs_to_secs(self.ts_start);
+        ts_end = nsecs_to_secs(self.ts_end);
+        dt = nsecs_to_secs(self.get_duration());
+
+        text = "nodeid: {}, pid: {}, owner: {}<br>".format(self.nodeid, self.pid, hex(self.owner)) + \
+               "ts_start: {0:.3f},".format(ts_start) + \
+               "ts_end: {0:.3f},".format(ts_end) + \
+               "dt: {0:.3f}<br>".format(dt) + \
+               "lk_start: {}, lk_end: {}, dy: {}<br>".format(hex(self.lk_start), hex(self.lk_end), hex(self.get_range()))
+
+        return text
+
+    def get_text_pl(self):
+        text = "contention:<br>" + \
+                "start: {0:.3f},".format(nsecs_to_secs(self.ts_start)) + \
+                "end: {0:.3f},".format(nsecs_to_secs(self.ts_end)) + \
+                "dt: {0:.3f}".format(nsecs_to_secs(self.get_duration()))
+
+        return text
+
+    def get_group(self):
+        return "contention"
+
+class LockDbRectPLock(LockDbRect):
+    def __init__(self, **kwargs):
+        super(LockDbRectPLock, self).__init__(**kwargs)
+        self.mode = kwargs["mode"]
+        if self.mode == "WR":
+            self.color = "red"
+            self.group = "write lock"
+        elif self.mode == "RD":
+            self.color = "blue"
+            self.group = "read lock"
+        else:
+            print("{}: received unknown mode".format(self.ts_start))
+            return
+
+        self.waiter = None
+
+    def __str__(self):
+        return str(self.__dict__)
+
+    def __repr__(self):
+        return str(self)
+
+    def has_waiter(self):
+        return self.waiter
+
+    def set_waiter(self, wa):
+        if self.has_waiter():
+            print("{}: has already a waiter set".format(self.ts_start))
+            return
+
+        self.waiter = wa
+
+    def get_waiter(self):
+        return self.waiter
+
+    def get_color(self):
+        return self.color
+
+    def get_opacity(self):
+        return 0.4
+
+    def get_text(self):
+        # TODO make it human readable for high numbers?
+        ts_start = nsecs_to_secs(self.ts_start);
+        ts_end = nsecs_to_secs(self.ts_end);
+        dt = nsecs_to_secs(self.get_duration());
+
+        text = "nodeid: {}, pid: {}, owner: {}<br>".format(self.nodeid, self.pid, hex(self.owner)) + \
+               "ts_start: {0:.3f},".format(ts_start) + \
+               "ts_end: {0:.3f},".format(ts_end) + \
+               "dt: {0:.3f}<br>".format(dt) + \
+               "lk_start: {}, lk_end: {}, dy: {}<br>".format(hex(self.lk_start), hex(self.lk_end), hex(self.get_range()))
+
+        if self.has_waiter():
+            text = text + "<br>{}".format(self.get_waiter().get_text_pl())
+
+        return text;
+
+    def get_group(self):
+        return self.group
+
+class LockDb:
+    def __init__(self, nodeid, ls, number):
+        self.nodeid = nodeid
+        self.ls = ls
+        self.number = number
+        self.__ts = dict()
+        self.plocks = list()
+        self.__last_plock = dict()
+        self.waiters = list()
+        self.__last_waiter = dict()
+        self.__ts_start_min = None
+        self.__ts_end_max = 0
+
+    def event_start(self, vals):
+        self.__ts[vals["ptr"]] = vals
+
+    def __plock(self, vals_start, vals_end):
+        pl = LockDbRectPLock(ls=vals_start["ls"],
+                             number=vals_start["number"],
+                             pid=vals_start["pid"],
+                             nodeid=vals_start["nodeid"],
+                             owner=vals_start["owner"],
+                             ts_start=vals_start["ts"],
+                             ts_end=vals_end["ts"],
+                             lk_start=vals_start["start"],
+                             lk_end=vals_start["end"],
+                             mode=vals_start["mode"])
+        self.plocks.append(pl)
+        self.__last_plock[vals_end["ptr"]] = pl
+
+        try:
+            pl.set_waiter(self.__last_waiter[vals_start["owner"]])
+            del(self.__last_waiter[vals_start["owner"]])
+        except KeyError:
+            pass
+
+    def __waiter(self, vals_start, vals_end):
+        wa = LockDbRectWaiter(ls=vals_start["ls"],
+                              number=vals_start["number"],
+                              pid=vals_start["pid"],
+                              nodeid=vals_start["nodeid"],
+                              owner=vals_start["owner"],
+                              ts_start=vals_start["ts"],
+                              ts_end=vals_end["ts"],
+                              lk_start=vals_start["start"],
+                              lk_end=vals_start["end"])
+        self.waiters.append(wa)
+        self.__last_waiter[vals_start["owner"]] = wa
+
+    def event_end(self, vals):
+        try:
+            vals_start = self.__ts[vals["ptr"]]
+            del(self.__ts[vals["ptr"]])
+        except KeyError:
+            print("{}: cannot find previous start, ignoring".format(vals["ts"]))
+            return
+
+        if not self.__ts_start_min:
+            self.__ts_start_min = vals_start["ts"]
+        else:
+            self.__ts_start_min = min(self.__ts_start_min, vals_start["ts"])
+
+        if not self.__ts_start_min:
+            self.__ts_end_max = vals["ts"]
+        else:
+            self.__ts_end_max = max(self.__ts_end_max, vals["ts"])
+
+        if vals["event"] == EType.LOCK:
+            self.__plock(vals_start, vals)
+        elif vals["event"] == EType.WAITER:
+            self.__waiter(vals_start, vals)
+        else:
+            print("{}: unknown event type".format(vals["ts"]))
+
+    def add_shapes(self, fig):
+        global legend_groups
+        legend_groups = set()
+
+        # first waiters to be in background
+        for wa in self.waiters:
+            fig.add_trace(wa.create_shape())
+
+        # next layer to draw lock states
+        for pl in self.plocks:
+            fig.add_trace(pl.create_shape())
+
+    def get_ts_min(self):
+        return self.__ts_start_min
+
+    def get_ts_max(self):
+        return self.__ts_end_max
+
+    def __str__(self):
+        return "ls {} inode {}".format(self.ls, self.number)
+
+    def __repr__(self):
+        return str(self)
+
+parser = argparse.ArgumentParser(description="Plots dlm_controld lock databases")
+parser.add_argument('-i', "--info", action="store_true")
+parser.add_argument('-s', "--ts_start", type=float)
+parser.add_argument('-e', "--ts_end", type=float)
+
+parser.add_argument('plock_logs', type=argparse.FileType('r'), nargs='+')
+args = parser.parse_args()
+
+for f in args.plock_logs:
+    lines = f.readlines()
+
+    # Current way to figure out our nodeid
+    for line in lines:
+        if "read plock" in line:
+            lsplit = line.split()
+            npo = lsplit[11].split('/')
+            nodeid = int(npo[0])
+            our_nodeid = nodeid
+            break
+
+    if not our_nodeid:
+        print("{}: could not determine our nodeid".format(f))
+        continue
+
+    for line in lines:
+        if "state plock" in line:
+            pass
+        elif "state waiter" in line:
+            pass
+        else:
+            continue
+
+        vals = dict()
+        lsplit = line.split()
+        vals["ts"] = int(lsplit[3])
+
+        if args.ts_start and secs_to_nsecs(args.ts_start) > int(vals["ts"]):
+            continue
+
+        if args.ts_end and secs_to_nsecs(args.ts_end) < int(vals["ts"]):
+            break;
+
+        vals["ls"] = lsplit[4]
+        vals["op"] = lsplit[7]
+        vals["number"] = int(lsplit[8], 16)
+        vals["ptr"] = lsplit[9]
+
+        db = lookup_lockdb(our_nodeid, vals["ls"], vals["number"])
+        if "state plock" in line:
+            vals["event"] = EType.LOCK
+            if vals["op"] == "start":
+                vals["mode"] = lsplit[10]
+                se = lsplit[11].split('-')
+                vals["start"] = int(se[0], 16)
+                vals["end"] = int(se[1], 16)
+                npo = lsplit[12].split('/')
+                vals["nodeid"] = int(npo[0])
+                vals["pid"] = int(npo[1])
+                vals["owner"] = int(npo[2], 16)
+                db.event_start(vals)
+            elif vals["op"] == "end":
+                db.event_end(vals)
+            else:
+                print("{}: unknown state plock operation: {}".format(vals["ts"], vals["op"]))
+        elif "state waiter" in line:
+            vals["event"] = EType.WAITER
+            if vals["op"] == "start":
+                se = lsplit[10].split('-')
+                vals["start"] = int(se[0], 16)
+                vals["end"] = int(se[1], 16)
+                npo = lsplit[11].split('/')
+                vals["nodeid"] = int(npo[0])
+                vals["pid"] = int(npo[1])
+                vals["owner"] = int(npo[2], 16)
+                db.event_start(vals)
+            elif vals["op"] == "end":
+                db.event_end(vals)
+            else:
+                print("{}: unknown state waiter operation: {}".format(vals["ts"], vals["op"]))
+        else:
+            print("received unknown line event: {}".format(line))
+
+if args.info:
+    ts_max = None
+    ts_min = None
+
+    for nodeid in nodes:
+        for ls in nodes[nodeid]:
+            for number in nodes[nodeid][ls]:
+                db = lookup_lockdb(nodeid, ls, number)
+                if not ts_min:
+                    ts_min = db.get_ts_min()
+                else:
+                    ts_min = min(ts_min, db.get_ts_min())
+
+                if not ts_max:
+                    ts_max = db.get_ts_max()
+                else:
+                    ts_max = max(ts_max, db.get_ts_max())
+
+    print("global info:")
+    print("\tts_start: {0:.3f} secs".format(nsecs_to_secs(ts_min)))
+    print("\tts_end: {0:.3f} secs".format(nsecs_to_secs(ts_max)))
+    print("\tduration: {0:.3f} secs".format(nsecs_to_secs(ts_max) - nsecs_to_secs(ts_min)))
+
+    print()
+    print("per inode lock info:")
+    for nodeid in nodes:
+        print("nodeid: {}".format(nodeid))
+        for ls in nodes[nodeid]:
+            print("\tls: {}".format(ls))
+            for number in nodes[nodeid][ls]:
+                print("\t\tinode: {}".format(hex(number)))
+                db = lookup_lockdb(nodeid, ls, number)
+                print("\t\t\tts_start: {0:.3f} secs".format(nsecs_to_secs(db.get_ts_min())))
+                print("\t\t\tts_end: {0:.3f} secs".format(nsecs_to_secs(db.get_ts_max())))
+                print("\t\t\tduration: {0:.3f} secs".format(nsecs_to_secs(db.get_ts_max()) - nsecs_to_secs(db.get_ts_min())))
+
+    exit(0)
+
+for nodeid in nodes:
+    for ls in nodes[nodeid]:
+        for number in nodes[nodeid][ls]:
+            print("create plot: {} {} {}".format(nodeid, ls, number))
+
+            fig = go.Figure()
+            fig.update_layout(title_text="dlm_controld cluster-wide lock database<br>nodeid: {}, lockspace: {}, inode: {}".format(nodeid, ls, hex(number)))
+
+            # fill plot with shapes
+            lookup_lockdb(nodeid, ls, number).add_shapes(fig)
+
+            fig.update_xaxes(title_text='Time (secs)')
+            fig.update_yaxes(title_text='Lock Range (bytes decimal)')
+
+            config = dict({'scrollZoom': True, 'displaylogo': False})
+            fig.show(config=config)
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Cluster-devel] [PATCH dlm-tool 13/14] dlm_controld: plock log lock state
  2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 13/14] dlm_controld: plock log lock state Alexander Aring
@ 2023-03-03 13:38   ` Andreas Gruenbacher
  2023-03-03 14:35     ` Alexander Aring
  2023-03-03 15:52     ` Andreas Gruenbacher
  0 siblings, 2 replies; 23+ messages in thread
From: Andreas Gruenbacher @ 2023-03-03 13:38 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Alexx,

can you please prefix this patch with the following to make this easier
to read?

Thanks,
Andreas

--

dlm_controld: pass lockspace and lock number to add_lock()

The next patch will make use of the additional arguments.
---
 dlm_controld/plock.c | 41 ++++++++++++++++++++++-------------------
 1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/dlm_controld/plock.c b/dlm_controld/plock.c
index b93863f7..6709d205 100644
--- a/dlm_controld/plock.c
+++ b/dlm_controld/plock.c
@@ -466,8 +466,9 @@ static int is_conflict(struct resource *r, struct dlm_plock_info *in, int get)
 	return 0;
 }
 
-static int add_lock(struct resource *r, uint32_t nodeid, uint64_t owner,
-		    uint32_t pid, int ex, uint64_t start, uint64_t end)
+static int add_lock(const struct lockspace *ls, struct resource *r,
+		    uint32_t nodeid, uint64_t owner, uint32_t pid,
+		    int ex, uint64_t start, uint64_t end, uint64_t number)
 {
 	struct posix_lock *po;
 
@@ -491,8 +492,8 @@ static int add_lock(struct resource *r, uint32_t nodeid, uint64_t owner,
    1. add new lock for non-overlap area of RE, orig mode
    2. convert RE to RN range and mode */
 
-static int lock_case1(struct posix_lock *po, struct resource *r,
-		      struct dlm_plock_info *in)
+static int lock_case1(const struct lockspace *ls, struct posix_lock *po,
+		      struct resource *r, struct dlm_plock_info *in)
 {
 	uint64_t start2, end2;
 	int rv;
@@ -508,7 +509,8 @@ static int lock_case1(struct posix_lock *po, struct resource *r,
 	po->end = in->end;
 	po->ex = in->ex;
 
-	rv = add_lock(r, in->nodeid, in->owner, in->pid, !in->ex, start2, end2);
+	rv = add_lock(ls, r, in->nodeid, in->owner, in->pid, !in->ex, start2,
+		      end2, in->number);
  out:
 	return rv;
 }
@@ -518,19 +520,20 @@ static int lock_case1(struct posix_lock *po, struct resource *r,
    2. add new lock for back fragment, orig mode
    3. convert RE to RN range and mode */
 			 
-static int lock_case2(struct posix_lock *po, struct resource *r,
-		      struct dlm_plock_info *in)
+static int lock_case2(const struct lockspace *ls, struct posix_lock *po,
+		      struct resource *r, struct dlm_plock_info *in)
 
 {
 	int rv;
 
-	rv = add_lock(r, in->nodeid, in->owner, in->pid,
-		      !in->ex, po->start, in->start - 1);
+	rv = add_lock(ls, r, in->nodeid, in->owner, in->pid,
+		      !in->ex, po->start, in->start - 1,
+		      in->number);
 	if (rv)
 		goto out;
 
-	rv = add_lock(r, in->nodeid, in->owner, in->pid,
-		      !in->ex, in->end + 1, po->end);
+	rv = add_lock(ls, r, in->nodeid, in->owner, in->pid,
+		      !in->ex, in->end + 1, po->end, in->number);
 	if (rv)
 		goto out;
 
@@ -569,14 +572,14 @@ static int lock_internal(struct lockspace *ls, struct resource *r,
 			if (po->ex == in->ex)
 				goto out;
 
-			rv = lock_case1(po, r, in);
+			rv = lock_case1(ls, po, r, in);
 			goto out;
 
 		case 2:
 			if (po->ex == in->ex)
 				goto out;
 
-			rv = lock_case2(po, r, in);
+			rv = lock_case2(ls, po, r, in);
 			goto out;
 
 		case 3:
@@ -597,8 +600,8 @@ static int lock_internal(struct lockspace *ls, struct resource *r,
 		}
 	}
 
-	rv = add_lock(r, in->nodeid, in->owner, in->pid,
-		      in->ex, in->start, in->end);
+	rv = add_lock(ls, r, in->nodeid, in->owner, in->pid,
+		      in->ex, in->start, in->end, in->number);
  out:
 	return rv;
 
@@ -638,8 +641,8 @@ static int unlock_internal(struct lockspace *ls, struct resource *r,
 			/* RN within RE - shrink and update RE to be front
 			 * fragment, and add a new lock for back fragment */
 
-			rv = add_lock(r, in->nodeid, in->owner, in->pid,
-				      po->ex, in->end + 1, po->end);
+			rv = add_lock(ls, r, in->nodeid, in->owner, in->pid,
+				      po->ex, in->end + 1, po->end, in->number);
 			po->end = in->start - 1;
 			goto out;
 
@@ -1346,8 +1349,8 @@ static void _receive_sync(struct lockspace *ls, struct dlm_header *hd, int len)
 	}
 
 	if (hd->type == DLM_MSG_PLOCK_SYNC_LOCK)
-		add_lock(r, info.nodeid, info.owner, info.pid, info.ex, 
-			 info.start, info.end);
+		add_lock(ls, r, info.nodeid, info.owner, info.pid, info.ex,
+			 info.start, info.end, info.number);
 	else if (hd->type == DLM_MSG_PLOCK_SYNC_WAITER)
 		add_waiter(ls, r, &info);
 }
-- 
2.39.0


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Cluster-devel] [PATCH dlm-tool 13/14] dlm_controld: plock log lock state
  2023-03-03 13:38   ` Andreas Gruenbacher
@ 2023-03-03 14:35     ` Alexander Aring
  2023-03-03 15:52     ` Andreas Gruenbacher
  1 sibling, 0 replies; 23+ messages in thread
From: Alexander Aring @ 2023-03-03 14:35 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

On Fri, Mar 3, 2023 at 8:38 AM Andreas Gruenbacher <agruenba@redhat.com> wrote:
>
> Alexx,
>
> can you please prefix this patch with the following to make this easier
> to read?
>

okay.

btw: I think we could add a struct lockspace pointer to struct
resource instead of passing it all the way down.
I think a struct resource instance is organized in a per "lockspace
-> inode -> resource" hierarchy.

I am still not 100% sure if this is the case, but it could be done
later as a cleanup as well.

- Alex


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Cluster-devel] [PATCH dlm-tool 14/14] python: add posix lockdb plot tool
  2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 14/14] python: add posix lockdb plot tool Alexander Aring
@ 2023-03-03 14:40   ` Alexander Aring
  0 siblings, 0 replies; 23+ messages in thread
From: Alexander Aring @ 2023-03-03 14:40 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

On Thu, Mar 2, 2023 at 12:14 PM Alexander Aring <aahringo@redhat.com> wrote:
>
> This patch adds a python tool to plot with use of python plotly [0] the
> dlm_controld cluster-wide lockdb. It scans dlm_controld plock log files
> for the required information about possible contention (waiters) and the
> acquired lock modes over time.
>
> Example:
>
> ./lockdb_plot /var/log/dlm_controld/plock.log
>
> and plotly will start a webserver and you can see a generated, interactive
> plot with the lock ranges on the y axis and time on the x axis. Depending on
> usage it's limited and does not scale. There are arguments like --info
> to get plock.log timing information, and with --ts_start/--ts_end you can
> filter a specific time range to show only a small part of the plock log
> and reduce the number of rectangles that need to be drawn by the browser.
>
> It shows only lock modes and contention, no lock requests. This means a
> trylock is only visible when it was successful. A non-trylock will be drawn
> and has possible contention.
>
> With this plot it should be easy to figure out on which node a specific
> lock range was acquired and resulted in contention.
>
> [0] https://plotly.com/python/
> ---
>  python/tools/lockdb_plot | 429 +++++++++++++++++++++++++++++++++++++++
>  1 file changed, 429 insertions(+)
>  create mode 100755 python/tools/lockdb_plot
>
> diff --git a/python/tools/lockdb_plot b/python/tools/lockdb_plot
> new file mode 100755
> index 00000000..761bdea2
> --- /dev/null
> +++ b/python/tools/lockdb_plot
> @@ -0,0 +1,429 @@
> +#!/usr/bin/env python3
> +
> +import plotly.graph_objects as go
> +from enum import Enum
> +import argparse
> +import sys
> +
> +NSECS_PER_SECS = 1000000000
> +
> +def nsecs_to_secs(nsecs):
> +    return nsecs/NSECS_PER_SECS
> +
> +def secs_to_nsecs(secs):
> +    return secs * NSECS_PER_SECS
> +
> +# entry point of collecting data
> +nodes = dict()
> +# will parse log to get our_nodeid
> +our_nodeid = None
> +# legend groups, tmp set to show legend entry only once
> +legend_groups = set()
> +
> +class EType(Enum):
> +    LOCK = 1
> +    WAITER = 2
> +
> +def lookup_lockdb(nodeid, ls, number):
> +    if nodeid not in nodes:
> +        nodes[nodeid] = dict()
> +
> +    lockspaces = nodes[nodeid]
> +    if ls not in lockspaces:
> +        lockspaces[ls] = dict()
> +
> +    if number not in lockspaces[ls]:
> +        lockspaces[ls][number] = LockDb(nodeid, ls, number)
> +
> +    return lockspaces[ls][number]
> +
> +def plotly_rect_shape(x0, y0, x1, y1, color, opacity, text, group):
> +    # we show it in secs
> +    x0 = nsecs_to_secs(x0)
> +    x1 = nsecs_to_secs(x1)
> +
> +    rv = go.Scatter(x=[x0,x0,x1,x1,x0],
> +                    y=[y0,y1,y1,y0,y0],
> +                    fill="toself",
> +                    fillcolor=color,
> +                    mode='lines',
> +                    name=group,
> +                    legendgroup=group,
> +                    showlegend=group not in legend_groups,
> +                    visible=True,
> +                    text=text,
> +                    line=dict(color=color,
> +                              width=1),
> +                    opacity=opacity)
> +
> +    legend_groups.add(group)
> +    return rv
> +
> +
> +class LockDbRect():
> +    def __init__(self, **kwargs):
> +        self.ls = kwargs["ls"]
> +        self.number = kwargs["number"]
> +        self.pid = kwargs["pid"]
> +        self.nodeid = kwargs["nodeid"]
> +        self.owner = kwargs["owner"]
> +        self.ts_start = kwargs["ts_start"]
> +        self.ts_end = kwargs["ts_end"]
> +        # +1 because lock range 1000-1000 will then be 999-1000
> +        # exclusive vs inclusive we need to decide how to display
> +        # it but we don't set a zero height
> +        self.lk_start = kwargs["lk_start"] - 1
> +        self.lk_end = kwargs["lk_end"]
> +
> +    def create_shape(self):
> +        return plotly_rect_shape(self.ts_start,
> +                                 self.lk_start,
> +                                 self.ts_end,
> +                                 self.lk_end,
> +                                 self.get_color(),
> +                                 self.get_opacity(),
> +                                 self.get_text(),
> +                                 self.get_group())
> +
> +    def get_duration(self):
> +        return self.ts_end - self.ts_start
> +
> +    def get_range(self):
> +        return self.lk_end - self.lk_start
> +
> +    def __str__(self):
> +        return str(self.__dict__)
> +
> +    def __repr__(self):
> +        return str(self)
> +
> +class LockDbRectWaiter(LockDbRect):
> +    def __init__(self, **kwargs):
> +        super(LockDbRectWaiter, self).__init__(**kwargs)
> +
> +    def get_color(self):
> +        return "green"
> +
> +    def get_opacity(self):
> +        return 0.2
> +
> +    def get_text(self):
> +        # TODO make it human readable for high numbers?
> +        ts_start = nsecs_to_secs(self.ts_start);
> +        ts_end = nsecs_to_secs(self.ts_end);
> +        dt = nsecs_to_secs(self.get_duration());
> +
> +        text = "nodeid: {}, pid: {}, owner: {}<br>".format(self.nodeid, self.pid, hex(self.owner)) + \
> +               "ts_start: {0:.3f},".format(ts_start) + \
> +               "ts_end: {0:.3f},".format(ts_end) + \
> +               "dt: {0:.3f}<br>".format(dt) + \
> +               "lk_start: {}, lk_end: {}, dy: {}<br>".format(hex(self.lk_start), hex(self.lk_end), hex(self.get_range()))
> +
> +        return text
> +
> +    def get_text_pl(self):
> +        text = "contention:<br>" + \
> +                "start: {0:.3f},".format(nsecs_to_secs(self.ts_start)) + \
> +                "end: {0:.3f},".format(nsecs_to_secs(self.ts_end)) + \
> +                "dt: {0:.3f}".format(nsecs_to_secs(self.get_duration()))
> +
there are some missing spaces after each comma. I will try to add a
helper function and use it for the same code below.

> +        return text
> +
> +    def get_group(self):
> +        return "contention"
> +
> +class LockDbRectPLock(LockDbRect):
> +    def __init__(self, **kwargs):
> +        super(LockDbRectPLock, self).__init__(**kwargs)
> +        self.mode = kwargs["mode"]
> +        if self.mode == "WR":
> +            self.color = "red"
> +            self.group = "write lock"
> +        elif self.mode == "RD":
> +            self.color = "blue"
> +            self.group = "read lock"
> +        else:
> +            print("{}: received unknown mode".format(self.ts_start))
> +            return
> +
> +        self.waiter = None
> +
> +    def __str__(self):
> +        return str(self.__dict__)
> +
> +    def __repr__(self):
> +        return str(self)
> +
> +    def has_waiter(self):
> +        return self.waiter
> +
> +    def set_waiter(self, wa):
> +        if self.has_waiter():
> +            print("{}: has already a waiter set".format(self.ts_start))
> +            return
> +
> +        self.waiter = wa
> +
> +    def get_waiter(self):
> +        return self.waiter
> +
> +    def get_color(self):
> +        return self.color
> +
> +    def get_opacity(self):
> +        return 0.4
> +
> +    def get_text(self):
> +        # TODO make it human readable for high numbers?
> +        ts_start = nsecs_to_secs(self.ts_start);
> +        ts_end = nsecs_to_secs(self.ts_end);
> +        dt = nsecs_to_secs(self.get_duration());
> +
> +        text = "nodeid: {}, pid: {}, owner: {}<br>".format(self.nodeid, self.pid, hex(self.owner)) + \
> +               "ts_start: {0:.3f},".format(ts_start) + \
> +               "ts_end: {0:.3f},".format(ts_end) + \
> +               "dt: {0:.3f}<br>".format(dt) + \

here...

> +               "lk_start: {}, lk_end: {}, dy: {}<br>".format(hex(self.lk_start), hex(self.lk_end), hex(self.get_range()))
> +
> +        if self.has_waiter():
> +            text = text + "<br>{}".format(self.get_waiter().get_text_pl())
> +
> +        return text;
> +
> +    def get_group(self):
> +        return self.group
> +
> +class LockDb:
> +    def __init__(self, nodeid, ls, number):
> +        self.nodeid = nodeid
> +        self.ls = ls
> +        self.number = number
> +        self.__ts = dict()
> +        self.plocks = list()
> +        self.__last_plock = dict()
> +        self.waiters = list()
> +        self.__last_waiter = dict()
> +        self.__ts_start_min = None
> +        self.__ts_end_max = 0
> +
> +    def event_start(self, vals):
> +        self.__ts[vals["ptr"]] = vals
> +
> +    def __plock(self, vals_start, vals_end):
> +        pl = LockDbRectPLock(ls=vals_start["ls"],
> +                             number=vals_start["number"],
> +                             pid=vals_start["pid"],
> +                             nodeid=vals_start["nodeid"],
> +                             owner=vals_start["owner"],
> +                             ts_start=vals_start["ts"],
> +                             ts_end=vals_end["ts"],
> +                             lk_start=vals_start["start"],
> +                             lk_end=vals_start["end"],
> +                             mode=vals_start["mode"])
> +        self.plocks.append(pl)
> +        self.__last_plock[vals_end["ptr"]] = pl
> +
> +        try:
> +            pl.set_waiter(self.__last_waiter[vals_start["owner"]])
> +            del(self.__last_waiter[vals_start["owner"]])
> +        except KeyError:
> +            pass
> +
> +    def __waiter(self, vals_start, vals_end):
> +        wa = LockDbRectWaiter(ls=vals_start["ls"],
> +                              number=vals_start["number"],
> +                              pid=vals_start["pid"],
> +                              nodeid=vals_start["nodeid"],
> +                              owner=vals_start["owner"],
> +                              ts_start=vals_start["ts"],
> +                              ts_end=vals_end["ts"],
> +                              lk_start=vals_start["start"],
> +                              lk_end=vals_start["end"])
> +        self.waiters.append(wa)
> +        self.__last_waiter[vals_start["owner"]] = wa
> +
> +    def event_end(self, vals):
> +        try:
> +            vals_start = self.__ts[vals["ptr"]]
> +            del(self.__ts[vals["ptr"]])
> +        except KeyError:
> +            print("{}: cannot find previous start, ignoring".format(vals["ts"]))
> +            return
> +
> +        if not self.__ts_start_min:
> +            self.__ts_start_min = vals_start["ts"]
> +        else:
> +            self.__ts_start_min = min(self.__ts_start_min, vals_start["ts"])
> +
> +        if not self.__ts_end_max:
> +            self.__ts_end_max = vals["ts"]
> +        else:
> +            self.__ts_end_max = max(self.__ts_end_max, vals["ts"])
> +
> +        if vals["event"] == EType.LOCK:
> +            self.__plock(vals_start, vals)
> +        elif vals["event"] == EType.WAITER:
> +            self.__waiter(vals_start, vals)
> +        else:
> +            print("{}: unknown event type".format(vals["ts"]))
> +
> +    def add_shapes(self, fig):
> +        global legend_groups
> +        legend_groups = set()
> +
> +        # first waiters to be in background
> +        for wa in self.waiters:
> +            fig.add_trace(wa.create_shape())
> +
> +        # next layer to draw lock states
> +        for pl in self.plocks:
> +            fig.add_trace(pl.create_shape())
> +
> +    def get_ts_min(self):
> +        return self.__ts_start_min
> +
> +    def get_ts_max(self):
> +        return self.__ts_end_max
> +
> +    def __str__(self):
> +        return "ls {} inode {}".format(self.ls, self.number)
> +
> +    def __repr__(self):
> +        return str(self)
> +
> +parser = argparse.ArgumentParser(description="Plots dlm_controld lock databases")
> +parser.add_argument('-i', "--info", action="store_true")
> +parser.add_argument('-s', "--ts_start", type=float)
> +parser.add_argument('-e', "--ts_end", type=float)
> +
> +parser.add_argument('plock_logs', type=argparse.FileType('r'), nargs='+')
> +args = parser.parse_args()
> +
> +for f in args.plock_logs:
> +    lines = f.readlines()
> +
> +    # Current way to figure out our nodeid
> +    for line in lines:
> +        if "read plock" in line:
> +            lsplit = line.split()
> +            npo = lsplit[11].split('/')
> +            nodeid = int(npo[0])
> +            our_nodeid = nodeid
> +            break
> +
> +    if not our_nodeid:
> +        print("{}: could not determine our nodeid".format(f))
> +        continue
> +
> +    for line in lines:
> +        if "state plock" in line:
> +            pass
> +        elif "state waiter" in line:
> +            pass
> +        else:
> +            continue
> +
> +        vals = dict()
> +        lsplit = line.split()
> +        vals["ts"] = int(lsplit[3])
> +
> +        if args.ts_start and secs_to_nsecs(args.ts_start) > int(vals["ts"]):
> +            continue
> +
> +        if args.ts_end and secs_to_nsecs(args.ts_end) < int(vals["ts"]):
> +            break
> +
> +        vals["ls"] = lsplit[4]
> +        vals["op"] = lsplit[7]
> +        vals["number"] = int(lsplit[8], 16)
> +        vals["ptr"] = lsplit[9]
> +
> +        db = lookup_lockdb(our_nodeid, vals["ls"], vals["number"])
> +        if "state plock" in line:
> +            vals["event"] = EType.LOCK
> +            if vals["op"] == "start":
> +                vals["mode"] = lsplit[10]
> +                se = lsplit[11].split('-')
> +                vals["start"] = int(se[0], 16)
> +                vals["end"] = int(se[1], 16)
> +                npo = lsplit[12].split('/')
> +                vals["nodeid"] = int(npo[0])
> +                vals["pid"] = int(npo[1])
> +                vals["owner"] = int(npo[2], 16)
> +                db.event_start(vals)
> +            elif vals["op"] == "end":
> +                db.event_end(vals)
> +            else:
> +                print("{}: unknown state plock operation: {}".format(vals["ts"], vals["op"]))
> +        elif "state waiter" in line:
> +            vals["event"] = EType.WAITER
> +            if vals["op"] == "start":
> +                se = lsplit[10].split('-')
> +                vals["start"] = int(se[0], 16)
> +                vals["end"] = int(se[1], 16)
> +                npo = lsplit[11].split('/')
> +                vals["nodeid"] = int(npo[0])
> +                vals["pid"] = int(npo[1])
> +                vals["owner"] = int(npo[2], 16)
> +                db.event_start(vals)
> +            elif vals["op"] == "end":
> +                db.event_end(vals)
> +            else:
> +                print("{}: unknown state waiter operation: {}".format(vals["ts"], vals["op"]))
> +        else:
> +            print("received unknown line event: {}".format(line))
> +
> +if args.info:
> +    ts_max = None
> +    ts_min = None
> +
> +    for nodeid in nodes:
> +        for ls in nodes[nodeid]:
> +            for number in nodes[nodeid][ls]:
> +                db = lookup_lockdb(nodeid, ls, number)
> +                if not ts_min:
> +                    ts_min = db.get_ts_min()
> +                else:
> +                    ts_min = min(ts_min, db.get_ts_min())
> +
> +                if not ts_max:
> +                    ts_max = db.get_ts_max()
> +                else:
> +                    ts_max = max(ts_max, db.get_ts_max())
> +
> +    print("global info:")
> +    print("\tts_start: {0:.3f} secs".format(nsecs_to_secs(ts_min)))
> +    print("\tts_end: {0:.3f} secs".format(nsecs_to_secs(ts_max)))
> +    print("\tduration: {0:.3f} secs".format(nsecs_to_secs(ts_max) - nsecs_to_secs(ts_min)))
> +
> +    print()
> +    print("per inode lock info:")
> +    for nodeid in nodes:
> +        print("nodeid: {}".format(nodeid))
> +        for ls in nodes[nodeid]:
> +            print("\tls: {}".format(ls))
> +            for number in nodes[nodeid][ls]:
> +                print("\t\tinode: {}".format(hex(number)))
> +                db = lookup_lockdb(nodeid, ls, number)
> +                print("\t\t\tts_start: {0:.3f} secs".format(nsecs_to_secs(db.get_ts_min())))
> +                print("\t\t\tts_end: {0:.3f} secs".format(nsecs_to_secs(db.get_ts_max())))
> +                print("\t\t\tduration: {0:.3f} secs".format(nsecs_to_secs(db.get_ts_max()) - nsecs_to_secs(db.get_ts_min())))
> +

I will add a "top 5" ranking of the longest contention states... So if
somebody is looking for long contention states it is easy to find them
by quickly parsing the log files.
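
A rough sketch of how that could look, reusing the data structures of
the posted script (this is not part of the patch, just an idea):

  # sketch: collect all waiter rectangles from every LockDb and print
  # the longest contention states first
  def print_top_contention(count=5):
      waiters = []
      for nodeid in nodes:
          for ls in nodes[nodeid]:
              for number in nodes[nodeid][ls]:
                  waiters.extend(lookup_lockdb(nodeid, ls, number).waiters)

      waiters.sort(key=lambda wa: wa.get_duration(), reverse=True)
      print("top {} longest contention states:".format(count))
      for wa in waiters[:count]:
          print("\tls {} inode {} nodeid {} pid {} owner {}: {:.3f} secs".format(
              wa.ls, hex(wa.number), wa.nodeid, wa.pid, hex(wa.owner),
              nsecs_to_secs(wa.get_duration())))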

- Alex


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Cluster-devel] [PATCH dlm-tool 13/14] dlm_controld: plock log lock state
  2023-03-03 13:38   ` Andreas Gruenbacher
  2023-03-03 14:35     ` Alexander Aring
@ 2023-03-03 15:52     ` Andreas Gruenbacher
  2023-03-03 16:02       ` Andreas Gruenbacher
  2023-03-03 22:31       ` Alexander Aring
  1 sibling, 2 replies; 23+ messages in thread
From: Andreas Gruenbacher @ 2023-03-03 15:52 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Now, let me get to the core of the matter.  We've been talking about
using user-space (SDT) trace points for collecting the data, and I still
think that that's what we should do instead of introducing a new
dlm_controld log file.  In the dlm_controld code, this would look like
the below patch.

Note that <sys/sdt.h> is part of the systemtap-sdt-devel package, so a
"BuildRequires: systemtap-sdt-devel" dependency will be needed in
dlm.spec.

With that, we can use standard tools like perf, bpftrace, etc. for
collecting all the relevant information without any further
modifications to dlm_controld.  We can also collect additional kernel
and user-space trace point data at the same time with very little
additional effort.

For example, here is how to register the four plock dlm_controld trace
points in perf:

  for ev in \
      sdt_dlm_controld:plock_lock_begin \
      sdt_dlm_controld:plock_lock_end \
      sdt_dlm_controld:plock_wait_begin \
      sdt_dlm_controld:plock_wait_end; do \
    perf probe -x /usr/sbin/dlm_controld $ev; \
  done

The events can then be recorded with "perf record":

  perf record \
    -e sdt_dlm_controld:plock_lock_begin \
    -e sdt_dlm_controld:plock_lock_end \
    -e sdt_dlm_controld:plock_wait_begin \
    -e sdt_dlm_controld:plock_wait_end \
    [...]

We've already gone through how the resulting log can be processed with
"perf script".  One possible result would be the log file format that
lockdb_plot expects, but there are countless other possibilities.
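
For illustration, a minimal post-processing sketch could look like the
following.  Note that the exact "perf script" output layout and the
argN ordering of the SDT probe arguments are assumptions derived from
the DTRACE_PROBE calls above, not something this series provides:

  #!/usr/bin/env python3
  # sketch: pair up plock_*_begin/plock_*_end events from "perf script"
  # output by the po/w pointer argument and print the duration of each
  # lock/wait interval
  import re
  import sys

  # assumed line layout:
  # "dlm_controld 1234 [002] 4783.268364: sdt_dlm_controld:plock_wait_begin: (55d3...) arg1=0x63 arg2=0x7f..."
  line_re = re.compile(r'^\s*\S+\s+\d+\s+\[\d+\]\s+(?P<ts>[\d.]+):\s+'
                       r'sdt_dlm_controld:(?P<ev>\w+):\s+\(\w+\)\s*(?P<args>.*)$')

  pending = {}  # (kind, handle) -> (begin timestamp, begin args)

  for line in sys.stdin:
      m = line_re.match(line)
      if not m:
          continue
      ts = float(m.group('ts'))
      ev = m.group('ev')
      args = dict(a.split('=', 1) for a in m.group('args').split())

      if ev in ('plock_lock_begin', 'plock_wait_begin'):
          kind = 'lock' if ev == 'plock_lock_begin' else 'wait'
          pending[(kind, args['arg2'])] = (ts, args)   # arg2: po/w pointer
      elif ev == 'plock_wait_end':
          begin = pending.pop(('wait', args['arg2']), None)
          if begin:
              print('wait number={} dt={:.6f}s'.format(begin[1]['arg1'], ts - begin[0]))
      elif ev == 'plock_lock_end':
          begin = pending.pop(('lock', args['arg1']), None)  # end probe only carries po
          if begin:
              print('lock number={} dt={:.6f}s'.format(begin[1]['arg1'], ts - begin[0]))

Something like "perf script -i perf.data | ./plock-durations.py" (script
name made up here) would then print one line per completed lock or wait
interval.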

Other useful "tricks":

  $ bpftrace -l 'usdt:/usr/sbin/dlm_controld:*'

  $ readelf -n /usr/sbin/dlm_controld | sed -ne '/\.note\.stapsdt/,/^$/p'


Thanks,
Andreas

-- 
 dlm_controld/plock.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/dlm_controld/plock.c b/dlm_controld/plock.c
index 39bdd1f6..588bcaaa 100644
--- a/dlm_controld/plock.c
+++ b/dlm_controld/plock.c
@@ -8,6 +8,7 @@
 
 #include "dlm_daemon.h"
 #include <linux/dlm_plock.h>
+#include <sys/sdt.h>
 
 /* FIXME: remove this once everyone is using the version of
  * dlm_plock.h which defines it */
@@ -211,6 +212,11 @@ static uint64_t dt_usec(const struct timeval *start, const struct timeval *stop)
 static void plock_print_start_waiter(const struct lockspace *ls,
 				     struct lock_waiter *w)
 {
+	const struct dlm_plock_info *info = &w->info;
+
+	DTRACE_PROBE7(dlm_controld, plock_wait_begin, info->number, w, info->start,
+		      info->end, info->nodeid, info->pid, info->owner);
+
 	log_plock(ls, "state waiter start %llx %p %llx-%llx %d/%u/%llx",
 		  (unsigned long long)w->info.number,
 		  w,
@@ -223,6 +229,10 @@ static void plock_print_start_waiter(const struct lockspace *ls,
 static void plock_print_end_waiter(const struct lockspace *ls,
 				   const struct lock_waiter *w)
 {
+	const struct dlm_plock_info *info = &w->info;
+
+	DTRACE_PROBE2(dlm_controld, plock_wait_end, info->number, w);
+
 	log_plock(ls, "state waiter end %llx %p",
 		  (unsigned long long)w->info.number, w);
 }
@@ -230,6 +240,9 @@ static void plock_print_end_waiter(const struct lockspace *ls,
 static void plock_print_start_plock(const struct lockspace *ls, uint64_t number,
 				    const struct posix_lock *po)
 {
+	DTRACE_PROBE8(dlm_controld, plock_lock_begin, number, po, po->ex, po->start,
+		      po->end, po->nodeid, po->pid, po->owner);
+
 	log_plock(ls, "state plock start %llx %p %s %llx-%llx %d/%u/%llx",
 		  (unsigned long long)number,
 		  po,
@@ -243,6 +256,8 @@ static void plock_print_start_plock(const struct lockspace *ls, uint64_t number,
 static void plock_print_end_plock(const struct lockspace *ls, uint64_t number,
 				  const struct posix_lock *po)
 {
+	DTRACE_PROBE1(dlm_controld, plock_lock_end, po);
+
 	log_plock(ls, "state plock end %llx %p",
 		  (unsigned long long)number, po);
 }
-- 
2.39.0


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [Cluster-devel] [PATCH dlm-tool 13/14] dlm_controld: plock log lock state
  2023-03-03 15:52     ` Andreas Gruenbacher
@ 2023-03-03 16:02       ` Andreas Gruenbacher
  2023-03-03 22:20         ` Alexander Aring
  2023-03-03 22:31       ` Alexander Aring
  1 sibling, 1 reply; 23+ messages in thread
From: Andreas Gruenbacher @ 2023-03-03 16:02 UTC (permalink / raw)
  To: cluster-devel.redhat.com

On Fri, Mar 3, 2023 at 4:53 PM Andreas Gruenbacher <agruenba@redhat.com> wrote:
> diff --git a/dlm_controld/plock.c b/dlm_controld/plock.c
> index 39bdd1f6..588bcaaa 100644
> --- a/dlm_controld/plock.c
> +++ b/dlm_controld/plock.c
> @@ -8,6 +8,7 @@
>
>  #include "dlm_daemon.h"
>  #include <linux/dlm_plock.h>
> +#include <sys/sdt.h>
>
>  /* FIXME: remove this once everyone is using the version of
>   * dlm_plock.h which defines it */
> @@ -211,6 +212,11 @@ static uint64_t dt_usec(const struct timeval *start, const struct timeval *stop)
>  static void plock_print_start_waiter(const struct lockspace *ls,
>                                      struct lock_waiter *w)
>  {
> +       const struct dlm_plock_info *info = &w->info;
> +
> +       DTRACE_PROBE7(dlm_controld, plock_wait_begin, info->number, w, info->start,
> +                     info->end, info->nodeid, info->pid, info->owner);
> +
>         log_plock(ls, "state waiter start %llx %p %llx-%llx %d/%u/%llx",
>                   (unsigned long long)w->info.number,
>                   w,

An additional question I have about those events is which information
to log. We need to be able to identify which inode the request is for
(info->number), the locking range (info->start and info->end), whether
it is read or write lock, and which context in the kernel the request
refers to (this seems to be info->owner, but I'm not entirely sure
about that). The pid may be interesting as well, but are w or
info->nodeid really useful? We should try to avoid exposing
unnecessary implementation details like the addresses of objects
inside dlm_controld.

> @@ -223,6 +229,10 @@ static void plock_print_start_waiter(const struct lockspace *ls,
>  static void plock_print_end_waiter(const struct lockspace *ls,
>                                    const struct lock_waiter *w)
>  {
> +       const struct dlm_plock_info *info = &w->info;
> +
> +       DTRACE_PROBE2(dlm_controld, plock_wait_end, info->number, w);
> +
>         log_plock(ls, "state waiter end %llx %p",
>                   (unsigned long long)w->info.number, w);
>  }
> @@ -230,6 +240,9 @@ static void plock_print_end_waiter(const struct lockspace *ls,
>  static void plock_print_start_plock(const struct lockspace *ls, uint64_t number,
>                                     const struct posix_lock *po)
>  {
> +       DTRACE_PROBE8(dlm_controld, plock_lock_begin, number, po, po->ex, po->start,
> +                     po->end, po->nodeid, po->pid, po->owner);
> +
>         log_plock(ls, "state plock start %llx %p %s %llx-%llx %d/%u/%llx",
>                   (unsigned long long)number,
>                   po,
> @@ -243,6 +256,8 @@ static void plock_print_start_plock(const struct lockspace *ls, uint64_t number,
>  static void plock_print_end_plock(const struct lockspace *ls, uint64_t number,
>                                   const struct posix_lock *po)
>  {
> +       DTRACE_PROBE1(dlm_controld, plock_lock_end, po);
> +
>         log_plock(ls, "state plock end %llx %p",
>                   (unsigned long long)number, po);
>  }
> --
> 2.39.0
>

Thanks,
Andreas


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Cluster-devel] [PATCH dlm-tool 13/14] dlm_controld: plock log lock state
  2023-03-03 16:02       ` Andreas Gruenbacher
@ 2023-03-03 22:20         ` Alexander Aring
  2023-03-03 22:28           ` Alexander Aring
  2023-03-03 22:43           ` Alexander Aring
  0 siblings, 2 replies; 23+ messages in thread
From: Alexander Aring @ 2023-03-03 22:20 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

On Fri, Mar 3, 2023 at 11:08 AM Andreas Gruenbacher <agruenba@redhat.com> wrote:
>
> On Fri, Mar 3, 2023 at 4:53 PM Andreas Gruenbacher <agruenba@redhat.com> wrote:
> > diff --git a/dlm_controld/plock.c b/dlm_controld/plock.c
> > index 39bdd1f6..588bcaaa 100644
> > --- a/dlm_controld/plock.c
> > +++ b/dlm_controld/plock.c
> > @@ -8,6 +8,7 @@
> >
> >  #include "dlm_daemon.h"
> >  #include <linux/dlm_plock.h>
> > +#include <sys/sdt.h>
> >

does this require an additional dependency?

> >  /* FIXME: remove this once everyone is using the version of
> >   * dlm_plock.h which defines it */
> > @@ -211,6 +212,11 @@ static uint64_t dt_usec(const struct timeval *start, const struct timeval *stop)
> >  static void plock_print_start_waiter(const struct lockspace *ls,
> >                                      struct lock_waiter *w)
> >  {
> > +       const struct dlm_plock_info *info = &w->info;
> > +
> > +       DTRACE_PROBE7(dlm_controld, plock_wait_begin, info->number, w, info->start,
> > +                     info->end, info->nodeid, info->pid, info->owner);
> > +
> >         log_plock(ls, "state waiter start %llx %p %llx-%llx %d/%u/%llx",
> >                   (unsigned long long)w->info.number,
> >                   w,
>
> An additional question I have about those events is which information
> to log. We need to be able to identify which inode the request is for
> (info->number), the locking range (info->start and info->end), whether
> it is read or write lock, and which context in the kernel the request
> refers to (this seems to be info->owner, but I'm not entirely sure
> about that). The pid may be interesting as well, but are w or

w is always true for waiters, it's just stored there because the
structure is used in other places. It indicates whether it is a trylock
or not. When we have a waiter it is never a trylock, so w == 1.

> info->nodeid really useful? We should try to avoid exposing

yes, it is useful. As far as I can see, you need a nodeid/pid combination
to see which pid held the lock state for a specific time. nodeid will tell
you on which node the pid runs; the same pid _could_ occur twice, and
without nodeid you don't know which process in the cluster held the
state, either lock or, in this case, contention state.

> unnecessary implementation details like the addresses of objects
> inside dlm_controld.
>

I am using it as a unique handle (probably the nodeid needs to be
considered here as well, because the same argument as for pid applies,
just more unlikely) to match the start trace/log entry to the end one.
I used a hash value of some fields, but printing out those values twice
will just blow up the trace/log size. Or?

- Alex


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Cluster-devel] [PATCH dlm-tool 13/14] dlm_controld: plock log lock state
  2023-03-03 22:20         ` Alexander Aring
@ 2023-03-03 22:28           ` Alexander Aring
  2023-03-03 22:43           ` Alexander Aring
  1 sibling, 0 replies; 23+ messages in thread
From: Alexander Aring @ 2023-03-03 22:28 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

On Fri, Mar 3, 2023 at 5:20 PM Alexander Aring <aahringo@redhat.com> wrote:
>
> Hi,
>
> On Fri, Mar 3, 2023 at 11:08 AM Andreas Gruenbacher <agruenba@redhat.com> wrote:
> >
> > On Fri, Mar 3, 2023 at 4:53 PM Andreas Gruenbacher <agruenba@redhat.com> wrote:
> > > diff --git a/dlm_controld/plock.c b/dlm_controld/plock.c
> > > index 39bdd1f6..588bcaaa 100644
> > > --- a/dlm_controld/plock.c
> > > +++ b/dlm_controld/plock.c
> > > @@ -8,6 +8,7 @@
> > >
> > >  #include "dlm_daemon.h"
> > >  #include <linux/dlm_plock.h>
> > > +#include <sys/sdt.h>
> > >
>
> does this require an additional dependency?
>
> > >  /* FIXME: remove this once everyone is using the version of
> > >   * dlm_plock.h which defines it */
> > > @@ -211,6 +212,11 @@ static uint64_t dt_usec(const struct timeval *start, const struct timeval *stop)
> > >  static void plock_print_start_waiter(const struct lockspace *ls,
> > >                                      struct lock_waiter *w)
> > >  {
> > > +       const struct dlm_plock_info *info = &w->info;
> > > +
> > > +       DTRACE_PROBE7(dlm_controld, plock_wait_begin, info->number, w, info->start,
> > > +                     info->end, info->nodeid, info->pid, info->owner);
> > > +
> > >         log_plock(ls, "state waiter start %llx %p %llx-%llx %d/%u/%llx",
> > >                   (unsigned long long)w->info.number,
> > >                   w,
> >
> > An additional question I have about those events is which information
> > to log. We need to be able to identify which inode the request is for
> > (info->number), the locking range (info->start and info->end), whether
> > it is read or write lock, and which context in the kernel the request
> > refers to (this seems to be info->owner, but I'm not entirely sure
> > about that). The pid may be interesting as well, but are w or
>
> w is always true for waiters, it's just stored there because the
> structure is used in other places. It indicates whether it is a trylock
> or not. When we have a waiter it is never a trylock, so w == 1.
>

I am sorry, I thought you meant the w field in "w->w". As I explained
below, I use it as a unique handle to match the "start" and "end"
events.

 Alex


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Cluster-devel] [PATCH dlm-tool 13/14] dlm_controld: plock log lock state
  2023-03-03 15:52     ` Andreas Gruenbacher
  2023-03-03 16:02       ` Andreas Gruenbacher
@ 2023-03-03 22:31       ` Alexander Aring
  1 sibling, 0 replies; 23+ messages in thread
From: Alexander Aring @ 2023-03-03 22:31 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

On Fri, Mar 3, 2023 at 10:52 AM Andreas Gruenbacher <agruenba@redhat.com> wrote:
>
> Now, let me get to the core of the matter.  We've been talking about
> using user-space (SDT) trace points for collecting the data, and I still
> think that that's what we should do instead of introducing a new
> dlm_controld log file.  In the dlm_controld code, this would look like
> the below patch.
>
> Note that <sys/sdt.h> is part of the systemtap-sdt-devel package, so a
> "BuildRequires: systemtap-sdt-devel" dependency will be needed in
> dlm.spec.
>

ah, ok. This answers the other question I had.

> With that, we can use standard tools like perf, bpftrace, etc. for
> collecting all the relevant information without any further
> modifications to dlm_controld.  We can also collect additional kernel
> and user-space trace point data at the same time with very little
> additional effort.
>
> For example, here is how to register the four plock dlm_controld trace
> points in perf:
>
>   for ev in \
>       sdt_dlm_controld:plock_lock_begin \
>       sdt_dlm_controld:plock_lock_end \
>       sdt_dlm_controld:plock_wait_begin \
>       sdt_dlm_controld:plock_wait_end; do \
>     perf probe -x /usr/sbin/dlm_controld $ev; \
>   done
>
> The events can then be recorded with "perf record":
>
>   perf record \
>     -e sdt_dlm_controld:plock_lock_begin \
>     -e sdt_dlm_controld:plock_lock_end \
>     -e sdt_dlm_controld:plock_wait_begin \
>     -e sdt_dlm_controld:plock_wait_end \
>     [...]
>
> We've already gone through how the resulting log can be processed with
> "perf script".  One possible result would be the log file format that
> lockdb_plot expects, but there are countless other possibilities.
>
> Other useful "tricks":
>
>   $ bpftrace -l 'usdt:/usr/sbin/dlm_controld:*'
>
>   $ readelf -n /usr/sbin/dlm_controld | sed -ne '/\.note\.stapsdt/,/^$/p'
>

look easy enough.

- Alex


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Cluster-devel] [PATCH dlm-tool 13/14] dlm_controld: plock log lock state
  2023-03-03 22:20         ` Alexander Aring
  2023-03-03 22:28           ` Alexander Aring
@ 2023-03-03 22:43           ` Alexander Aring
  1 sibling, 0 replies; 23+ messages in thread
From: Alexander Aring @ 2023-03-03 22:43 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

On Fri, Mar 3, 2023 at 5:20 PM Alexander Aring <aahringo@redhat.com> wrote:
>
> Hi,
>
> On Fri, Mar 3, 2023 at 11:08 AM Andreas Gruenbacher <agruenba@redhat.com> wrote:
> >
> > On Fri, Mar 3, 2023 at 4:53 PM Andreas Gruenbacher <agruenba@redhat.com> wrote:
> > > diff --git a/dlm_controld/plock.c b/dlm_controld/plock.c
> > > index 39bdd1f6..588bcaaa 100644
> > > --- a/dlm_controld/plock.c
> > > +++ b/dlm_controld/plock.c
> > > @@ -8,6 +8,7 @@
> > >
> > >  #include "dlm_daemon.h"
> > >  #include <linux/dlm_plock.h>
> > > +#include <sys/sdt.h>
> > >
>
> does this require an additional dependency?
>
> > >  /* FIXME: remove this once everyone is using the version of
> > >   * dlm_plock.h which defines it */
> > > @@ -211,6 +212,11 @@ static uint64_t dt_usec(const struct timeval *start, const struct timeval *stop)
> > >  static void plock_print_start_waiter(const struct lockspace *ls,
> > >                                      struct lock_waiter *w)
> > >  {
> > > +       const struct dlm_plock_info *info = &w->info;
> > > +
> > > +       DTRACE_PROBE7(dlm_controld, plock_wait_begin, info->number, w, info->start,
> > > +                     info->end, info->nodeid, info->pid, info->owner);
> > > +
> > >         log_plock(ls, "state waiter start %llx %p %llx-%llx %d/%u/%llx",
> > >                   (unsigned long long)w->info.number,
> > >                   w,
> >
> > An additional question I have about those events is which information
> > to log. We need to be able to identify which inode the request is for
> > (info->number), the locking range (info->start and info->end), whether
> > it is read or write lock, and which context in the kernel the request
> > refers to (this seems to be info->owner, but I'm not entirely sure
> > about that). The pid may be interesting as well, but are w or
>
> w is always true for waiters, it's just stored there because the
> structure is used in other places. It indicates whether it is a trylock
> or not. When we have a waiter it is never a trylock, so w == 1.
>
> > info->nodeid really useful? We should try to avoid exposing
>
> yes, it is useful. As far as I can see, you need a nodeid/pid combination
> to see which pid held the lock state for a specific time. nodeid will tell
> you on which node the pid runs; the same pid _could_ occur twice, and
> without nodeid you don't know which process in the cluster held the
> state, either lock or, in this case, contention state.
>
> > unnecessary implementation details like the addresses of objects
> > inside dlm_controld.
> >
>
> I am using it as a unique handle (probably the nodeid needs to be
> considered here as well, because the same argument as for pid applies,
> just more unlikely) to match the start trace/log entry to the end one.

no, we don't need the nodeid here. If we traced multiple applications
into one file, then yes, but we separate them into their own trace/log
files.
And using the pointer is valid because this structure is added to and
removed from the list and cannot, e.g., be added twice.

- Alex


^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2023-03-03 22:43 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-03-02 17:14 [Cluster-devel] [PATCH dlm-tool 01/14] dlm_tool: add fail functionality if dump failed Alexander Aring
2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 02/14] dlm_controld: always create logdir Alexander Aring
2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 03/14] dlm_controld: add plock logfile Alexander Aring
2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 04/14] dlm_controld: move processing of saved messages to plock level Alexander Aring
2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 05/14] dlm_controld: remove ls parameter Alexander Aring
2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 06/14] dlm_controld: constify timeval of dt_usec() Alexander Aring
2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 07/14] dlm_controld: add gcc format printf attribute to log_level Alexander Aring
2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 08/14] dlm_controld: enable nanosec logging Alexander Aring
2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 09/14] dlm_controld: use write_result() Alexander Aring
2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 10/14] dlm_controld: be sure we stop lockspaces before shutdown Alexander Aring
2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 11/14] dlm_controld: constify name_in in log_level() Alexander Aring
2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 12/14] dlm_controld: plock log waiters state Alexander Aring
2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 13/14] dlm_controld: plock log lock state Alexander Aring
2023-03-03 13:38   ` Andreas Gruenbacher
2023-03-03 14:35     ` Alexander Aring
2023-03-03 15:52     ` Andreas Gruenbacher
2023-03-03 16:02       ` Andreas Gruenbacher
2023-03-03 22:20         ` Alexander Aring
2023-03-03 22:28           ` Alexander Aring
2023-03-03 22:43           ` Alexander Aring
2023-03-03 22:31       ` Alexander Aring
2023-03-02 17:14 ` [Cluster-devel] [PATCH dlm-tool 14/14] python: add posix lockdb plot tool Alexander Aring
2023-03-03 14:40   ` Alexander Aring

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.