xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
* [Xen-devel] [PATCH 0/4] enhance lock debugging
@ 2019-08-07 14:31 Juergen Gross
  2019-08-07 14:31 ` [Xen-devel] [PATCH 1/4] xen/spinlocks: in debug builds store cpu holding the lock Juergen Gross
                   ` (4 more replies)
  0 siblings, 5 replies; 36+ messages in thread
From: Juergen Gross @ 2019-08-07 14:31 UTC (permalink / raw)
  To: xen-devel
  Cc: Juergen Gross, Stefano Stabellini, Wei Liu,
	Konrad Rzeszutek Wilk, George Dunlap, Andrew Cooper, Ian Jackson,
	Tim Deegan, Julien Grall, Jan Beulich

While hunting a locking problem in my core scheduling series I have
added some debugging aids to spinlock handling making it easier to
find the root cause for the problem.

Making use of the already lock profiling and enhancing it a little
bit produces some really valuable diagnostic data e.g. when a NMI
watchdog is triggering a crash.

Juergen Gross (4):
  xen/spinlocks: in debug builds store cpu holding the lock
  xen: add new CONFIG_SPINLOCK_DEBUG option
  xen: print lock profile info in panic()
  xen: modify lock profiling interface

 tools/libxc/xc_misc.c       |   1 +
 tools/misc/xenlockprof.c    |  17 ++----
 xen/Kconfig.debug           |   8 +++
 xen/common/domain.c         |   4 +-
 xen/common/spinlock.c       | 134 +++++++++++++++++++++++++++++++-------------
 xen/drivers/char/console.c  |   4 +-
 xen/include/public/sysctl.h |  11 ++--
 xen/include/xen/spinlock.h  |  42 ++++++++------
 8 files changed, 142 insertions(+), 79 deletions(-)

-- 
2.16.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [Xen-devel] [PATCH 1/4] xen/spinlocks: in debug builds store cpu holding the lock
  2019-08-07 14:31 [Xen-devel] [PATCH 0/4] enhance lock debugging Juergen Gross
@ 2019-08-07 14:31 ` Juergen Gross
  2019-08-07 17:05   ` Andrew Cooper
  2019-08-08  6:58   ` Jan Beulich
  2019-08-07 14:31 ` [Xen-devel] [PATCH 2/4] xen: add new CONFIG_SPINLOCK_DEBUG option Juergen Gross
                   ` (3 subsequent siblings)
  4 siblings, 2 replies; 36+ messages in thread
From: Juergen Gross @ 2019-08-07 14:31 UTC (permalink / raw)
  To: xen-devel
  Cc: Juergen Gross, Stefano Stabellini, Wei Liu,
	Konrad Rzeszutek Wilk, George Dunlap, Andrew Cooper, Ian Jackson,
	Tim Deegan, Julien Grall, Jan Beulich

Add the cpu currently holding the lock to struct lock_debug. This makes
analysis of locking errors easier and it can be tested whether the
correct cpu is releasing a lock again.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
 xen/common/spinlock.c      | 31 +++++++++++++++++++++++++------
 xen/include/xen/spinlock.h | 16 +++++++++++-----
 2 files changed, 36 insertions(+), 11 deletions(-)

diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
index 6bc52d70c0..4e681cc651 100644
--- a/xen/common/spinlock.c
+++ b/xen/common/spinlock.c
@@ -13,9 +13,9 @@
 
 static atomic_t spin_debug __read_mostly = ATOMIC_INIT(0);
 
-static void check_lock(struct lock_debug *debug)
+static void check_lock(union lock_debug *debug)
 {
-    int irq_safe = !local_irq_is_enabled();
+    unsigned short irq_safe = !local_irq_is_enabled();
 
     if ( unlikely(atomic_read(&spin_debug) <= 0) )
         return;
@@ -43,18 +43,21 @@ static void check_lock(struct lock_debug *debug)
      */
     if ( unlikely(debug->irq_safe != irq_safe) )
     {
-        int seen = cmpxchg(&debug->irq_safe, -1, irq_safe);
+        union lock_debug seen, new = { 0 };
 
-        if ( seen == !irq_safe )
+        new.irq_safe = irq_safe;
+        seen.val = cmpxchg(&debug->val, 0xffff, new.val);
+
+        if ( !seen.unused && seen.irq_safe == !irq_safe )
         {
             printk("CHECKLOCK FAILURE: prev irqsafe: %d, curr irqsafe %d\n",
-                   seen, irq_safe);
+                   seen.irq_safe, irq_safe);
             BUG();
         }
     }
 }
 
-static void check_barrier(struct lock_debug *debug)
+static void check_barrier(union lock_debug *debug)
 {
     if ( unlikely(atomic_read(&spin_debug) <= 0) )
         return;
@@ -73,6 +76,17 @@ static void check_barrier(struct lock_debug *debug)
     BUG_ON(!local_irq_is_enabled() && (debug->irq_safe == 0));
 }
 
+static void got_lock(union lock_debug *debug)
+{
+    debug->cpu = smp_processor_id();
+}
+
+static void rel_lock(union lock_debug *debug)
+{
+    ASSERT(debug->cpu == smp_processor_id());
+    debug->cpu = SPINLOCK_NO_CPU;
+}
+
 void spin_debug_enable(void)
 {
     atomic_inc(&spin_debug);
@@ -87,6 +101,8 @@ void spin_debug_disable(void)
 
 #define check_lock(l) ((void)0)
 #define check_barrier(l) ((void)0)
+#define got_lock(l) ((void)0)
+#define rel_lock(l) ((void)0)
 
 #endif
 
@@ -150,6 +166,7 @@ void inline _spin_lock_cb(spinlock_t *lock, void (*cb)(void *), void *data)
             cb(data);
         arch_lock_relax();
     }
+    got_lock(&lock->debug);
     LOCK_PROFILE_GOT;
     preempt_disable();
     arch_lock_acquire_barrier();
@@ -181,6 +198,7 @@ void _spin_unlock(spinlock_t *lock)
     arch_lock_release_barrier();
     preempt_enable();
     LOCK_PROFILE_REL;
+    rel_lock(&lock->debug);
     add_sized(&lock->tickets.head, 1);
     arch_lock_signal();
 }
@@ -224,6 +242,7 @@ int _spin_trylock(spinlock_t *lock)
     if ( cmpxchg(&lock->tickets.head_tail,
                  old.head_tail, new.head_tail) != old.head_tail )
         return 0;
+    got_lock(&lock->debug);
 #ifdef CONFIG_LOCK_PROFILE
     if (lock->profile)
         lock->profile->time_locked = NOW();
diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
index a811b73bf3..b4881ad433 100644
--- a/xen/include/xen/spinlock.h
+++ b/xen/include/xen/spinlock.h
@@ -7,14 +7,20 @@
 #include <xen/percpu.h>
 
 #ifndef NDEBUG
-struct lock_debug {
-    s16 irq_safe; /* +1: IRQ-safe; 0: not IRQ-safe; -1: don't know yet */
+union lock_debug {
+    unsigned short val;
+    struct {
+        unsigned short unused:1;
+        unsigned short irq_safe:1;
+        unsigned short pad:2;
+        unsigned short cpu:12;
+    };
 };
-#define _LOCK_DEBUG { -1 }
+#define _LOCK_DEBUG { 0xffff }
 void spin_debug_enable(void);
 void spin_debug_disable(void);
 #else
-struct lock_debug { };
+union lock_debug { };
 #define _LOCK_DEBUG { }
 #define spin_debug_enable() ((void)0)
 #define spin_debug_disable() ((void)0)
@@ -143,7 +149,7 @@ typedef struct spinlock {
 #define SPINLOCK_NO_CPU 0xfffu
     u16 recurse_cnt:4;
 #define SPINLOCK_MAX_RECURSE 0xfu
-    struct lock_debug debug;
+    union lock_debug debug;
 #ifdef CONFIG_LOCK_PROFILE
     struct lock_profile *profile;
 #endif
-- 
2.16.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [Xen-devel] [PATCH 2/4] xen: add new CONFIG_SPINLOCK_DEBUG option
  2019-08-07 14:31 [Xen-devel] [PATCH 0/4] enhance lock debugging Juergen Gross
  2019-08-07 14:31 ` [Xen-devel] [PATCH 1/4] xen/spinlocks: in debug builds store cpu holding the lock Juergen Gross
@ 2019-08-07 14:31 ` Juergen Gross
  2019-08-07 17:17   ` Andrew Cooper
  2019-08-08  6:34   ` Jan Beulich
  2019-08-07 14:31 ` [Xen-devel] [PATCH 3/4] xen: print lock profile info in panic() Juergen Gross
                   ` (2 subsequent siblings)
  4 siblings, 2 replies; 36+ messages in thread
From: Juergen Gross @ 2019-08-07 14:31 UTC (permalink / raw)
  To: xen-devel
  Cc: Juergen Gross, Stefano Stabellini, Wei Liu,
	Konrad Rzeszutek Wilk, George Dunlap, Andrew Cooper, Ian Jackson,
	Tim Deegan, Julien Grall, Jan Beulich

Instead of enabling spinlock debugging for debug builds only add a
dedicated Kconfig option for that purpose which defaults to DEBUG.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
 xen/Kconfig.debug          | 7 +++++++
 xen/common/spinlock.c      | 4 ++--
 xen/include/xen/spinlock.h | 2 +-
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/xen/Kconfig.debug b/xen/Kconfig.debug
index e10e314e25..5db3e7ec25 100644
--- a/xen/Kconfig.debug
+++ b/xen/Kconfig.debug
@@ -44,6 +44,13 @@ config COVERAGE
 
 	  If unsure, say N here.
 
+config SPINLOCK_DEBUG
+	bool "Spinlock debugging"
+	default DEBUG
+	---help---
+	  Enable debugging features of spinlock handling.  Some additional
+          checks will be performed when acquiring and releasing locks.
+
 config LOCK_PROFILE
 	bool "Lock Profiling"
 	---help---
diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
index 4e681cc651..16356ec9b7 100644
--- a/xen/common/spinlock.c
+++ b/xen/common/spinlock.c
@@ -9,7 +9,7 @@
 #include <asm/processor.h>
 #include <asm/atomic.h>
 
-#ifndef NDEBUG
+#ifdef CONFIG_SPINLOCK_DEBUG
 
 static atomic_t spin_debug __read_mostly = ATOMIC_INIT(0);
 
@@ -97,7 +97,7 @@ void spin_debug_disable(void)
     atomic_dec(&spin_debug);
 }
 
-#else /* defined(NDEBUG) */
+#else /* CONFIG_SPINLOCK_DEBUG */
 
 #define check_lock(l) ((void)0)
 #define check_barrier(l) ((void)0)
diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
index b4881ad433..e3458f10dd 100644
--- a/xen/include/xen/spinlock.h
+++ b/xen/include/xen/spinlock.h
@@ -6,7 +6,7 @@
 #include <asm/types.h>
 #include <xen/percpu.h>
 
-#ifndef NDEBUG
+#ifdef CONFIG_SPINLOCK_DEBUG
 union lock_debug {
     unsigned short val;
     struct {
-- 
2.16.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [Xen-devel] [PATCH 3/4] xen: print lock profile info in panic()
  2019-08-07 14:31 [Xen-devel] [PATCH 0/4] enhance lock debugging Juergen Gross
  2019-08-07 14:31 ` [Xen-devel] [PATCH 1/4] xen/spinlocks: in debug builds store cpu holding the lock Juergen Gross
  2019-08-07 14:31 ` [Xen-devel] [PATCH 2/4] xen: add new CONFIG_SPINLOCK_DEBUG option Juergen Gross
@ 2019-08-07 14:31 ` Juergen Gross
  2019-08-08  7:09   ` Jan Beulich
  2019-08-07 14:31 ` [Xen-devel] [PATCH 4/4] xen: modify lock profiling interface Juergen Gross
  2019-08-07 18:11 ` [Xen-devel] [PATCH 0/4] enhance lock debugging Andrew Cooper
  4 siblings, 1 reply; 36+ messages in thread
From: Juergen Gross @ 2019-08-07 14:31 UTC (permalink / raw)
  To: xen-devel
  Cc: Juergen Gross, Stefano Stabellini, Wei Liu,
	Konrad Rzeszutek Wilk, George Dunlap, Andrew Cooper, Ian Jackson,
	Tim Deegan, Julien Grall, Jan Beulich

Print the lock profile data when the system crashes and add some more
information for each lock data (lock address, cpu holding the lock).

This is especially beneficial for watchdog triggered crashes in case
of deadlocks.

In order to have the cpu holding the lock available let the
LOCK_PROFILE config option select SPINLOCK_DEBUG.

As printing the lock profile data will make use of locking, too, we
need to disable spinlock debugging before calling
spinlock_profile_printall() from panic().

While at it remove a superfluous #ifdef CONFIG_LOCK_PROFILE.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
 xen/Kconfig.debug          |  1 +
 xen/common/spinlock.c      | 11 +++++++----
 xen/drivers/char/console.c |  4 +++-
 xen/include/xen/spinlock.h |  1 +
 4 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/xen/Kconfig.debug b/xen/Kconfig.debug
index 5db3e7ec25..a7387f1630 100644
--- a/xen/Kconfig.debug
+++ b/xen/Kconfig.debug
@@ -53,6 +53,7 @@ config SPINLOCK_DEBUG
 
 config LOCK_PROFILE
 	bool "Lock Profiling"
+	select SPINLOCK_DEBUG
 	---help---
 	  Lock profiling allows you to see how often locks are taken and blocked.
 	  You can use serial console to print (and reset) using 'l' and 'L'
diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
index 16356ec9b7..5730076581 100644
--- a/xen/common/spinlock.c
+++ b/xen/common/spinlock.c
@@ -362,10 +362,13 @@ static void spinlock_profile_iterate(lock_profile_subfunc *sub, void *par)
 static void spinlock_profile_print_elem(struct lock_profile *data,
     int32_t type, int32_t idx, void *par)
 {
-    if ( type == LOCKPROF_TYPE_GLOBAL )
-        printk("%s %s:\n", lock_profile_ancs[type].name, data->name);
-    else
-        printk("%s %d %s:\n", lock_profile_ancs[type].name, idx, data->name);
+    struct spinlock *lock = data->lock;
+
+    printk("%s ", lock_profile_ancs[type].name);
+    if ( type != LOCKPROF_TYPE_GLOBAL )
+        printk("%d ", idx);
+    printk("%s: addr=%p, lockval=%04x, cpu=%d\n", data->name, lock,
+           lock->tickets.head_tail, lock->debug.cpu);
     printk("  lock:%12"PRId64"(%08X:%08X), block:%12"PRId64"(%08X:%08X)\n",
            data->lock_cnt, (u32)(data->time_hold >> 32), (u32)data->time_hold,
            data->block_cnt, (u32)(data->time_block >> 32),
diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c
index 1680d078db..404d28a725 100644
--- a/xen/drivers/char/console.c
+++ b/xen/drivers/char/console.c
@@ -1167,7 +1167,9 @@ void panic(const char *fmt, ...)
     unsigned long flags;
     static DEFINE_SPINLOCK(lock);
     static char buf[128];
-    
+
+    spin_debug_disable();
+    spinlock_profile_printall('\0');
     debugtrace_dump();
 
     /* Protects buf[] and ensure multi-line message prints atomically. */
diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
index e3458f10dd..26891445fe 100644
--- a/xen/include/xen/spinlock.h
+++ b/xen/include/xen/spinlock.h
@@ -130,6 +130,7 @@ struct lock_profile_qhead { };
 #define spin_lock_init_prof(s, l) spin_lock_init(&((s)->l))
 #define lock_profile_register_struct(type, ptr, idx, print)
 #define lock_profile_deregister_struct(type, ptr)
+#define spinlock_profile_printall(key)
 
 #endif
 
-- 
2.16.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [Xen-devel] [PATCH 4/4] xen: modify lock profiling interface
  2019-08-07 14:31 [Xen-devel] [PATCH 0/4] enhance lock debugging Juergen Gross
                   ` (2 preceding siblings ...)
  2019-08-07 14:31 ` [Xen-devel] [PATCH 3/4] xen: print lock profile info in panic() Juergen Gross
@ 2019-08-07 14:31 ` Juergen Gross
  2019-08-08  7:32   ` Jan Beulich
  2019-08-07 18:11 ` [Xen-devel] [PATCH 0/4] enhance lock debugging Andrew Cooper
  4 siblings, 1 reply; 36+ messages in thread
From: Juergen Gross @ 2019-08-07 14:31 UTC (permalink / raw)
  To: xen-devel
  Cc: Juergen Gross, Stefano Stabellini, Wei Liu,
	Konrad Rzeszutek Wilk, George Dunlap, Andrew Cooper, Ian Jackson,
	Tim Deegan, Julien Grall, Jan Beulich

Today adding locks located in a struct to lock profiling requires a
unique type index for each structure. This makes it hard to add any
new structure as the related sysctl interface needs to be changed, too.

Instead of using an index the already existing struct name specified
in lock_profile_register_struct() can be used as an identifier instead.

Modify the sysctl interface to use the struct name instead of the type
index and adapt the related coding accordingly. Instead of an array of
struct anchors for lock profiling we now use a linked list for that
purpose. Use the special idx value -1 for cases where the idx isn't
relevant (global locks) and shouldn't be printed.

Increment the sysctl.h interface version as an interface is being
modified.

Add the missing setting of profiling time to xc_lockprof_query().

Add freeing element data when deregistering a structure.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
 tools/libxc/xc_misc.c       |  1 +
 tools/misc/xenlockprof.c    | 17 ++-------
 xen/common/domain.c         |  4 +-
 xen/common/spinlock.c       | 92 +++++++++++++++++++++++++++++++--------------
 xen/include/public/sysctl.h | 11 ++----
 xen/include/xen/spinlock.h  | 23 ++++++------
 6 files changed, 86 insertions(+), 62 deletions(-)

diff --git a/tools/libxc/xc_misc.c b/tools/libxc/xc_misc.c
index 5e6714ae2b..68c17293bd 100644
--- a/tools/libxc/xc_misc.c
+++ b/tools/libxc/xc_misc.c
@@ -510,6 +510,7 @@ int xc_lockprof_query(xc_interface *xch,
     rc = do_sysctl(xch, &sysctl);
 
     *n_elems = sysctl.u.lockprof_op.nr_elem;
+    *time = sysctl.u.lockprof_op.time;
 
     return rc;
 }
diff --git a/tools/misc/xenlockprof.c b/tools/misc/xenlockprof.c
index 11f43a35e3..c6aa3fe841 100644
--- a/tools/misc/xenlockprof.c
+++ b/tools/misc/xenlockprof.c
@@ -88,19 +88,10 @@ int main(int argc, char *argv[])
     sb = 0;
     for ( j = 0; j < i; j++ )
     {
-        switch ( data[j].type )
-        {
-        case LOCKPROF_TYPE_GLOBAL:
-            sprintf(name, "global lock %s", data[j].name);
-            break;
-        case LOCKPROF_TYPE_PERDOM:
-            sprintf(name, "domain %d lock %s", data[j].idx, data[j].name);
-            break;
-        default:
-            sprintf(name, "unknown type(%d) %d lock %s", data[j].type,
-                    data[j].idx, data[j].name);
-            break;
-        }
+        if ( data[j].idx == LOCKPROF_IDX_NONE )
+            sprintf(name, "%s %s", data[j].type, data[j].name);
+        else
+            sprintf(name, "%s %d %s", data[j].type, data[j].idx, data[j].name);
         l = (double)(data[j].lock_time) / 1E+09;
         b = (double)(data[j].block_time) / 1E+09;
         sl += l;
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 744b572195..1051bf38a3 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -290,7 +290,7 @@ static void _domain_destroy(struct domain *d)
 
     xsm_free_security_domain(d);
 
-    lock_profile_deregister_struct(LOCKPROF_TYPE_PERDOM, d);
+    lock_profile_deregister_struct(d, "Domain");
 
     free_domain_struct(d);
 }
@@ -369,7 +369,7 @@ struct domain *domain_create(domid_t domid,
         d->max_vcpus = config->max_vcpus;
     }
 
-    lock_profile_register_struct(LOCKPROF_TYPE_PERDOM, d, domid, "Domain");
+    lock_profile_register_struct(d, domid, "Domain");
 
     if ( (err = xsm_alloc_security_domain(d)) != 0 )
         goto fail;
diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
index 5730076581..2582b4e340 100644
--- a/xen/common/spinlock.c
+++ b/xen/common/spinlock.c
@@ -330,42 +330,43 @@ void _spin_unlock_recursive(spinlock_t *lock)
 #ifdef CONFIG_LOCK_PROFILE
 
 struct lock_profile_anc {
+    struct lock_profile_anc   *next;     /* next type */
     struct lock_profile_qhead *head_q;   /* first head of this type */
-    char                      *name;     /* descriptive string for print */
+    const char                *name;     /* descriptive string for print */
 };
 
 typedef void lock_profile_subfunc(
-    struct lock_profile *, int32_t, int32_t, void *);
+    struct lock_profile *, const char *, int32_t, void *);
 
 extern struct lock_profile *__lock_profile_start;
 extern struct lock_profile *__lock_profile_end;
 
 static s_time_t lock_profile_start;
-static struct lock_profile_anc lock_profile_ancs[LOCKPROF_TYPE_N];
+static struct lock_profile_anc *lock_profile_ancs;
 static struct lock_profile_qhead lock_profile_glb_q;
 static spinlock_t lock_profile_lock = SPIN_LOCK_UNLOCKED;
 
 static void spinlock_profile_iterate(lock_profile_subfunc *sub, void *par)
 {
-    int i;
+    struct lock_profile_anc *anc;
     struct lock_profile_qhead *hq;
     struct lock_profile *eq;
 
     spin_lock(&lock_profile_lock);
-    for ( i = 0; i < LOCKPROF_TYPE_N; i++ )
-        for ( hq = lock_profile_ancs[i].head_q; hq; hq = hq->head_q )
+    for ( anc = lock_profile_ancs; anc; anc = anc->next )
+        for ( hq = anc->head_q; hq; hq = hq->head_q )
             for ( eq = hq->elem_q; eq; eq = eq->next )
-                sub(eq, i, hq->idx, par);
+                sub(eq, anc->name, hq->idx, par);
     spin_unlock(&lock_profile_lock);
 }
 
 static void spinlock_profile_print_elem(struct lock_profile *data,
-    int32_t type, int32_t idx, void *par)
+    const char *type, int32_t idx, void *par)
 {
     struct spinlock *lock = data->lock;
 
-    printk("%s ", lock_profile_ancs[type].name);
-    if ( type != LOCKPROF_TYPE_GLOBAL )
+    printk("%s ", type);
+    if ( idx != LOCKPROF_IDX_NONE )
         printk("%d ", idx);
     printk("%s: addr=%p, lockval=%04x, cpu=%d\n", data->name, lock,
            lock->tickets.head_tail, lock->debug.cpu);
@@ -387,7 +388,7 @@ void spinlock_profile_printall(unsigned char key)
 }
 
 static void spinlock_profile_reset_elem(struct lock_profile *data,
-    int32_t type, int32_t idx, void *par)
+    const char *type, int32_t idx, void *par)
 {
     data->lock_cnt = 0;
     data->block_cnt = 0;
@@ -411,7 +412,7 @@ typedef struct {
 } spinlock_profile_ucopy_t;
 
 static void spinlock_profile_ucopy_elem(struct lock_profile *data,
-    int32_t type, int32_t idx, void *par)
+    const char *type, int32_t idx, void *par)
 {
     spinlock_profile_ucopy_t *p = par;
     struct xen_sysctl_lockprof_data elem;
@@ -422,7 +423,7 @@ static void spinlock_profile_ucopy_elem(struct lock_profile *data,
     if ( p->pc->nr_elem < p->pc->max_elem )
     {
         safe_strcpy(elem.name, data->name);
-        elem.type = type;
+        safe_strcpy(elem.type, type);
         elem.idx = idx;
         elem.lock_cnt = data->lock_cnt;
         elem.block_cnt = data->block_cnt;
@@ -463,31 +464,67 @@ int spinlock_profile_control(struct xen_sysctl_lockprof_op *pc)
     return rc;
 }
 
-void _lock_profile_register_struct(
-    int32_t type, struct lock_profile_qhead *qhead, int32_t idx, char *name)
+static struct lock_profile_anc *find_prof_anc(const char *name)
 {
-    qhead->idx = idx;
+    struct lock_profile_anc *anc;
+
+    for ( anc = lock_profile_ancs; anc; anc = anc->next )
+        if ( !strcmp(anc->name, name) )
+            return anc;
+    return NULL;
+}
+
+void _lock_profile_register_struct(struct lock_profile_qhead *qhead,
+                                   int32_t idx, const char *name)
+{
+    struct lock_profile_anc *anc;
+
     spin_lock(&lock_profile_lock);
-    qhead->head_q = lock_profile_ancs[type].head_q;
-    lock_profile_ancs[type].head_q = qhead;
-    lock_profile_ancs[type].name = name;
+
+    anc = find_prof_anc(name);
+    if ( !anc )
+    {
+        anc = xzalloc(struct lock_profile_anc);
+        anc->name = name;
+        anc->next = lock_profile_ancs;
+        lock_profile_ancs = anc;
+    }
+
+    qhead->idx = idx;
+    qhead->head_q = anc->head_q;
+    anc->head_q = qhead;
+
     spin_unlock(&lock_profile_lock);
 }
 
-void _lock_profile_deregister_struct(
-    int32_t type, struct lock_profile_qhead *qhead)
+void _lock_profile_deregister_struct(struct lock_profile_qhead *qhead,
+                                     const char *name)
 {
+    struct lock_profile_anc *anc;
     struct lock_profile_qhead **q;
+    struct lock_profile *elem;
 
     spin_lock(&lock_profile_lock);
-    for ( q = &lock_profile_ancs[type].head_q; *q; q = &(*q)->head_q )
+
+    anc = find_prof_anc(name);
+    if ( anc )
     {
-        if ( *q == qhead )
+        for ( q = &anc->head_q; *q; q = &(*q)->head_q )
         {
-            *q = qhead->head_q;
-            break;
+            if ( *q == qhead )
+            {
+                *q = qhead->head_q;
+                while ( qhead->elem_q )
+                {
+                    elem = qhead->elem_q;
+                    qhead->elem_q = elem->next;
+                    xfree(elem);
+                }
+                break;
+            }
         }
     }
+
     spin_unlock(&lock_profile_lock);
 }
 
@@ -503,9 +540,8 @@ static int __init lock_prof_init(void)
         (*q)->lock->profile = *q;
     }
 
-    _lock_profile_register_struct(
-        LOCKPROF_TYPE_GLOBAL, &lock_profile_glb_q,
-        0, "Global lock");
+    _lock_profile_register_struct(&lock_profile_glb_q, LOCKPROF_IDX_NONE,
+                                  "Global");
 
     return 0;
 }
diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
index 91c48dcae0..fafce64f02 100644
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -35,7 +35,7 @@
 #include "domctl.h"
 #include "physdev.h"
 
-#define XEN_SYSCTL_INTERFACE_VERSION 0x00000012
+#define XEN_SYSCTL_INTERFACE_VERSION 0x00000013
 
 /*
  * Read console content from Xen buffer ring.
@@ -433,14 +433,11 @@ struct xen_sysctl_page_offline_op {
 /* Sub-operations: */
 #define XEN_SYSCTL_LOCKPROF_reset 1   /* Reset all profile data to zero. */
 #define XEN_SYSCTL_LOCKPROF_query 2   /* Get lock profile information. */
-/* Record-type: */
-#define LOCKPROF_TYPE_GLOBAL      0   /* global lock, idx meaningless */
-#define LOCKPROF_TYPE_PERDOM      1   /* per-domain lock, idx is domid */
-#define LOCKPROF_TYPE_N           2   /* number of types */
 struct xen_sysctl_lockprof_data {
-    char     name[40];     /* lock name (may include up to 2 %d specifiers) */
-    int32_t  type;         /* LOCKPROF_TYPE_??? */
+    char     name[40];     /* lock name */
+    char     type[20];     /* e.g. "domain" */
     int32_t  idx;          /* index (e.g. domain id) */
+#define LOCKPROF_IDX_NONE  -1
     uint64_aligned_t lock_cnt;     /* # of locking succeeded */
     uint64_aligned_t block_cnt;    /* # of wait for lock */
     uint64_aligned_t lock_time;    /* nsecs lock held */
diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
index 26891445fe..fe560f5f5c 100644
--- a/xen/include/xen/spinlock.h
+++ b/xen/include/xen/spinlock.h
@@ -51,17 +51,16 @@ union lock_debug { };
 
     - each structure has to be added to profiling with
 
-      lock_profile_register_struct(type, ptr, idx, print);
+      lock_profile_register_struct(ptr, idx, print);
 
       with:
-        type:  something like LOCKPROF_TYPE_PERDOM
         ptr:   pointer to the structure
         idx:   index of that structure, e.g. domid
         print: descriptive string like "domain"
 
     - removing of a structure is done via
 
-      lock_profile_deregister_struct(type, ptr);
+      lock_profile_deregister_struct(ptr, print);
 */
 
 struct spinlock;
@@ -107,14 +106,14 @@ struct lock_profile_qhead {
         (s)->profile_head.elem_q = prof;                                      \
     } while(0)
 
-void _lock_profile_register_struct(
-    int32_t, struct lock_profile_qhead *, int32_t, char *);
-void _lock_profile_deregister_struct(int32_t, struct lock_profile_qhead *);
+void _lock_profile_register_struct(struct lock_profile_qhead *, int32_t,
+                                   const char *);
+void _lock_profile_deregister_struct(struct lock_profile_qhead *, const char *);
 
-#define lock_profile_register_struct(type, ptr, idx, print)                   \
-    _lock_profile_register_struct(type, &((ptr)->profile_head), idx, print)
-#define lock_profile_deregister_struct(type, ptr)                             \
-    _lock_profile_deregister_struct(type, &((ptr)->profile_head))
+#define lock_profile_register_struct(ptr, idx, print)                         \
+    _lock_profile_register_struct(&((ptr)->profile_head), idx, print)
+#define lock_profile_deregister_struct(ptr, print)                            \
+    _lock_profile_deregister_struct(&((ptr)->profile_head), print)
 
 extern int spinlock_profile_control(struct xen_sysctl_lockprof_op *pc);
 extern void spinlock_profile_printall(unsigned char key);
@@ -128,8 +127,8 @@ struct lock_profile_qhead { };
 #define DEFINE_SPINLOCK(l) spinlock_t l = SPIN_LOCK_UNLOCKED
 
 #define spin_lock_init_prof(s, l) spin_lock_init(&((s)->l))
-#define lock_profile_register_struct(type, ptr, idx, print)
-#define lock_profile_deregister_struct(type, ptr)
+#define lock_profile_register_struct(ptr, idx, print)
+#define lock_profile_deregister_struct(ptr, print)
 #define spinlock_profile_printall(key)
 
 #endif
-- 
2.16.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* Re: [Xen-devel] [PATCH 1/4] xen/spinlocks: in debug builds store cpu holding the lock
  2019-08-07 14:31 ` [Xen-devel] [PATCH 1/4] xen/spinlocks: in debug builds store cpu holding the lock Juergen Gross
@ 2019-08-07 17:05   ` Andrew Cooper
  2019-08-08  4:34     ` Juergen Gross
  2019-08-08  6:58   ` Jan Beulich
  1 sibling, 1 reply; 36+ messages in thread
From: Andrew Cooper @ 2019-08-07 17:05 UTC (permalink / raw)
  To: Juergen Gross, xen-devel
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Tim Deegan, Ian Jackson, Julien Grall,
	Jan Beulich


On 07/08/2019 15:31, Juergen Gross wrote:
> Add the cpu currently holding the lock to struct lock_debug. This makes
> analysis of locking errors easier and it can be tested whether the
> correct cpu is releasing a lock again.
> 
> Signed-off-by: Juergen Gross <jgross@suse.com>

Looking at the structure:

xen.git/xen$ pahole xen-syms -E -C spinlock
struct spinlock {
	/* typedef spinlock_tickets_t */ union {
		/* typedef u32 */ unsigned int       head_tail;                          /*           4 */
		struct {
			/* typedef u16 */ short unsigned int head;                       /*     0     2 */
			/* typedef u16 */ short unsigned int tail;                       /*     2     2 */
		};                                                                       /*           4 */
	} tickets; /*     0     4 */
	/* typedef u16 */ short unsigned int         recurse_cpu:12;                     /*     4: 4  2 */
	/* typedef u16 */ short unsigned int         recurse_cnt:4;                      /*     4: 0  2 */
	union lock_debug {
		short unsigned int val;                                                  /*           2 */
		struct {
			short unsigned int unused:1;                                     /*     6:15  2 */
			short unsigned int irq_safe:1;                                   /*     6:14  2 */
			short unsigned int pad:2;                                        /*     6:12  2 */
			short unsigned int cpu:12;                                       /*     6: 0  2 */
		};                                                                       /*           2 */
	} debug; /*     6     2 */

	/* size: 8, cachelines: 1, members: 4 */
	/* last cacheline: 8 bytes */
};


we now get two 12-bit CPU fields trying to fit into 4 bytes.  Is it
possible to reuse the recurse_cpu field for debugging as well?

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Xen-devel] [PATCH 2/4] xen: add new CONFIG_SPINLOCK_DEBUG option
  2019-08-07 14:31 ` [Xen-devel] [PATCH 2/4] xen: add new CONFIG_SPINLOCK_DEBUG option Juergen Gross
@ 2019-08-07 17:17   ` Andrew Cooper
  2019-08-08  4:35     ` Juergen Gross
  2019-08-08  6:34   ` Jan Beulich
  1 sibling, 1 reply; 36+ messages in thread
From: Andrew Cooper @ 2019-08-07 17:17 UTC (permalink / raw)
  To: Juergen Gross, xen-devel
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Tim Deegan, Ian Jackson, Julien Grall,
	Jan Beulich

On 07/08/2019 15:31, Juergen Gross wrote:
> Instead of enabling spinlock debugging for debug builds only add a
> dedicated Kconfig option for that purpose which defaults to DEBUG.
> 
> Signed-off-by: Juergen Gross <jgross@suse.com>
> ---
>  xen/Kconfig.debug          | 7 +++++++
>  xen/common/spinlock.c      | 4 ++--
>  xen/include/xen/spinlock.h | 2 +-
>  3 files changed, 10 insertions(+), 3 deletions(-)
> 
> diff --git a/xen/Kconfig.debug b/xen/Kconfig.debug
> index e10e314e25..5db3e7ec25 100644
> --- a/xen/Kconfig.debug
> +++ b/xen/Kconfig.debug
> @@ -44,6 +44,13 @@ config COVERAGE
>  
>  	  If unsure, say N here.
>  
> +config SPINLOCK_DEBUG
> +	bool "Spinlock debugging"
> +	default DEBUG
> +	---help---
> +	  Enable debugging features of spinlock handling.  Some additional
> +          checks will be performed when acquiring and releasing locks.

Missing tab on the final line.  Can be fixed on commit).

Otherwise, Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Xen-devel] [PATCH 0/4] enhance lock debugging
  2019-08-07 14:31 [Xen-devel] [PATCH 0/4] enhance lock debugging Juergen Gross
                   ` (3 preceding siblings ...)
  2019-08-07 14:31 ` [Xen-devel] [PATCH 4/4] xen: modify lock profiling interface Juergen Gross
@ 2019-08-07 18:11 ` Andrew Cooper
  2019-08-08  4:50   ` Juergen Gross
  4 siblings, 1 reply; 36+ messages in thread
From: Andrew Cooper @ 2019-08-07 18:11 UTC (permalink / raw)
  To: Juergen Gross, xen-devel
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Tim Deegan, Ian Jackson, Julien Grall,
	Jan Beulich

On 07/08/2019 15:31, Juergen Gross wrote:
> While hunting a locking problem in my core scheduling series I have
> added some debugging aids to spinlock handling making it easier to
> find the root cause for the problem.
> 
> Making use of the already lock profiling and enhancing it a little
> bit produces some really valuable diagnostic data e.g. when a NMI
> watchdog is triggering a crash.

So I'm not sure where best to report this in the series, so 0/$N will
have to do.  Here is a sample trace from panic():

  (XEN) Xen lock profile info SHOW  (now = 49695791886 total = 49695791886)
  (XEN) Global grant_lock: addr=ffff82d080818640, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global balloon_lock: addr=ffff82d080818630, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global osvw_lock: addr=ffff82d080818610, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global mtrr_mutex: addr=ffff82d0808185f0, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global set_atomicity_lock: addr=ffff82d0808185e0, lockval=80008, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global cmci_discover_lock: addr=ffff82d0808185c0, lockval=80008, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global mce_logout_lock: addr=ffff82d0808185a0, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global processing_lock: addr=ffff82d080818580, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global vpmu_lock: addr=ffff82d080818560, lockval=40004, cpu=4095
  (XEN)   lock:           4(00000000:0000007D), block:            0(00000000:00000000)
  (XEN) Global pm_lock: addr=ffff82d080818230, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global rtc_lock: addr=ffff82d080818210, lockval=310031, cpu=4095
  (XEN)   lock:          48(00000000:0001892E), block:            0(00000000:00000000)
  (XEN) Global pit_lock: addr=ffff82d080818200, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global platform_timer_lock: addr=ffff82d0808181f0, lockval=6c006c, cpu=4095
  (XEN)   lock:          92(00000000:00012CAB), block:            0(00000000:00000000)
  (XEN) Global sync_lock: addr=ffff82d0808181d0, lockval=c188c188, cpu=4095
  (XEN)   lock:      115080(00000000:0074F130), block:            115079(00000000:0871000E)
  (XEN) Global lock: addr=ffff82d0808181e0, lockval=10001, cpu=4095
  (XEN)   lock:           1(00000000:01322165), block:            0(00000000:00000000)
  (XEN) Global flush_lock: addr=ffff82d0808181c0, lockval=91139111, cpu=3
  (XEN)   lock:      168195(00000000:159C6F6E), block:            1695(00000000:006B31FD)
  (XEN) Global xenpf_lock: addr=ffff82d0808181b0, lockval=2f002f, cpu=4095
  (XEN)   lock:          47(00000000:0001BC02), block:            0(00000000:00000000)
  (XEN) Global pci_config_lock: addr=ffff82d0808181a0, lockval=374d374d, cpu=4095
  (XEN)   lock:        5196(00000000:004545AA), block:            0(00000000:00000000)
  (XEN) Global lapic_nmi_owner_lock: addr=ffff82d080817d70, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global last_lock: addr=ffff82d080817d40, lockval=40034003, cpu=4095
  (XEN)   lock:       16387(00000000:00047132), block:            0(00000000:00000000)
  (XEN) Global map_pgdir_lock: addr=ffff82d080817d50, lockval=2c002c, cpu=4095
  (XEN)   lock:          38(00000000:00000ACF), block:            0(00000000:00000000)
  (XEN) Global microcode_mutex: addr=ffff82d080817d20, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global microcode_update_lock: addr=ffff82d080817d10, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global microcode_update_lock: addr=ffff82d080817d00, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global vector_lock: addr=ffff82d080817cf0, lockval=440044, cpu=4095
  (XEN)   lock:          27(00000000:00003B20), block:            0(00000000:00000000)
  (XEN) Global irq_ratelimit_lock: addr=ffff82d080817ce0, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global lock: addr=ffff82d080817cc0, lockval=100010, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global msix_fixmap_lock: addr=ffff82d080817cb0, lockval=20002, cpu=4095
  (XEN)   lock:           2(00000000:000000CF), block:            0(00000000:00000000)
  (XEN) Global ioapic_lock: addr=ffff82d080817c90, lockval=660066, cpu=4095
  (XEN)   lock:          20(00000000:00026E6C), block:            0(00000000:00000000)
  (XEN) Global i8259A_lock: addr=ffff82d080817c80, lockval=2d002d, cpu=4095
  (XEN)   lock:           6(00000000:00001BA0), block:            0(00000000:00000000)
  (XEN) Global apei_iomaps_lock: addr=ffff82d080817c50, lockval=110011, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global erst_lock: addr=ffff82d080817c40, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global shared_intremap_lock: addr=ffff82d080817bc0, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global iommu_pt_cleanup_lock: addr=ffff82d080817ba0, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global rx_lock: addr=ffff82d080817b70, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global tx_lock: addr=ffff82d080817b60, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global console_lock: addr=ffff82d080817830, lockval=770077, cpu=4095
  (XEN)   lock:        3447(00000000:07595894), block:            0(00000000:00000000)
  (XEN) Global ratelimit_lock: addr=ffff82d080817820, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global lock: addr=ffff82d080817800, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global pool_list_lock: addr=ffff82d0808177e0, lockval=10001, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global xenoprof_lock: addr=ffff82d0808177d0, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global pmu_owner_lock: addr=ffff82d0808177c0, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global vm_lock: addr=ffff82d0808177b0, lockval=220022, cpu=4095
  (XEN)   lock:          17(00000000:00001ED1), block:            0(00000000:00000000)
  (XEN) Global virtual_region_lock: addr=ffff82d0808177a0, lockval=10001, cpu=4095
  (XEN)   lock:           1(00000000:00000054), block:            0(00000000:00000000)
  (XEN) Global lock: addr=ffff82d080817780, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global wc_lock: addr=ffff82d080817770, lockval=c000c, cpu=4095
  (XEN)   lock:          11(00000000:00000995), block:            0(00000000:00000000)
  (XEN) Global tasklet_lock: addr=ffff82d080817760, lockval=3f403f4, cpu=4095
  (XEN)   lock:         426(00000000:00007AFF), block:            0(00000000:00000000)
  (XEN) Global sysctl_lock: addr=ffff82d080817750, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global symbols_mutex: addr=ffff82d080817740, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global stopmachine_lock: addr=ffff82d080817730, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global call_lock: addr=ffff82d080817710, lockval=4d004d, cpu=4095
  (XEN)   lock:          67(00000000:00176310), block:            0(00000000:00000000)
  (XEN) Global heap_lock: addr=ffff82d080817570, lockval=6b286b28, cpu=4095
  (XEN)   lock:      753071(00000000:018E9B8B), block:            33(00000000:0000916A)
  (XEN) Global payload_lock: addr=ffff82d080817550, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global crash_notes_lock: addr=ffff82d080817530, lockval=80008, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global lock: addr=ffff82d080817510, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global global_virq_handlers_lock: addr=ffff82d080817500, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global domlist_update_lock: addr=ffff82d0808174e0, lockval=10001, cpu=4095
  (XEN)   lock:           1(00000000:0000005B), block:            0(00000000:00000000)
  (XEN) Global domctl_lock: addr=ffff82d0808174d0, lockval=50005, cpu=4095
  (XEN)   lock:           5(00000000:000008DC), block:            0(00000000:00000000)
  (XEN) Global cpupool_lock: addr=ffff82d0808174b0, lockval=a000a, cpu=4095
  (XEN)   lock:           1(00000000:00000176), block:            0(00000000:00000000)
  (XEN) Global cpu_add_remove_lock: addr=ffff82d0808174a0, lockval=2d002d, cpu=4095
  (XEN)   lock:          24(00000000:0000175F), block:            0(00000000:00000000)
  (XEN) Global efi_rs_lock: addr=ffff82d080817030, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Domain 0 event_lock: addr=ffff8308366c50e8, lockval=1d451d45, cpu=4095
  (XEN)   lock:        7493(00000000:00866052), block:            0(00000000:00000000)
  (XEN) Domain 0 page_alloc_lock: addr=ffff8308366c5028, lockval=9aa59aa5, cpu=4095
  (XEN)   lock:       39589(00000000:0055339E), block:            0(00000000:00000000)
  (XEN) Domain 0 domain_lock: addr=ffff8308366c5018, lockval=150015, cpu=4095
  (XEN)   lock:          21(00000000:00009D70), block:            0(00000000:00000000)
  (XEN) Domain 32767 page_alloc_lock: addr=ffff83084ccc5028, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Domain 32767 domain_lock: addr=ffff83084ccc5018, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Domain 32753 page_alloc_lock: addr=ffff83084cccf028, lockval=3ee43ee4, cpu=4095
  (XEN)   lock:       16100(00000000:0005173F), block:            0(00000000:00000000)
  (XEN) Domain 32753 domain_lock: addr=ffff83084cccf018, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Domain 32754 page_alloc_lock: addr=ffff83084ccd0028, lockval=64006400, cpu=4095
  (XEN)   lock:       25600(00000000:000814D9), block:            0(00000000:00000000)
  (XEN) Domain 32754 domain_lock: addr=ffff83084ccd0018, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)

Its not exactly the easiest to dump to follow.

First of all - I don't see why the hold/block time are printed like that.  It
might be a holdover from the 32bit build, pre PRId64 support.  They should
probably use PRI_stime anyway.

The domid rendering is unfortunate.  Ideally we'd use %pd but that would
involve rearranging the logic to get a struct domain* in hand.  Seeing as
you're the last person to modify this code, how hard would that be to do?

If/when the per-domain locks are rendered more clearly, the "Global " prefix
can be dropped.

An interesting quirk of ticketlocks is that lockval= is redundant with
lock_cnt.  Could we perhaps fold the two fields?

Distinguishing cpu=4095 as "not locked" would be useful.

We have several global locks called lock:

  (XEN) Global lock: addr=ffff82d0808181e0, lockval=10001, cpu=4095
  (XEN)   lock:           1(00000000:01322165), block:            0(00000000:00000000)
  (XEN) Global lock: addr=ffff82d080817cc0, lockval=100010, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global lock: addr=ffff82d080817800, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global lock: addr=ffff82d080817780, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
  (XEN) Global lock: addr=ffff82d080817510, lockval=0000, cpu=4095
  (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)

The second one in particular has corrupt data, seeing has it has been taken
and released several times without lock_cnt increasing.

For sanity sake, we should enforce unique naming of any lock registered for
profiling.

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Xen-devel] [PATCH 1/4] xen/spinlocks: in debug builds store cpu holding the lock
  2019-08-07 17:05   ` Andrew Cooper
@ 2019-08-08  4:34     ` Juergen Gross
  0 siblings, 0 replies; 36+ messages in thread
From: Juergen Gross @ 2019-08-08  4:34 UTC (permalink / raw)
  To: Andrew Cooper, xen-devel
  Cc: Stefano Stabellini, WeiLiu, Konrad Rzeszutek Wilk, George Dunlap,
	Tim Deegan, Ian Jackson, Julien Grall, Jan Beulich

On 07.08.19 19:05, Andrew Cooper wrote:
> 
> On 07/08/2019 15:31, Juergen Gross wrote:
>> Add the cpu currently holding the lock to struct lock_debug. This makes
>> analysis of locking errors easier and it can be tested whether the
>> correct cpu is releasing a lock again.
>>
>> Signed-off-by: Juergen Gross <jgross@suse.com>
> 
> Looking at the structure:
> 
> xen.git/xen$ pahole xen-syms -E -C spinlock
> struct spinlock {
> 	/* typedef spinlock_tickets_t */ union {
> 		/* typedef u32 */ unsigned int       head_tail;                          /*           4 */
> 		struct {
> 			/* typedef u16 */ short unsigned int head;                       /*     0     2 */
> 			/* typedef u16 */ short unsigned int tail;                       /*     2     2 */
> 		};                                                                       /*           4 */
> 	} tickets; /*     0     4 */
> 	/* typedef u16 */ short unsigned int         recurse_cpu:12;                     /*     4: 4  2 */
> 	/* typedef u16 */ short unsigned int         recurse_cnt:4;                      /*     4: 0  2 */
> 	union lock_debug {
> 		short unsigned int val;                                                  /*           2 */
> 		struct {
> 			short unsigned int unused:1;                                     /*     6:15  2 */
> 			short unsigned int irq_safe:1;                                   /*     6:14  2 */
> 			short unsigned int pad:2;                                        /*     6:12  2 */
> 			short unsigned int cpu:12;                                       /*     6: 0  2 */
> 		};                                                                       /*           2 */
> 	} debug; /*     6     2 */
> 
> 	/* size: 8, cachelines: 1, members: 4 */
> 	/* last cacheline: 8 bytes */
> };
> 
> 
> we now get two 12-bit CPU fields trying to fit into 4 bytes.  Is it
> possible to reuse the recurse_cpu field for debugging as well?

I don't think this would be a good idea for two reasons:

- Changing the way recurse_cpu is being used in debug builds might
   result in weird behavior in corer cases. That's not what debug
   additions are meant for.

- We might be able to save 2 bytes, which will then just be unused.
   So no memory saved, but complexity gained.


Juergen

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Xen-devel] [PATCH 2/4] xen: add new CONFIG_SPINLOCK_DEBUG option
  2019-08-07 17:17   ` Andrew Cooper
@ 2019-08-08  4:35     ` Juergen Gross
  0 siblings, 0 replies; 36+ messages in thread
From: Juergen Gross @ 2019-08-08  4:35 UTC (permalink / raw)
  To: Andrew Cooper, xen-devel
  Cc: Stefano Stabellini, WeiLiu, Konrad Rzeszutek Wilk, George Dunlap,
	Tim Deegan, Ian Jackson, Julien Grall, Jan Beulich

On 07.08.19 19:17, Andrew Cooper wrote:
> On 07/08/2019 15:31, Juergen Gross wrote:
>> Instead of enabling spinlock debugging for debug builds only add a
>> dedicated Kconfig option for that purpose which defaults to DEBUG.
>>
>> Signed-off-by: Juergen Gross <jgross@suse.com>
>> ---
>>   xen/Kconfig.debug          | 7 +++++++
>>   xen/common/spinlock.c      | 4 ++--
>>   xen/include/xen/spinlock.h | 2 +-
>>   3 files changed, 10 insertions(+), 3 deletions(-)
>>
>> diff --git a/xen/Kconfig.debug b/xen/Kconfig.debug
>> index e10e314e25..5db3e7ec25 100644
>> --- a/xen/Kconfig.debug
>> +++ b/xen/Kconfig.debug
>> @@ -44,6 +44,13 @@ config COVERAGE
>>   
>>   	  If unsure, say N here.
>>   
>> +config SPINLOCK_DEBUG
>> +	bool "Spinlock debugging"
>> +	default DEBUG
>> +	---help---
>> +	  Enable debugging features of spinlock handling.  Some additional
>> +          checks will be performed when acquiring and releasing locks.
> 
> Missing tab on the final line.  Can be fixed on commit).

Oh, sorry. Will fix in V2.

> 
> Otherwise, Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
> 

Thanks,


Juergen

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Xen-devel] [PATCH 0/4] enhance lock debugging
  2019-08-07 18:11 ` [Xen-devel] [PATCH 0/4] enhance lock debugging Andrew Cooper
@ 2019-08-08  4:50   ` Juergen Gross
  2019-08-08  8:33     ` Andrew Cooper
  0 siblings, 1 reply; 36+ messages in thread
From: Juergen Gross @ 2019-08-08  4:50 UTC (permalink / raw)
  To: Andrew Cooper, xen-devel
  Cc: Stefano Stabellini, WeiLiu, Konrad Rzeszutek Wilk, George Dunlap,
	Tim Deegan, Ian Jackson, Julien Grall, Jan Beulich

On 07.08.19 20:11, Andrew Cooper wrote:
> On 07/08/2019 15:31, Juergen Gross wrote:
>> While hunting a locking problem in my core scheduling series I have
>> added some debugging aids to spinlock handling making it easier to
>> find the root cause for the problem.
>>
>> Making use of the already lock profiling and enhancing it a little
>> bit produces some really valuable diagnostic data e.g. when a NMI
>> watchdog is triggering a crash.
> 
> So I'm not sure where best to report this in the series, so 0/$N will
> have to do.  Here is a sample trace from panic():
> 
>    (XEN) Xen lock profile info SHOW  (now = 49695791886 total = 49695791886)
>    (XEN) Global grant_lock: addr=ffff82d080818640, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global balloon_lock: addr=ffff82d080818630, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global osvw_lock: addr=ffff82d080818610, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global mtrr_mutex: addr=ffff82d0808185f0, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global set_atomicity_lock: addr=ffff82d0808185e0, lockval=80008, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global cmci_discover_lock: addr=ffff82d0808185c0, lockval=80008, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global mce_logout_lock: addr=ffff82d0808185a0, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global processing_lock: addr=ffff82d080818580, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global vpmu_lock: addr=ffff82d080818560, lockval=40004, cpu=4095
>    (XEN)   lock:           4(00000000:0000007D), block:            0(00000000:00000000)
>    (XEN) Global pm_lock: addr=ffff82d080818230, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global rtc_lock: addr=ffff82d080818210, lockval=310031, cpu=4095
>    (XEN)   lock:          48(00000000:0001892E), block:            0(00000000:00000000)
>    (XEN) Global pit_lock: addr=ffff82d080818200, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global platform_timer_lock: addr=ffff82d0808181f0, lockval=6c006c, cpu=4095
>    (XEN)   lock:          92(00000000:00012CAB), block:            0(00000000:00000000)
>    (XEN) Global sync_lock: addr=ffff82d0808181d0, lockval=c188c188, cpu=4095
>    (XEN)   lock:      115080(00000000:0074F130), block:            115079(00000000:0871000E)
>    (XEN) Global lock: addr=ffff82d0808181e0, lockval=10001, cpu=4095
>    (XEN)   lock:           1(00000000:01322165), block:            0(00000000:00000000)
>    (XEN) Global flush_lock: addr=ffff82d0808181c0, lockval=91139111, cpu=3
>    (XEN)   lock:      168195(00000000:159C6F6E), block:            1695(00000000:006B31FD)
>    (XEN) Global xenpf_lock: addr=ffff82d0808181b0, lockval=2f002f, cpu=4095
>    (XEN)   lock:          47(00000000:0001BC02), block:            0(00000000:00000000)
>    (XEN) Global pci_config_lock: addr=ffff82d0808181a0, lockval=374d374d, cpu=4095
>    (XEN)   lock:        5196(00000000:004545AA), block:            0(00000000:00000000)
>    (XEN) Global lapic_nmi_owner_lock: addr=ffff82d080817d70, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global last_lock: addr=ffff82d080817d40, lockval=40034003, cpu=4095
>    (XEN)   lock:       16387(00000000:00047132), block:            0(00000000:00000000)
>    (XEN) Global map_pgdir_lock: addr=ffff82d080817d50, lockval=2c002c, cpu=4095
>    (XEN)   lock:          38(00000000:00000ACF), block:            0(00000000:00000000)
>    (XEN) Global microcode_mutex: addr=ffff82d080817d20, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global microcode_update_lock: addr=ffff82d080817d10, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global microcode_update_lock: addr=ffff82d080817d00, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global vector_lock: addr=ffff82d080817cf0, lockval=440044, cpu=4095
>    (XEN)   lock:          27(00000000:00003B20), block:            0(00000000:00000000)
>    (XEN) Global irq_ratelimit_lock: addr=ffff82d080817ce0, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global lock: addr=ffff82d080817cc0, lockval=100010, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global msix_fixmap_lock: addr=ffff82d080817cb0, lockval=20002, cpu=4095
>    (XEN)   lock:           2(00000000:000000CF), block:            0(00000000:00000000)
>    (XEN) Global ioapic_lock: addr=ffff82d080817c90, lockval=660066, cpu=4095
>    (XEN)   lock:          20(00000000:00026E6C), block:            0(00000000:00000000)
>    (XEN) Global i8259A_lock: addr=ffff82d080817c80, lockval=2d002d, cpu=4095
>    (XEN)   lock:           6(00000000:00001BA0), block:            0(00000000:00000000)
>    (XEN) Global apei_iomaps_lock: addr=ffff82d080817c50, lockval=110011, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global erst_lock: addr=ffff82d080817c40, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global shared_intremap_lock: addr=ffff82d080817bc0, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global iommu_pt_cleanup_lock: addr=ffff82d080817ba0, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global rx_lock: addr=ffff82d080817b70, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global tx_lock: addr=ffff82d080817b60, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global console_lock: addr=ffff82d080817830, lockval=770077, cpu=4095
>    (XEN)   lock:        3447(00000000:07595894), block:            0(00000000:00000000)
>    (XEN) Global ratelimit_lock: addr=ffff82d080817820, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global lock: addr=ffff82d080817800, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global pool_list_lock: addr=ffff82d0808177e0, lockval=10001, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global xenoprof_lock: addr=ffff82d0808177d0, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global pmu_owner_lock: addr=ffff82d0808177c0, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global vm_lock: addr=ffff82d0808177b0, lockval=220022, cpu=4095
>    (XEN)   lock:          17(00000000:00001ED1), block:            0(00000000:00000000)
>    (XEN) Global virtual_region_lock: addr=ffff82d0808177a0, lockval=10001, cpu=4095
>    (XEN)   lock:           1(00000000:00000054), block:            0(00000000:00000000)
>    (XEN) Global lock: addr=ffff82d080817780, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global wc_lock: addr=ffff82d080817770, lockval=c000c, cpu=4095
>    (XEN)   lock:          11(00000000:00000995), block:            0(00000000:00000000)
>    (XEN) Global tasklet_lock: addr=ffff82d080817760, lockval=3f403f4, cpu=4095
>    (XEN)   lock:         426(00000000:00007AFF), block:            0(00000000:00000000)
>    (XEN) Global sysctl_lock: addr=ffff82d080817750, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global symbols_mutex: addr=ffff82d080817740, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global stopmachine_lock: addr=ffff82d080817730, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global call_lock: addr=ffff82d080817710, lockval=4d004d, cpu=4095
>    (XEN)   lock:          67(00000000:00176310), block:            0(00000000:00000000)
>    (XEN) Global heap_lock: addr=ffff82d080817570, lockval=6b286b28, cpu=4095
>    (XEN)   lock:      753071(00000000:018E9B8B), block:            33(00000000:0000916A)
>    (XEN) Global payload_lock: addr=ffff82d080817550, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global crash_notes_lock: addr=ffff82d080817530, lockval=80008, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global lock: addr=ffff82d080817510, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global global_virq_handlers_lock: addr=ffff82d080817500, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global domlist_update_lock: addr=ffff82d0808174e0, lockval=10001, cpu=4095
>    (XEN)   lock:           1(00000000:0000005B), block:            0(00000000:00000000)
>    (XEN) Global domctl_lock: addr=ffff82d0808174d0, lockval=50005, cpu=4095
>    (XEN)   lock:           5(00000000:000008DC), block:            0(00000000:00000000)
>    (XEN) Global cpupool_lock: addr=ffff82d0808174b0, lockval=a000a, cpu=4095
>    (XEN)   lock:           1(00000000:00000176), block:            0(00000000:00000000)
>    (XEN) Global cpu_add_remove_lock: addr=ffff82d0808174a0, lockval=2d002d, cpu=4095
>    (XEN)   lock:          24(00000000:0000175F), block:            0(00000000:00000000)
>    (XEN) Global efi_rs_lock: addr=ffff82d080817030, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Domain 0 event_lock: addr=ffff8308366c50e8, lockval=1d451d45, cpu=4095
>    (XEN)   lock:        7493(00000000:00866052), block:            0(00000000:00000000)
>    (XEN) Domain 0 page_alloc_lock: addr=ffff8308366c5028, lockval=9aa59aa5, cpu=4095
>    (XEN)   lock:       39589(00000000:0055339E), block:            0(00000000:00000000)
>    (XEN) Domain 0 domain_lock: addr=ffff8308366c5018, lockval=150015, cpu=4095
>    (XEN)   lock:          21(00000000:00009D70), block:            0(00000000:00000000)
>    (XEN) Domain 32767 page_alloc_lock: addr=ffff83084ccc5028, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Domain 32767 domain_lock: addr=ffff83084ccc5018, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Domain 32753 page_alloc_lock: addr=ffff83084cccf028, lockval=3ee43ee4, cpu=4095
>    (XEN)   lock:       16100(00000000:0005173F), block:            0(00000000:00000000)
>    (XEN) Domain 32753 domain_lock: addr=ffff83084cccf018, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Domain 32754 page_alloc_lock: addr=ffff83084ccd0028, lockval=64006400, cpu=4095
>    (XEN)   lock:       25600(00000000:000814D9), block:            0(00000000:00000000)
>    (XEN) Domain 32754 domain_lock: addr=ffff83084ccd0018, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
> 
> Its not exactly the easiest to dump to follow.
> 
> First of all - I don't see why the hold/block time are printed like that.  It
> might be a holdover from the 32bit build, pre PRId64 support.  They should
> probably use PRI_stime anyway.

Fine with me.

> The domid rendering is unfortunate.  Ideally we'd use %pd but that would
> involve rearranging the logic to get a struct domain* in hand.  Seeing as
> you're the last person to modify this code, how hard would that be to do?

It would completely break the struct type agnostic design.

I have a debug patch adding credit2 run-queue lock. It requires to just
add 5 lines of code (and this includes moving the spinlock_init() out of
an irq-off section). It will produce:

(XEN) credit2-runq 0 lock: addr=ffff830413351010, lockval=de00ddf, cpu=6
(XEN)   lock: 896287(00000000:00FAA6B2), block:  819(00000000:00009C80)

> If/when the per-domain locks are rendered more clearly, the "Global " prefix
> can be dropped.

I'm not sure we should do that.

> An interesting quirk of ticketlocks is that lockval= is redundant with
> lock_cnt.  Could we perhaps fold the two fields?

No. See my credit2 example. As soon as we get above 65535 lockings the
values are no longer redundant.

> Distinguishing cpu=4095 as "not locked" would be useful.

Yes, will change.

> We have several global locks called lock:
> 
>    (XEN) Global lock: addr=ffff82d0808181e0, lockval=10001, cpu=4095
>    (XEN)   lock:           1(00000000:01322165), block:            0(00000000:00000000)
>    (XEN) Global lock: addr=ffff82d080817cc0, lockval=100010, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global lock: addr=ffff82d080817800, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global lock: addr=ffff82d080817780, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
>    (XEN) Global lock: addr=ffff82d080817510, lockval=0000, cpu=4095
>    (XEN)   lock:           0(00000000:00000000), block:            0(00000000:00000000)
> 
> The second one in particular has corrupt data, seeing has it has been taken
> and released several times without lock_cnt increasing.

The lock might have been taken/released before lock profiling has been
initialized.

> For sanity sake, we should enforce unique naming of any lock registered for
> profiling.

This would be every lock inited via DEFINE_SPINLOCK(). I can do a
followup patch for that purpose.


Juergen

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Xen-devel] [PATCH 2/4] xen: add new CONFIG_SPINLOCK_DEBUG option
  2019-08-07 14:31 ` [Xen-devel] [PATCH 2/4] xen: add new CONFIG_SPINLOCK_DEBUG option Juergen Gross
  2019-08-07 17:17   ` Andrew Cooper
@ 2019-08-08  6:34   ` Jan Beulich
  2019-08-08  7:23     ` Juergen Gross
  1 sibling, 1 reply; 36+ messages in thread
From: Jan Beulich @ 2019-08-08  6:34 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, xen-devel

On 07.08.2019 16:31, Juergen Gross wrote:
> --- a/xen/Kconfig.debug
> +++ b/xen/Kconfig.debug
> @@ -44,6 +44,13 @@ config COVERAGE
>   
>   	  If unsure, say N here.
>   
> +config SPINLOCK_DEBUG
> +	bool "Spinlock debugging"
> +	default DEBUG
> +	---help---
> +	  Enable debugging features of spinlock handling.  Some additional
> +          checks will be performed when acquiring and releasing locks.
> +
>   config LOCK_PROFILE

While the pre-existing LOCK_PROFILE suggests the opposite, I'd
still like to propose that we uniformly name all debugging
options CONFIG_DEBUG_* (rather than having the DEBUG at the
end). Thoughts?

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Xen-devel] [PATCH 1/4] xen/spinlocks: in debug builds store cpu holding the lock
  2019-08-07 14:31 ` [Xen-devel] [PATCH 1/4] xen/spinlocks: in debug builds store cpu holding the lock Juergen Gross
  2019-08-07 17:05   ` Andrew Cooper
@ 2019-08-08  6:58   ` Jan Beulich
  2019-08-08  7:51     ` Juergen Gross
  1 sibling, 1 reply; 36+ messages in thread
From: Jan Beulich @ 2019-08-08  6:58 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, xen-devel

On 07.08.2019 16:31, Juergen Gross wrote:
> --- a/xen/common/spinlock.c
> +++ b/xen/common/spinlock.c
> @@ -13,9 +13,9 @@
>   
>   static atomic_t spin_debug __read_mostly = ATOMIC_INIT(0);
>   
> -static void check_lock(struct lock_debug *debug)
> +static void check_lock(union lock_debug *debug)
>   {
> -    int irq_safe = !local_irq_is_enabled();
> +    unsigned short irq_safe = !local_irq_is_enabled();

bool? Seeing your heavy use of "unsigned short" I realize that
my CodingStyle change committed yesterday still wasn't precise
enough.

> @@ -43,18 +43,21 @@ static void check_lock(struct lock_debug *debug)
>      */
>      if ( unlikely(debug->irq_safe != irq_safe) )
>      {
> -        int seen = cmpxchg(&debug->irq_safe, -1, irq_safe);
> +        union lock_debug seen, new = { 0 };
>   
> -        if ( seen == !irq_safe )
> +        new.irq_safe = irq_safe;
> +        seen.val = cmpxchg(&debug->val, 0xffff, new.val);

This 0xffff should be connected (by way of at least a #define) to
the one used in _LOCK_DEBUG.

> +        if ( !seen.unused && seen.irq_safe == !irq_safe )

While "unused" makes sufficient sense here, ...

> --- a/xen/include/xen/spinlock.h
> +++ b/xen/include/xen/spinlock.h
> @@ -7,14 +7,20 @@
>   #include <xen/percpu.h>
>   
>   #ifndef NDEBUG
> -struct lock_debug {
> -    s16 irq_safe; /* +1: IRQ-safe; 0: not IRQ-safe; -1: don't know yet */
> +union lock_debug {
> +    unsigned short val;
> +    struct {
> +        unsigned short unused:1;

... it gives a rather misleading impression of this being an unused
field here. How about e.g. "unseen" instead?

> +        unsigned short irq_safe:1;
> +        unsigned short pad:2;
> +        unsigned short cpu:12;
> +    };
>   };

Do we have an implied assumption somewhere that unsigned short is
exactly 16 bits wide? I think "val" wants to be uint16_t here (as
you really mean "exactly 16 bits"), the two boolean fields want
to be bool, and the remaining two ones unsigned int.

Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Xen-devel] [PATCH 3/4] xen: print lock profile info in panic()
  2019-08-07 14:31 ` [Xen-devel] [PATCH 3/4] xen: print lock profile info in panic() Juergen Gross
@ 2019-08-08  7:09   ` Jan Beulich
  2019-08-08  8:26     ` Juergen Gross
  0 siblings, 1 reply; 36+ messages in thread
From: Jan Beulich @ 2019-08-08  7:09 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, xen-devel

On 07.08.2019 16:31, Juergen Gross wrote:
> --- a/xen/Kconfig.debug
> +++ b/xen/Kconfig.debug
> @@ -53,6 +53,7 @@ config SPINLOCK_DEBUG
>   
>   config LOCK_PROFILE
>   	bool "Lock Profiling"
> +	select SPINLOCK_DEBUG
>   	---help---
>   	  Lock profiling allows you to see how often locks are taken and blocked.
>   	  You can use serial console to print (and reset) using 'l' and 'L'

In which case, for sensible user experience, the selected
option should come after this one. The way it is now afaict
there'll be a prompt for SPINLOCK_DEBUG, the user may say
"no", just to find that because of saying "yes" here it'll
be turned on anyway. Whereas with switched ordering there'll
be no prompt for the debug option at all (again afaict) if
the profiling option was set to "yes".

Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Xen-devel] [PATCH 2/4] xen: add new CONFIG_SPINLOCK_DEBUG option
  2019-08-08  6:34   ` Jan Beulich
@ 2019-08-08  7:23     ` Juergen Gross
  2019-08-08  7:35       ` Jan Beulich
  0 siblings, 1 reply; 36+ messages in thread
From: Juergen Gross @ 2019-08-08  7:23 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, xen-devel

On 08.08.19 08:34, Jan Beulich wrote:
> On 07.08.2019 16:31, Juergen Gross wrote:
>> --- a/xen/Kconfig.debug
>> +++ b/xen/Kconfig.debug
>> @@ -44,6 +44,13 @@ config COVERAGE
>>         If unsure, say N here.
>> +config SPINLOCK_DEBUG
>> +    bool "Spinlock debugging"
>> +    default DEBUG
>> +    ---help---
>> +      Enable debugging features of spinlock handling.  Some additional
>> +          checks will be performed when acquiring and releasing locks.
>> +
>>   config LOCK_PROFILE
> 
> While the pre-existing LOCK_PROFILE suggests the opposite, I'd
> still like to propose that we uniformly name all debugging
> options CONFIG_DEBUG_* (rather than having the DEBUG at the
> end). Thoughts?

Fine with me. I can rename the LOCK_PROFILE option in patch 4 as I'm
touching it anyway.


Juergen


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Xen-devel] [PATCH 4/4] xen: modify lock profiling interface
  2019-08-07 14:31 ` [Xen-devel] [PATCH 4/4] xen: modify lock profiling interface Juergen Gross
@ 2019-08-08  7:32   ` Jan Beulich
  2019-08-08  8:13     ` Juergen Gross
  0 siblings, 1 reply; 36+ messages in thread
From: Jan Beulich @ 2019-08-08  7:32 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, xen-devel

On 07.08.2019 16:31, Juergen Gross wrote:
> @@ -463,31 +464,67 @@ int spinlock_profile_control(struct xen_sysctl_lockprof_op *pc)
>       return rc;
>   }
>   
> -void _lock_profile_register_struct(
> -    int32_t type, struct lock_profile_qhead *qhead, int32_t idx, char *name)
> +static struct lock_profile_anc *find_prof_anc(const char *name)
>   {
> -    qhead->idx = idx;
> +    struct lock_profile_anc *anc;
> +
> +    for ( anc = lock_profile_ancs; anc; anc = anc->next )
> +        if ( !strcmp(anc->name, name) )
> +            return anc;
> +    return NULL;
> +}
> +
> +void _lock_profile_register_struct(struct lock_profile_qhead *qhead,
> +                                   int32_t idx, const char *name)
> +{
> +    struct lock_profile_anc *anc;
> +
>       spin_lock(&lock_profile_lock);
> -    qhead->head_q = lock_profile_ancs[type].head_q;
> -    lock_profile_ancs[type].head_q = qhead;
> -    lock_profile_ancs[type].name = name;
> +
> +    anc = find_prof_anc(name);
> +    if ( !anc )
> +    {
> +        anc = xzalloc(struct lock_profile_anc);
> +        anc->name = name;
> +        anc->next = lock_profile_ancs;
> +        lock_profile_ancs = anc;
> +    }

This is an imo fundamental weakness of the new model: You now
require a dynamic memory allocation (of which you don't even
check that it was successful). Right now the path above will
only be taken at boot time, but that's not stated anywhere as
a restriction (afaics), and hence doesn't need to remain this
way.

Furthermore "name" now serves two purposes when previously it
served just one. This is best noticeable with the function's
use in domain_create(): Previously that set up the class
"per-domain" with an initial instance "struct domain". This
did provide for someone later going and also registering
another per-domain structure (e.g. struct p2m_domain) as
another "per-domain" class instance. Then again maybe I'm not
correctly understanding the original intentions ...

Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Xen-devel] [PATCH 2/4] xen: add new CONFIG_SPINLOCK_DEBUG option
  2019-08-08  7:23     ` Juergen Gross
@ 2019-08-08  7:35       ` Jan Beulich
  2019-08-08  8:25         ` Juergen Gross
  0 siblings, 1 reply; 36+ messages in thread
From: Jan Beulich @ 2019-08-08  7:35 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, xen-devel

On 08.08.2019 09:23, Juergen Gross wrote:
> On 08.08.19 08:34, Jan Beulich wrote:
>> On 07.08.2019 16:31, Juergen Gross wrote:
>>> --- a/xen/Kconfig.debug
>>> +++ b/xen/Kconfig.debug
>>> @@ -44,6 +44,13 @@ config COVERAGE
>>>         If unsure, say N here.
>>> +config SPINLOCK_DEBUG
>>> +    bool "Spinlock debugging"
>>> +    default DEBUG
>>> +    ---help---
>>> +      Enable debugging features of spinlock handling.  Some additional
>>> +          checks will be performed when acquiring and releasing locks.
>>> +
>>>   config LOCK_PROFILE
>>
>> While the pre-existing LOCK_PROFILE suggests the opposite, I'd
>> still like to propose that we uniformly name all debugging
>> options CONFIG_DEBUG_* (rather than having the DEBUG at the
>> end). Thoughts?
> 
> Fine with me. I can rename the LOCK_PROFILE option in patch 4 as I'm
> touching it anyway.

One more thing: Perhaps it would better be DEBUG_LOCK (i.e.
without "SPIN") or DEBUG_LOCKS, to also allow it to cover r/w
locks, should anyone want to instrument them as well.

Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Xen-devel] [PATCH 1/4] xen/spinlocks: in debug builds store cpu holding the lock
  2019-08-08  6:58   ` Jan Beulich
@ 2019-08-08  7:51     ` Juergen Gross
  2019-08-08  7:59       ` Jan Beulich
  2019-08-08 10:28       ` Julien Grall
  0 siblings, 2 replies; 36+ messages in thread
From: Juergen Gross @ 2019-08-08  7:51 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, xen-devel

On 08.08.19 08:58, Jan Beulich wrote:
> On 07.08.2019 16:31, Juergen Gross wrote:
>> --- a/xen/common/spinlock.c
>> +++ b/xen/common/spinlock.c
>> @@ -13,9 +13,9 @@
>>    
>>    static atomic_t spin_debug __read_mostly = ATOMIC_INIT(0);
>>    
>> -static void check_lock(struct lock_debug *debug)
>> +static void check_lock(union lock_debug *debug)
>>    {
>> -    int irq_safe = !local_irq_is_enabled();
>> +    unsigned short irq_safe = !local_irq_is_enabled();
> 
> bool? Seeing your heavy use of "unsigned short" I realize that
> my CodingStyle change committed yesterday still wasn't precise
> enough.

I wanted to be consistent with the type used in the structure.
I can switch to bool if you like that better.

> 
>> @@ -43,18 +43,21 @@ static void check_lock(struct lock_debug *debug)
>>       */
>>       if ( unlikely(debug->irq_safe != irq_safe) )
>>       {
>> -        int seen = cmpxchg(&debug->irq_safe, -1, irq_safe);
>> +        union lock_debug seen, new = { 0 };
>>    
>> -        if ( seen == !irq_safe )
>> +        new.irq_safe = irq_safe;
>> +        seen.val = cmpxchg(&debug->val, 0xffff, new.val);
> 
> This 0xffff should be connected (by way of at least a #define) to
> the one used in _LOCK_DEBUG.

Okay.

> 
>> +        if ( !seen.unused && seen.irq_safe == !irq_safe )
> 
> While "unused" makes sufficient sense here, ...
> 
>> --- a/xen/include/xen/spinlock.h
>> +++ b/xen/include/xen/spinlock.h
>> @@ -7,14 +7,20 @@
>>    #include <xen/percpu.h>
>>    
>>    #ifndef NDEBUG
>> -struct lock_debug {
>> -    s16 irq_safe; /* +1: IRQ-safe; 0: not IRQ-safe; -1: don't know yet */
>> +union lock_debug {
>> +    unsigned short val;
>> +    struct {
>> +        unsigned short unused:1;
> 
> ... it gives a rather misleading impression of this being an unused
> field here. How about e.g. "unseen" instead?

Yes, that seems to be the better choice.

> 
>> +        unsigned short irq_safe:1;
>> +        unsigned short pad:2;
>> +        unsigned short cpu:12;
>> +    };
>>    };
> 
> Do we have an implied assumption somewhere that unsigned short is
> exactly 16 bits wide? I think "val" wants to be uint16_t here (as
> you really mean "exactly 16 bits"), the two boolean fields want
> to be bool, and the remaining two ones unsigned int.

But that would increase the size of the union to 4 bytes instead of 2.
So at least pad and cpu must be unsigned short or (better) uint16_t.


Juergen


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Xen-devel] [PATCH 1/4] xen/spinlocks: in debug builds store cpu holding the lock
  2019-08-08  7:51     ` Juergen Gross
@ 2019-08-08  7:59       ` Jan Beulich
  2019-08-08 10:28       ` Julien Grall
  1 sibling, 0 replies; 36+ messages in thread
From: Jan Beulich @ 2019-08-08  7:59 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, xen-devel

On 08.08.2019 09:51, Juergen Gross wrote:
> On 08.08.19 08:58, Jan Beulich wrote:
>> On 07.08.2019 16:31, Juergen Gross wrote:
>>> +        unsigned short irq_safe:1;
>>> +        unsigned short pad:2;
>>> +        unsigned short cpu:12;
>>> +    };
>>>    };
>>
>> Do we have an implied assumption somewhere that unsigned short is
>> exactly 16 bits wide? I think "val" wants to be uint16_t here (as
>> you really mean "exactly 16 bits"), the two boolean fields want
>> to be bool, and the remaining two ones unsigned int.
> 
> But that would increase the size of the union to 4 bytes instead of 2.
> So at least pad and cpu must be unsigned short or (better) uint16_t.

Oh, right.

Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Xen-devel] [PATCH 4/4] xen: modify lock profiling interface
  2019-08-08  7:32   ` Jan Beulich
@ 2019-08-08  8:13     ` Juergen Gross
  0 siblings, 0 replies; 36+ messages in thread
From: Juergen Gross @ 2019-08-08  8:13 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, xen-devel

On 08.08.19 09:32, Jan Beulich wrote:
> On 07.08.2019 16:31, Juergen Gross wrote:
>> @@ -463,31 +464,67 @@ int spinlock_profile_control(struct xen_sysctl_lockprof_op *pc)
>>        return rc;
>>    }
>>    
>> -void _lock_profile_register_struct(
>> -    int32_t type, struct lock_profile_qhead *qhead, int32_t idx, char *name)
>> +static struct lock_profile_anc *find_prof_anc(const char *name)
>>    {
>> -    qhead->idx = idx;
>> +    struct lock_profile_anc *anc;
>> +
>> +    for ( anc = lock_profile_ancs; anc; anc = anc->next )
>> +        if ( !strcmp(anc->name, name) )
>> +            return anc;
>> +    return NULL;
>> +}
>> +
>> +void _lock_profile_register_struct(struct lock_profile_qhead *qhead,
>> +                                   int32_t idx, const char *name)
>> +{
>> +    struct lock_profile_anc *anc;
>> +
>>        spin_lock(&lock_profile_lock);
>> -    qhead->head_q = lock_profile_ancs[type].head_q;
>> -    lock_profile_ancs[type].head_q = qhead;
>> -    lock_profile_ancs[type].name = name;
>> +
>> +    anc = find_prof_anc(name);
>> +    if ( !anc )
>> +    {
>> +        anc = xzalloc(struct lock_profile_anc);
>> +        anc->name = name;
>> +        anc->next = lock_profile_ancs;
>> +        lock_profile_ancs = anc;
>> +    }
> 
> This is an imo fundamental weakness of the new model: You now
> require a dynamic memory allocation (of which you don't even
> check that it was successful). Right now the path above will
> only be taken at boot time, but that's not stated anywhere as
> a restriction (afaics), and hence doesn't need to remain this
> way.

Yes, allocation success must be checked, of course.

Adding a memory allocation in the path should be no real problem,
as normally registering a struct is accompanied by registering
the locks in that struct, which is already doing some memory
allocation. I should mention that in the comment section for the
usage of lock profiling.

> Furthermore "name" now serves two purposes when previously it
> served just one. This is best noticeable with the function's
> use in domain_create(): Previously that set up the class
> "per-domain" with an initial instance "struct domain". This
> did provide for someone later going and also registering
> another per-domain structure (e.g. struct p2m_domain) as
> another "per-domain" class instance. Then again maybe I'm not
> correctly understanding the original intentions ...

While the printout might be not as clear as desired, it will still
be correct. In both cases the "print" value will be used. The now
omitted "type" parameter was only used to decide whether to print
the index and for knowing what to print in the xenlockprof tool.


Juergen

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Xen-devel] [PATCH 2/4] xen: add new CONFIG_SPINLOCK_DEBUG option
  2019-08-08  7:35       ` Jan Beulich
@ 2019-08-08  8:25         ` Juergen Gross
  0 siblings, 0 replies; 36+ messages in thread
From: Juergen Gross @ 2019-08-08  8:25 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, xen-devel

On 08.08.19 09:35, Jan Beulich wrote:
> On 08.08.2019 09:23, Juergen Gross wrote:
>> On 08.08.19 08:34, Jan Beulich wrote:
>>> On 07.08.2019 16:31, Juergen Gross wrote:
>>>> --- a/xen/Kconfig.debug
>>>> +++ b/xen/Kconfig.debug
>>>> @@ -44,6 +44,13 @@ config COVERAGE
>>>>          If unsure, say N here.
>>>> +config SPINLOCK_DEBUG
>>>> +    bool "Spinlock debugging"
>>>> +    default DEBUG
>>>> +    ---help---
>>>> +      Enable debugging features of spinlock handling.  Some additional
>>>> +          checks will be performed when acquiring and releasing locks.
>>>> +
>>>>    config LOCK_PROFILE
>>>
>>> While the pre-existing LOCK_PROFILE suggests the opposite, I'd
>>> still like to propose that we uniformly name all debugging
>>> options CONFIG_DEBUG_* (rather than having the DEBUG at the
>>> end). Thoughts?
>>
>> Fine with me. I can rename the LOCK_PROFILE option in patch 4 as I'm
>> touching it anyway.
> 
> One more thing: Perhaps it would better be DEBUG_LOCK (i.e.
> without "SPIN") or DEBUG_LOCKS, to also allow it to cover r/w
> locks, should anyone want to instrument them as well.

Yes. I'll switch to DEBUG_LOCKS.


Juergen


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Xen-devel] [PATCH 3/4] xen: print lock profile info in panic()
  2019-08-08  7:09   ` Jan Beulich
@ 2019-08-08  8:26     ` Juergen Gross
  0 siblings, 0 replies; 36+ messages in thread
From: Juergen Gross @ 2019-08-08  8:26 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, xen-devel

On 08.08.19 09:09, Jan Beulich wrote:
> On 07.08.2019 16:31, Juergen Gross wrote:
>> --- a/xen/Kconfig.debug
>> +++ b/xen/Kconfig.debug
>> @@ -53,6 +53,7 @@ config SPINLOCK_DEBUG
>>    
>>    config LOCK_PROFILE
>>    	bool "Lock Profiling"
>> +	select SPINLOCK_DEBUG
>>    	---help---
>>    	  Lock profiling allows you to see how often locks are taken and blocked.
>>    	  You can use serial console to print (and reset) using 'l' and 'L'
> 
> In which case, for sensible user experience, the selected
> option should come after this one. The way it is now afaict
> there'll be a prompt for SPINLOCK_DEBUG, the user may say
> "no", just to find that because of saying "yes" here it'll
> be turned on anyway. Whereas with switched ordering there'll
> be no prompt for the debug option at all (again afaict) if
> the profiling option was set to "yes".

Reordering is fine with me.


Juergen


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Xen-devel] [PATCH 0/4] enhance lock debugging
  2019-08-08  4:50   ` Juergen Gross
@ 2019-08-08  8:33     ` Andrew Cooper
  2019-08-08  9:08       ` Jan Beulich
  2019-08-08  9:36       ` Juergen Gross
  0 siblings, 2 replies; 36+ messages in thread
From: Andrew Cooper @ 2019-08-08  8:33 UTC (permalink / raw)
  To: Juergen Gross, xen-devel
  Cc: Stefano Stabellini, WeiLiu, Konrad Rzeszutek Wilk, George Dunlap,
	Tim Deegan, Ian Jackson, Julien Grall, Jan Beulich

On 08/08/2019 05:50, Juergen Gross wrote:
> On 07.08.19 20:11, Andrew Cooper wrote:
>>
>> <snip>
>> Its not exactly the easiest to dump to follow.
>>
>> First of all - I don't see why the hold/block time are printed like
>> that.  It
>> might be a holdover from the 32bit build, pre PRId64 support.  They
>> should
>> probably use PRI_stime anyway.
>
> Fine with me.
>
>> The domid rendering is unfortunate.  Ideally we'd use %pd but that would
>> involve rearranging the logic to get a struct domain* in hand. 
>> Seeing as
>> you're the last person to modify this code, how hard would that be to
>> do?
>
> It would completely break the struct type agnostic design.

Ok.  As an alternatively, how about %pdr which takes a raw domid?  It
would be a trivial adjustment in the vsnprintf code, and could plausibly
be useful elsewhere where we have a domid and not a domain pointer.

>
> I have a debug patch adding credit2 run-queue lock. It requires to just
> add 5 lines of code (and this includes moving the spinlock_init() out of
> an irq-off section). It will produce:
>
> (XEN) credit2-runq 0 lock: addr=ffff830413351010, lockval=de00ddf, cpu=6
> (XEN)   lock: 896287(00000000:00FAA6B2), block:  819(00000000:00009C80)

What is the 0 here?

>
>> We have several global locks called lock:
>>
>>    (XEN) Global lock: addr=ffff82d0808181e0, lockval=10001, cpu=4095
>>    (XEN)   lock:           1(00000000:01322165), block:           
>> 0(00000000:00000000)
>>    (XEN) Global lock: addr=ffff82d080817cc0, lockval=100010, cpu=4095
>>    (XEN)   lock:           0(00000000:00000000), block:           
>> 0(00000000:00000000)
>>    (XEN) Global lock: addr=ffff82d080817800, lockval=0000, cpu=4095
>>    (XEN)   lock:           0(00000000:00000000), block:           
>> 0(00000000:00000000)
>>    (XEN) Global lock: addr=ffff82d080817780, lockval=0000, cpu=4095
>>    (XEN)   lock:           0(00000000:00000000), block:           
>> 0(00000000:00000000)
>>    (XEN) Global lock: addr=ffff82d080817510, lockval=0000, cpu=4095
>>    (XEN)   lock:           0(00000000:00000000), block:           
>> 0(00000000:00000000)
>>
>> The second one in particular has corrupt data, seeing has it has been
>> taken
>> and released several times without lock_cnt increasing.
>
> The lock might have been taken/released before lock profiling has been
> initialized.

What is there to initialise?  It all looks statically set up.

>
>> For sanity sake, we should enforce unique naming of any lock
>> registered for
>> profiling.
>
> This would be every lock inited via DEFINE_SPINLOCK(). I can do a
> followup patch for that purpose.

I was wondering how to do this.  One option which comes to mind is to
put an non-static object into .discard.lock_profile or something, so the
linker will object to repeated symbol names and then throw all of them away.

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Xen-devel] [PATCH 0/4] enhance lock debugging
  2019-08-08  8:33     ` Andrew Cooper
@ 2019-08-08  9:08       ` Jan Beulich
  2019-08-08  9:45         ` Andrew Cooper
  2019-08-08  9:36       ` Juergen Gross
  1 sibling, 1 reply; 36+ messages in thread
From: Jan Beulich @ 2019-08-08  9:08 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: Juergen Gross, Stefano Stabellini, WeiLiu, Konrad Rzeszutek Wilk,
	George Dunlap, Tim Deegan, Ian Jackson, Julien Grall, xen-devel

On 08.08.2019 10:33, Andrew Cooper wrote:
> On 08/08/2019 05:50, Juergen Gross wrote:
>> On 07.08.19 20:11, Andrew Cooper wrote:
>>>
>>> <snip>
>>> Its not exactly the easiest to dump to follow.
>>>
>>> First of all - I don't see why the hold/block time are printed like
>>> that.  It
>>> might be a holdover from the 32bit build, pre PRId64 support.  They
>>> should
>>> probably use PRI_stime anyway.
>>
>> Fine with me.
>>
>>> The domid rendering is unfortunate.  Ideally we'd use %pd but that would
>>> involve rearranging the logic to get a struct domain* in hand.
>>> Seeing as
>>> you're the last person to modify this code, how hard would that be to
>>> do?
>>
>> It would completely break the struct type agnostic design.
> 
> Ok.  As an alternatively, how about %pdr which takes a raw domid?  It
> would be a trivial adjustment in the vsnprintf code, and could plausibly
> be useful elsewhere where we have a domid and not a domain pointer.

At the risk of upsetting / annoying you: A domid_t would again
better be formatted via %od (and without the need to take the
address of a respective variable). In the case here it would
have the minor additional benefit of conserving on format string
length.

Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Xen-devel] [PATCH 0/4] enhance lock debugging
  2019-08-08  8:33     ` Andrew Cooper
  2019-08-08  9:08       ` Jan Beulich
@ 2019-08-08  9:36       ` Juergen Gross
  2019-08-08 10:20         ` Andrew Cooper
  2019-08-08 15:26         ` Juergen Gross
  1 sibling, 2 replies; 36+ messages in thread
From: Juergen Gross @ 2019-08-08  9:36 UTC (permalink / raw)
  To: Andrew Cooper, xen-devel
  Cc: Stefano Stabellini, WeiLiu, Konrad Rzeszutek Wilk, George Dunlap,
	Tim Deegan, Ian Jackson, Julien Grall, Jan Beulich

On 08.08.19 10:33, Andrew Cooper wrote:
> On 08/08/2019 05:50, Juergen Gross wrote:
>> On 07.08.19 20:11, Andrew Cooper wrote:
>>>
>>> <snip>
>>> Its not exactly the easiest to dump to follow.
>>>
>>> First of all - I don't see why the hold/block time are printed like
>>> that.  It
>>> might be a holdover from the 32bit build, pre PRId64 support.  They
>>> should
>>> probably use PRI_stime anyway.
>>
>> Fine with me.
>>
>>> The domid rendering is unfortunate.  Ideally we'd use %pd but that would
>>> involve rearranging the logic to get a struct domain* in hand.
>>> Seeing as
>>> you're the last person to modify this code, how hard would that be to
>>> do?
>>
>> It would completely break the struct type agnostic design.
> 
> Ok.  As an alternatively, how about %pdr which takes a raw domid?  It
> would be a trivial adjustment in the vsnprintf code, and could plausibly
> be useful elsewhere where we have a domid and not a domain pointer.

Lock profiling has no knowledge that it is working on a struct domain.
It is just working on a lock in a struct and an index to differentiate
the struct instance. Using the domid as the index is just for making it
more easy to understand the printout.

I wouldn't want e.g. a per-event lock to be named "IDLE" just because
it happens to be index 32767.

> 
>>
>> I have a debug patch adding credit2 run-queue lock. It requires to just
>> add 5 lines of code (and this includes moving the spinlock_init() out of
>> an irq-off section). It will produce:
>>
>> (XEN) credit2-runq 0 lock: addr=ffff830413351010, lockval=de00ddf, cpu=6
>> (XEN)   lock: 896287(00000000:00FAA6B2), block:  819(00000000:00009C80)
> 
> What is the 0 here?

The runq number.

> 
>>
>>> We have several global locks called lock:
>>>
>>>     (XEN) Global lock: addr=ffff82d0808181e0, lockval=10001, cpu=4095
>>>     (XEN)   lock:           1(00000000:01322165), block:
>>> 0(00000000:00000000)
>>>     (XEN) Global lock: addr=ffff82d080817cc0, lockval=100010, cpu=4095
>>>     (XEN)   lock:           0(00000000:00000000), block:
>>> 0(00000000:00000000)
>>>     (XEN) Global lock: addr=ffff82d080817800, lockval=0000, cpu=4095
>>>     (XEN)   lock:           0(00000000:00000000), block:
>>> 0(00000000:00000000)
>>>     (XEN) Global lock: addr=ffff82d080817780, lockval=0000, cpu=4095
>>>     (XEN)   lock:           0(00000000:00000000), block:
>>> 0(00000000:00000000)
>>>     (XEN) Global lock: addr=ffff82d080817510, lockval=0000, cpu=4095
>>>     (XEN)   lock:           0(00000000:00000000), block:
>>> 0(00000000:00000000)
>>>
>>> The second one in particular has corrupt data, seeing has it has been
>>> taken
>>> and released several times without lock_cnt increasing.
>>
>> The lock might have been taken/released before lock profiling has been
>> initialized.
> 
> What is there to initialise?  It all looks statically set up.

lock->profile is set only in lock_prof_init().

> 
>>
>>> For sanity sake, we should enforce unique naming of any lock
>>> registered for
>>> profiling.
>>
>> This would be every lock inited via DEFINE_SPINLOCK(). I can do a
>> followup patch for that purpose.
> 
> I was wondering how to do this.  One option which comes to mind is to
> put an non-static object into .discard.lock_profile or something, so the
> linker will object to repeated symbol names and then throw all of them away.

I could just drop the "static" in the _LOCK_PROFILE_PTR() macro.
At the same time I should move the ".lockprofile.data" section in the
linker scripts to the init part maybe.


Juergen

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Xen-devel] [PATCH 0/4] enhance lock debugging
  2019-08-08  9:08       ` Jan Beulich
@ 2019-08-08  9:45         ` Andrew Cooper
  2019-08-08 11:51           ` Jan Beulich
  0 siblings, 1 reply; 36+ messages in thread
From: Andrew Cooper @ 2019-08-08  9:45 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Juergen Gross, Stefano Stabellini, WeiLiu, Konrad Rzeszutek Wilk,
	George Dunlap, Tim Deegan, Ian Jackson, Julien Grall, xen-devel

On 08/08/2019 10:08, Jan Beulich wrote:
> On 08.08.2019 10:33, Andrew Cooper wrote:
>> On 08/08/2019 05:50, Juergen Gross wrote:
>>> On 07.08.19 20:11, Andrew Cooper wrote:
>>>> <snip>
>>>> Its not exactly the easiest to dump to follow.
>>>>
>>>> First of all - I don't see why the hold/block time are printed like
>>>> that.  It
>>>> might be a holdover from the 32bit build, pre PRId64 support.  They
>>>> should
>>>> probably use PRI_stime anyway.
>>> Fine with me.
>>>
>>>> The domid rendering is unfortunate.  Ideally we'd use %pd but that would
>>>> involve rearranging the logic to get a struct domain* in hand.
>>>> Seeing as
>>>> you're the last person to modify this code, how hard would that be to
>>>> do?
>>> It would completely break the struct type agnostic design.
>> Ok.  As an alternatively, how about %pdr which takes a raw domid?  It
>> would be a trivial adjustment in the vsnprintf code, and could plausibly
>> be useful elsewhere where we have a domid and not a domain pointer.
> At the risk of upsetting / annoying you:

Yes you really have

> A domid_t would again
> better be formatted via %od (and without the need to take the
> address of a respective variable). In the case here it would
> have the minor additional benefit of conserving on format string
> length.

There are concrete reasons why it is a terrible idea, because none of
this has anything to do with octal, and that %od has a well defined
meaning which is not related to domid's.  There is also a very good
reason why all the magic is hidden behind one single formatter.

You seem hell bent on making it actively confusing and complicated to
write and read printk()'s, and I am not willing to lumber anyone
developing on Xen with this cognitive complexity.

I am stick to death of having the same over and over, so let me stop any
further wasting of time and be absolutely crystal clear.

NACK to any form of overloading %o

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Xen-devel] [PATCH 0/4] enhance lock debugging
  2019-08-08  9:36       ` Juergen Gross
@ 2019-08-08 10:20         ` Andrew Cooper
  2019-08-08 12:02           ` Juergen Gross
  2019-08-08 15:26         ` Juergen Gross
  1 sibling, 1 reply; 36+ messages in thread
From: Andrew Cooper @ 2019-08-08 10:20 UTC (permalink / raw)
  To: Juergen Gross, xen-devel
  Cc: Stefano Stabellini, WeiLiu, Konrad Rzeszutek Wilk, George Dunlap,
	Tim Deegan, Ian Jackson, Julien Grall, Jan Beulich

On 08/08/2019 10:36, Juergen Gross wrote:
> On 08.08.19 10:33, Andrew Cooper wrote:
>> On 08/08/2019 05:50, Juergen Gross wrote:
>>> On 07.08.19 20:11, Andrew Cooper wrote:
>>>>
>>>> <snip>
>>>> Its not exactly the easiest to dump to follow.
>>>>
>>>> First of all - I don't see why the hold/block time are printed like
>>>> that.  It
>>>> might be a holdover from the 32bit build, pre PRId64 support.  They
>>>> should
>>>> probably use PRI_stime anyway.
>>>
>>> Fine with me.
>>>
>>>> The domid rendering is unfortunate.  Ideally we'd use %pd but that
>>>> would
>>>> involve rearranging the logic to get a struct domain* in hand.
>>>> Seeing as
>>>> you're the last person to modify this code, how hard would that be to
>>>> do?
>>>
>>> It would completely break the struct type agnostic design.
>>
>> Ok.  As an alternatively, how about %pdr which takes a raw domid?  It
>> would be a trivial adjustment in the vsnprintf code, and could plausibly
>> be useful elsewhere where we have a domid and not a domain pointer.
>
> Lock profiling has no knowledge that it is working on a struct domain.
> It is just working on a lock in a struct and an index to differentiate
> the struct instance. Using the domid as the index is just for making it
> more easy to understand the printout.
>
> I wouldn't want e.g. a per-event lock to be named "IDLE" just because
> it happens to be index 32767.

Ok, but clearly there is something which distinguishes domain indices
from other indices?

>
>>
>>>
>>>> We have several global locks called lock:
>>>>
>>>>     (XEN) Global lock: addr=ffff82d0808181e0, lockval=10001, cpu=4095
>>>>     (XEN)   lock:           1(00000000:01322165), block:
>>>> 0(00000000:00000000)
>>>>     (XEN) Global lock: addr=ffff82d080817cc0, lockval=100010, cpu=4095
>>>>     (XEN)   lock:           0(00000000:00000000), block:
>>>> 0(00000000:00000000)
>>>>     (XEN) Global lock: addr=ffff82d080817800, lockval=0000, cpu=4095
>>>>     (XEN)   lock:           0(00000000:00000000), block:
>>>> 0(00000000:00000000)
>>>>     (XEN) Global lock: addr=ffff82d080817780, lockval=0000, cpu=4095
>>>>     (XEN)   lock:           0(00000000:00000000), block:
>>>> 0(00000000:00000000)
>>>>     (XEN) Global lock: addr=ffff82d080817510, lockval=0000, cpu=4095
>>>>     (XEN)   lock:           0(00000000:00000000), block:
>>>> 0(00000000:00000000)
>>>>
>>>> The second one in particular has corrupt data, seeing has it has been
>>>> taken
>>>> and released several times without lock_cnt increasing.
>>>
>>> The lock might have been taken/released before lock profiling has been
>>> initialized.
>>
>> What is there to initialise?  It all looks statically set up.
>
> lock->profile is set only in lock_prof_init().

Ah - so it is.  I wonder if this can be done at compile time?  Its just
a backreference for the forward reference which is done at.

(Wow this code is complicated to follow).  I think it can be done with a
forward declaration of static struct lock_profile
__lock_profile_data_##l and passing something other than NULL into
_SPIN_LOCK_UNLOCKED(), but that will break the ability to do static
DEFINE_SPINLOCK().

Probably not worth messing with this now, but perhaps something to think
over.

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Xen-devel] [PATCH 1/4] xen/spinlocks: in debug builds store cpu holding the lock
  2019-08-08  7:51     ` Juergen Gross
  2019-08-08  7:59       ` Jan Beulich
@ 2019-08-08 10:28       ` Julien Grall
  2019-08-08 11:52         ` Jan Beulich
  2019-08-08 11:53         ` Juergen Gross
  1 sibling, 2 replies; 36+ messages in thread
From: Julien Grall @ 2019-08-08 10:28 UTC (permalink / raw)
  To: Juergen Gross, Jan Beulich
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan, xen-devel



On 08/08/2019 08:51, Juergen Gross wrote:
> On 08.08.19 08:58, Jan Beulich wrote:
>> On 07.08.2019 16:31, Juergen Gross wrote:
>> Do we have an implied assumption somewhere that unsigned short is
>> exactly 16 bits wide? I think "val" wants to be uint16_t here (as
>> you really mean "exactly 16 bits"), the two boolean fields want
>> to be bool, and the remaining two ones unsigned int.
> 
> But that would increase the size of the union to 4 bytes instead of 2.
> So at least pad and cpu must be unsigned short or (better) uint16_t.

How about bool irq_safe:1?

Cheers,

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Xen-devel] [PATCH 0/4] enhance lock debugging
  2019-08-08  9:45         ` Andrew Cooper
@ 2019-08-08 11:51           ` Jan Beulich
  0 siblings, 0 replies; 36+ messages in thread
From: Jan Beulich @ 2019-08-08 11:51 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: Juergen Gross, Stefano Stabellini, WeiLiu, Konrad Rzeszutek Wilk,
	George Dunlap, Tim Deegan, Ian Jackson, Julien Grall, xen-devel

On 08.08.2019 11:45, Andrew Cooper wrote:
> On 08/08/2019 10:08, Jan Beulich wrote:
>> On 08.08.2019 10:33, Andrew Cooper wrote:
>>> On 08/08/2019 05:50, Juergen Gross wrote:
>>>> On 07.08.19 20:11, Andrew Cooper wrote:
>>>>> <snip>
>>>>> Its not exactly the easiest to dump to follow.
>>>>>
>>>>> First of all - I don't see why the hold/block time are printed like
>>>>> that.  It
>>>>> might be a holdover from the 32bit build, pre PRId64 support.  They
>>>>> should
>>>>> probably use PRI_stime anyway.
>>>> Fine with me.
>>>>
>>>>> The domid rendering is unfortunate.  Ideally we'd use %pd but that would
>>>>> involve rearranging the logic to get a struct domain* in hand.
>>>>> Seeing as
>>>>> you're the last person to modify this code, how hard would that be to
>>>>> do?
>>>> It would completely break the struct type agnostic design.
>>> Ok.  As an alternatively, how about %pdr which takes a raw domid?  It
>>> would be a trivial adjustment in the vsnprintf code, and could plausibly
>>> be useful elsewhere where we have a domid and not a domain pointer.
>> At the risk of upsetting / annoying you:
> 
> Yes you really have
> 
>> A domid_t would again
>> better be formatted via %od (and without the need to take the
>> address of a respective variable). In the case here it would
>> have the minor additional benefit of conserving on format string
>> length.
> 
> There are concrete reasons why it is a terrible idea, because none of
> this has anything to do with octal, and that %od has a well defined
> meaning which is not related to domid's.

I'm curious to learn what well defined meaning %od has.

>  There is also a very good
> reason why all the magic is hidden behind one single formatter.
> 
> You seem hell bent on making it actively confusing and complicated to
> write and read printk()'s, and I am not willing to lumber anyone
> developing on Xen with this cognitive complexity.
> 
> I am stick to death of having the same over and over, so let me stop any
> further wasting of time and be absolutely crystal clear.
> 
> NACK to any form of overloading %o

In which case, if I took a similar position for the PCI SBDF
formatting using %pp, we'd be in a dead end. Waste of time or not
- while you have made crystal clear why you personally dislike
overloading %o, afaic you've not provided any non-subjective
(i.e. truly technical) reasons, which would be what might help
convince me of sticking to just %p overloading.

Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Xen-devel] [PATCH 1/4] xen/spinlocks: in debug builds store cpu holding the lock
  2019-08-08 10:28       ` Julien Grall
@ 2019-08-08 11:52         ` Jan Beulich
  2019-08-08 11:53         ` Juergen Gross
  1 sibling, 0 replies; 36+ messages in thread
From: Jan Beulich @ 2019-08-08 11:52 UTC (permalink / raw)
  To: Julien Grall
  Cc: Juergen Gross, Stefano Stabellini, Wei Liu,
	Konrad Rzeszutek Wilk, George Dunlap, Andrew Cooper, Ian Jackson,
	Tim Deegan, xen-devel

On 08.08.2019 12:28, Julien Grall wrote:
> On 08/08/2019 08:51, Juergen Gross wrote:
>> On 08.08.19 08:58, Jan Beulich wrote:
>>> On 07.08.2019 16:31, Juergen Gross wrote:
>>> Do we have an implied assumption somewhere that unsigned short is
>>> exactly 16 bits wide? I think "val" wants to be uint16_t here (as
>>> you really mean "exactly 16 bits"), the two boolean fields want
>>> to be bool, and the remaining two ones unsigned int.
>>
>> But that would increase the size of the union to 4 bytes instead of 2.
>> So at least pad and cpu must be unsigned short or (better) uint16_t.
> 
> How about bool irq_safe:1?

That's what I had suggested, indeed. Jürgen's response was for
my "unsigned int" suggestion towards the two non-boolean fields.

Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Xen-devel] [PATCH 1/4] xen/spinlocks: in debug builds store cpu holding the lock
  2019-08-08 10:28       ` Julien Grall
  2019-08-08 11:52         ` Jan Beulich
@ 2019-08-08 11:53         ` Juergen Gross
  2019-08-08 12:18           ` Jan Beulich
  2019-08-08 15:32           ` Andrew Cooper
  1 sibling, 2 replies; 36+ messages in thread
From: Juergen Gross @ 2019-08-08 11:53 UTC (permalink / raw)
  To: Julien Grall, Jan Beulich
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan, xen-devel

On 08.08.19 12:28, Julien Grall wrote:
> 
> 
> On 08/08/2019 08:51, Juergen Gross wrote:
>> On 08.08.19 08:58, Jan Beulich wrote:
>>> On 07.08.2019 16:31, Juergen Gross wrote:
>>> Do we have an implied assumption somewhere that unsigned short is
>>> exactly 16 bits wide? I think "val" wants to be uint16_t here (as
>>> you really mean "exactly 16 bits"), the two boolean fields want
>>> to be bool, and the remaining two ones unsigned int.
>>
>> But that would increase the size of the union to 4 bytes instead of 2.
>> So at least pad and cpu must be unsigned short or (better) uint16_t.
> 
> How about bool irq_safe:1?

I didn't question that, but OTOH I'm not sure doing something like:

struct {
   bool     unseen:1;
   bool     irq_safe:1;
   uint16_t pad:2;
   uint16_t cpu:12;
}

is guaranteed to be 2 bytes long. I think pad will be still put into the
first byte, but the compiler might decide that the 4 bits left won't be
able to hold the next 12 bits so it could start a new uint16_t at offset
2.

Moving the bool types to the end of the struct would avoid that IMHO.


Juergen


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Xen-devel] [PATCH 0/4] enhance lock debugging
  2019-08-08 10:20         ` Andrew Cooper
@ 2019-08-08 12:02           ` Juergen Gross
  0 siblings, 0 replies; 36+ messages in thread
From: Juergen Gross @ 2019-08-08 12:02 UTC (permalink / raw)
  To: Andrew Cooper, xen-devel
  Cc: Stefano Stabellini, WeiLiu, Konrad Rzeszutek Wilk, George Dunlap,
	Tim Deegan, Ian Jackson, Julien Grall, Jan Beulich

On 08.08.19 12:20, Andrew Cooper wrote:
> On 08/08/2019 10:36, Juergen Gross wrote:
>> On 08.08.19 10:33, Andrew Cooper wrote:
>>> On 08/08/2019 05:50, Juergen Gross wrote:
>>>> On 07.08.19 20:11, Andrew Cooper wrote:
>>>>>
>>>>> <snip>
>>>>> Its not exactly the easiest to dump to follow.
>>>>>
>>>>> First of all - I don't see why the hold/block time are printed like
>>>>> that.  It
>>>>> might be a holdover from the 32bit build, pre PRId64 support.  They
>>>>> should
>>>>> probably use PRI_stime anyway.
>>>>
>>>> Fine with me.
>>>>
>>>>> The domid rendering is unfortunate.  Ideally we'd use %pd but that
>>>>> would
>>>>> involve rearranging the logic to get a struct domain* in hand.
>>>>> Seeing as
>>>>> you're the last person to modify this code, how hard would that be to
>>>>> do?
>>>>
>>>> It would completely break the struct type agnostic design.
>>>
>>> Ok.  As an alternatively, how about %pdr which takes a raw domid?  It
>>> would be a trivial adjustment in the vsnprintf code, and could plausibly
>>> be useful elsewhere where we have a domid and not a domain pointer.
>>
>> Lock profiling has no knowledge that it is working on a struct domain.
>> It is just working on a lock in a struct and an index to differentiate
>> the struct instance. Using the domid as the index is just for making it
>> more easy to understand the printout.
>>
>> I wouldn't want e.g. a per-event lock to be named "IDLE" just because
>> it happens to be index 32767.
> 
> Ok, but clearly there is something which distinguishes domain indices
> from other indices?

Not for lock profiling.


Juergen

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Xen-devel] [PATCH 1/4] xen/spinlocks: in debug builds store cpu holding the lock
  2019-08-08 11:53         ` Juergen Gross
@ 2019-08-08 12:18           ` Jan Beulich
  2019-08-08 12:49             ` Juergen Gross
  2019-08-08 15:32           ` Andrew Cooper
  1 sibling, 1 reply; 36+ messages in thread
From: Jan Beulich @ 2019-08-08 12:18 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, xen-devel

On 08.08.2019 13:53, Juergen Gross wrote:
> On 08.08.19 12:28, Julien Grall wrote:
>> On 08/08/2019 08:51, Juergen Gross wrote:
>>> On 08.08.19 08:58, Jan Beulich wrote:
>>>> On 07.08.2019 16:31, Juergen Gross wrote:
>>>> Do we have an implied assumption somewhere that unsigned short is
>>>> exactly 16 bits wide? I think "val" wants to be uint16_t here (as
>>>> you really mean "exactly 16 bits"), the two boolean fields want
>>>> to be bool, and the remaining two ones unsigned int.
>>>
>>> But that would increase the size of the union to 4 bytes instead of 2.
>>> So at least pad and cpu must be unsigned short or (better) uint16_t.
>>
>> How about bool irq_safe:1?
> 
> I didn't question that, but OTOH I'm not sure doing something like:
> 
> struct {
>    bool     unseen:1;
>    bool     irq_safe:1;
>    uint16_t pad:2;
>    uint16_t cpu:12;
> }
> 
> is guaranteed to be 2 bytes long. I think pad will be still put into the
> first byte, but the compiler might decide that the 4 bits left won't be
> able to hold the next 12 bits so it could start a new uint16_t at offset
> 2.
> 
> Moving the bool types to the end of the struct would avoid that IMHO.

Moving the two bool-s further down will also simplify extraction and
insertion of the "cpu" field.

Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Xen-devel] [PATCH 1/4] xen/spinlocks: in debug builds store cpu holding the lock
  2019-08-08 12:18           ` Jan Beulich
@ 2019-08-08 12:49             ` Juergen Gross
  0 siblings, 0 replies; 36+ messages in thread
From: Juergen Gross @ 2019-08-08 12:49 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, xen-devel

On 08.08.19 14:18, Jan Beulich wrote:
> On 08.08.2019 13:53, Juergen Gross wrote:
>> On 08.08.19 12:28, Julien Grall wrote:
>>> On 08/08/2019 08:51, Juergen Gross wrote:
>>>> On 08.08.19 08:58, Jan Beulich wrote:
>>>>> On 07.08.2019 16:31, Juergen Gross wrote:
>>>>> Do we have an implied assumption somewhere that unsigned short is
>>>>> exactly 16 bits wide? I think "val" wants to be uint16_t here (as
>>>>> you really mean "exactly 16 bits"), the two boolean fields want
>>>>> to be bool, and the remaining two ones unsigned int.
>>>>
>>>> But that would increase the size of the union to 4 bytes instead of 2.
>>>> So at least pad and cpu must be unsigned short or (better) uint16_t.
>>>
>>> How about bool irq_safe:1?
>>
>> I didn't question that, but OTOH I'm not sure doing something like:
>>
>> struct {
>>     bool     unseen:1;
>>     bool     irq_safe:1;
>>     uint16_t pad:2;
>>     uint16_t cpu:12;
>> }
>>
>> is guaranteed to be 2 bytes long. I think pad will be still put into the
>> first byte, but the compiler might decide that the 4 bits left won't be
>> able to hold the next 12 bits so it could start a new uint16_t at offset
>> 2.
>>
>> Moving the bool types to the end of the struct would avoid that IMHO.
> 
> Moving the two bool-s further down will also simplify extraction and
> insertion of the "cpu" field.

Okay, lets reverse above struct.


Juergen

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Xen-devel] [PATCH 0/4] enhance lock debugging
  2019-08-08  9:36       ` Juergen Gross
  2019-08-08 10:20         ` Andrew Cooper
@ 2019-08-08 15:26         ` Juergen Gross
  1 sibling, 0 replies; 36+ messages in thread
From: Juergen Gross @ 2019-08-08 15:26 UTC (permalink / raw)
  To: Andrew Cooper, xen-devel
  Cc: Stefano Stabellini, WeiLiu, Konrad Rzeszutek Wilk, George Dunlap,
	Tim Deegan, Ian Jackson, Julien Grall, Jan Beulich

On 08.08.19 11:36, Juergen Gross wrote:
> On 08.08.19 10:33, Andrew Cooper wrote:
>> On 08/08/2019 05:50, Juergen Gross wrote:
>>> On 07.08.19 20:11, Andrew Cooper wrote:
>>>> For sanity sake, we should enforce unique naming of any lock
>>>> registered for
>>>> profiling.
>>>
>>> This would be every lock inited via DEFINE_SPINLOCK(). I can do a
>>> followup patch for that purpose.
>>
>> I was wondering how to do this.  One option which comes to mind is to
>> put an non-static object into .discard.lock_profile or something, so the
>> linker will object to repeated symbol names and then throw all of them 
>> away.
> 
> I could just drop the "static" in the _LOCK_PROFILE_PTR() macro.

Unfortunately this doesn't work for locks defined in a function body.


Juergen

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Xen-devel] [PATCH 1/4] xen/spinlocks: in debug builds store cpu holding the lock
  2019-08-08 11:53         ` Juergen Gross
  2019-08-08 12:18           ` Jan Beulich
@ 2019-08-08 15:32           ` Andrew Cooper
  1 sibling, 0 replies; 36+ messages in thread
From: Andrew Cooper @ 2019-08-08 15:32 UTC (permalink / raw)
  To: Juergen Gross, Julien Grall, Jan Beulich
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Ian Jackson, Tim Deegan, xen-devel

On 08/08/2019 12:53, Juergen Gross wrote:
> On 08.08.19 12:28, Julien Grall wrote:
>>
>>
>> On 08/08/2019 08:51, Juergen Gross wrote:
>>> On 08.08.19 08:58, Jan Beulich wrote:
>>>> On 07.08.2019 16:31, Juergen Gross wrote:
>>>> Do we have an implied assumption somewhere that unsigned short is
>>>> exactly 16 bits wide? I think "val" wants to be uint16_t here (as
>>>> you really mean "exactly 16 bits"), the two boolean fields want
>>>> to be bool, and the remaining two ones unsigned int.
>>>
>>> But that would increase the size of the union to 4 bytes instead of 2.
>>> So at least pad and cpu must be unsigned short or (better) uint16_t.
>>
>> How about bool irq_safe:1?
>
> I didn't question that, but OTOH I'm not sure doing something like:
>
> struct {
>   bool     unseen:1;
>   bool     irq_safe:1;
>   uint16_t pad:2;
>   uint16_t cpu:12;
> }
>
> is guaranteed to be 2 bytes long.

It will be on anything which is GCC-compatible.  All scalar bitfields
gets tightly packed even when they differ in base type.

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

end of thread, other threads:[~2019-08-08 15:32 UTC | newest]

Thread overview: 36+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-07 14:31 [Xen-devel] [PATCH 0/4] enhance lock debugging Juergen Gross
2019-08-07 14:31 ` [Xen-devel] [PATCH 1/4] xen/spinlocks: in debug builds store cpu holding the lock Juergen Gross
2019-08-07 17:05   ` Andrew Cooper
2019-08-08  4:34     ` Juergen Gross
2019-08-08  6:58   ` Jan Beulich
2019-08-08  7:51     ` Juergen Gross
2019-08-08  7:59       ` Jan Beulich
2019-08-08 10:28       ` Julien Grall
2019-08-08 11:52         ` Jan Beulich
2019-08-08 11:53         ` Juergen Gross
2019-08-08 12:18           ` Jan Beulich
2019-08-08 12:49             ` Juergen Gross
2019-08-08 15:32           ` Andrew Cooper
2019-08-07 14:31 ` [Xen-devel] [PATCH 2/4] xen: add new CONFIG_SPINLOCK_DEBUG option Juergen Gross
2019-08-07 17:17   ` Andrew Cooper
2019-08-08  4:35     ` Juergen Gross
2019-08-08  6:34   ` Jan Beulich
2019-08-08  7:23     ` Juergen Gross
2019-08-08  7:35       ` Jan Beulich
2019-08-08  8:25         ` Juergen Gross
2019-08-07 14:31 ` [Xen-devel] [PATCH 3/4] xen: print lock profile info in panic() Juergen Gross
2019-08-08  7:09   ` Jan Beulich
2019-08-08  8:26     ` Juergen Gross
2019-08-07 14:31 ` [Xen-devel] [PATCH 4/4] xen: modify lock profiling interface Juergen Gross
2019-08-08  7:32   ` Jan Beulich
2019-08-08  8:13     ` Juergen Gross
2019-08-07 18:11 ` [Xen-devel] [PATCH 0/4] enhance lock debugging Andrew Cooper
2019-08-08  4:50   ` Juergen Gross
2019-08-08  8:33     ` Andrew Cooper
2019-08-08  9:08       ` Jan Beulich
2019-08-08  9:45         ` Andrew Cooper
2019-08-08 11:51           ` Jan Beulich
2019-08-08  9:36       ` Juergen Gross
2019-08-08 10:20         ` Andrew Cooper
2019-08-08 12:02           ` Juergen Gross
2019-08-08 15:26         ` Juergen Gross

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).