linux-kernel.vger.kernel.org archive mirror
* [PATCH 2.6.28-rc4]lock_stat: Add "con-hungry" to show that how many person-time fight for the ticket spinlock
@ 2008-11-18 11:00 Yang Xi
  2008-11-18 16:20 ` Peter Zijlstra
  0 siblings, 1 reply; 10+ messages in thread
From: Yang Xi @ 2008-11-18 11:00 UTC (permalink / raw)
  To: linux-kernel; +Cc: mingo

Because the x86 spinlock implementation uses ticket locks, we can tell how
many threads are waiting for a lock whenever there is contention on it. This
patch adds that number to bounces[bounce_hungry] and reports
"bounces[bounce_hungry]" as "con-hungry". The ratio "con-hungry/contentions"
then gives the average number of threads waiting for the spinlock per
contention, which tells us whether it is worthwhile to replace the spinlock
with a more scalable one: since scalable spinlock designs aim to reduce the
cost of cache-line invalidation, a low "con-hungry/contentions" ratio means
we cannot benefit from a more scalable spinlock.
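
For reference, a minimal standalone sketch of the arithmetic this relies on,
assuming the 2.6.28-era x86 layout for fewer than 256 CPUs, where the low
byte of slock is the ticket currently being served and the next byte is the
next ticket to be handed out (the function below is illustrative, not part
of the patch):

#include <stdio.h>

/*
 * Illustrative only: number of CPUs currently holding a ticket, i.e. the
 * holder plus any queued waiters, for the 8-bit ticket layout.
 */
static unsigned int ticket_queue_depth(unsigned int slock)
{
	unsigned int owner = slock & 0xff;		/* ticket being served */
	unsigned int next  = (slock >> 8) & 0xff;	/* next ticket to hand out */

	return (next - owner) & 0xff;
}

int main(void)
{
	/* owner = 3, next = 7: one holder plus three queued waiters */
	printf("%u\n", ticket_queue_depth(0x0703));	/* prints 4 */
	return 0;
}

The patch adds one more on top of this ("hungry + 1"), presumably because
__lock_contended() runs after a failed trylock, before the contending CPU
has taken its own ticket.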


Signed-off-by: Yang Xi <hiyangxi@gmail.com>


diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 331e5f1..a71398e 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -136,6 +136,7 @@ enum bounce_type {
        bounce_acquired_read,
        bounce_contended_write,
        bounce_contended_read,
+       bounce_hungry,
        nr_bounce_types,

        bounce_acquired = bounce_acquired_write,
@@ -165,6 +166,7 @@ struct lockdep_map {
        const char                      *name;
 #ifdef CONFIG_LOCK_STAT
        int                             cpu;
+       int                             isspinlock;
 #endif
 };

diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 06e1571..b3314ed 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3000,6 +3000,20 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
        struct lock_class_stats *stats;
        unsigned int depth;
        int i, point;
+       spinlock_t * lock_ptr;
+       unsigned long hungry;
+
+       if (lock->isspinlock){
+               lock_ptr = container_of(lock,spinlock_t,dep_map);
+
+#if (NR_CPUS < 256)
+               hungry=(unsigned char)((lock_ptr->raw_lock.slock>>8&0xff)-(lock_ptr->raw_lock.slock&0xff));
+
+#else
+               hungry=(unsigned short)((lock_ptr->raw_lock.slock>>16&0xffff)-(lock_ptr->raw_lock.slock&0xffff));
+
+#endif
+       }

        depth = curr->lockdep_depth;
        if (DEBUG_LOCKS_WARN_ON(!depth))
@@ -3030,9 +3044,13 @@ found_it:
                stats->contention_point[point]++;
        if (lock->cpu != smp_processor_id())
                stats->bounces[bounce_contended + !!hlock->read]++;
+       if (lock->isspinlock){
+               stats->bounces[bounce_hungry]+=hungry+1;
+       }
        put_lock_stats(stats);
 }

+
 static void
 __lock_acquired(struct lockdep_map *lock)
 {
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index 20dbcbf..2fc3ca0 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -534,7 +534,8 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
                else
                        seq_printf(m, "%40s:", name);

-               seq_printf(m, "%14lu ", stats->bounces[bounce_contended_write]);
+               seq_printf(m, "%14lu ", stats->bounces[bounce_contended_write]);
+               seq_printf(m, "%14lu ", stats->bounces[bounce_hungry]);
                seq_lock_time(m, &stats->write_waittime);
                seq_printf(m, " %14lu ", stats->bounces[bounce_acquired_write]);
                seq_lock_time(m, &stats->write_holdtime);
@@ -584,10 +585,11 @@ static void seq_header(struct seq_file *m)
 {
        seq_printf(m, "lock_stat version 0.2\n");
        seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1));
-       seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s "
+       seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s %14s "
                        "%14s %14s\n",
                        "class name",
                        "con-bounces",
+                       "con-hungry",
                        "contentions",
                        "waittime-min",
                        "waittime-max",
@@ -597,7 +599,7 @@ static void seq_header(struct seq_file *m)
                        "holdtime-min",
                        "holdtime-max",
                        "holdtime-total");
-       seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1));
+       seq_line(m, '-', 0, 40 + 1 + 11 * (14 + 1));
        seq_printf(m, "\n");
 }

diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
index 9c4b025..2ec34ec 100644
--- a/lib/spinlock_debug.c
+++ b/lib/spinlock_debug.c
@@ -27,6 +27,10 @@ void __spin_lock_init(spinlock_t *lock, const char *name,
        lock->magic = SPINLOCK_MAGIC;
        lock->owner = SPINLOCK_OWNER_INIT;
        lock->owner_cpu = -1;
+       lock->dep_map.isspinlock = 0;
+#if ((defined(CONFIG_X86) || defined(CONFIG_X86_64))&& defined(CONFIG_LOCK_STAT))
+       lock->dep_map.isspinlock = 1;
+#endif
 }

 EXPORT_SYMBOL(__spin_lock_init);
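
For illustration, the modified seq_header() would print a /proc/lock_stat
header along these lines, with the new "con-hungry" column between
"con-bounces" and "contentions" (middle columns are untouched by the patch
and abbreviated here):

class name      con-bounces      con-hungry     contentions    waittime-min    waittime-max   ...   holdtime-min    holdtime-max  holdtime-total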

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 2.6.28-rc4]lock_stat: Add "con-hungry" to show that how many person-time fight for the ticket spinlock
  2008-11-18 11:00 [PATCH 2.6.28-rc4]lock_stat: Add "con-hungry" to show that how many person-time fight for the ticket spinlock Yang Xi
@ 2008-11-18 16:20 ` Peter Zijlstra
  2008-11-19  5:18   ` Yang Xi
  0 siblings, 1 reply; 10+ messages in thread
From: Peter Zijlstra @ 2008-11-18 16:20 UTC (permalink / raw)
  To: Yang Xi; +Cc: linux-kernel, mingo

On Tue, 2008-11-18 at 19:00 +0800, Yang Xi wrote:
> Because the x86 spinlock implementation uses ticket locks, we can tell how
> many threads are waiting for a lock whenever there is contention on it. This
> patch adds that number to bounces[bounce_hungry] and reports
> "bounces[bounce_hungry]" as "con-hungry". The ratio "con-hungry/contentions"
> then gives the average number of threads waiting for the spinlock per
> contention, which tells us whether it is worthwhile to replace the spinlock
> with a more scalable one: since scalable spinlock designs aim to reduce the
> cost of cache-line invalidation, a low "con-hungry/contentions" ratio means
> we cannot benefit from a more scalable spinlock.
> 
> 
> Signed-off-by: Yang Xi <hiyangxi@gmail.com>

While I like the idea, the code is horrid, please clean this up and try
again.

The lockstat code should not ever need to know about lock implementation
details like the ones you #ifdeffed in.

> diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
> index 331e5f1..a71398e 100644
> --- a/include/linux/lockdep.h
> +++ b/include/linux/lockdep.h
> @@ -136,6 +136,7 @@ enum bounce_type {
>         bounce_acquired_read,
>         bounce_contended_write,
>         bounce_contended_read,
> +       bounce_hungry,
>         nr_bounce_types,
> 
>         bounce_acquired = bounce_acquired_write,
> @@ -165,6 +166,7 @@ struct lockdep_map {
>         const char                      *name;
>  #ifdef CONFIG_LOCK_STAT
>         int                             cpu;
> +       int                             isspinlock;
>  #endif

perhaps a bitfield?

>  };
> 
> diff --git a/kernel/lockdep.c b/kernel/lockdep.c
> index 06e1571..b3314ed 100644
> --- a/kernel/lockdep.c
> +++ b/kernel/lockdep.c
> @@ -3000,6 +3000,20 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
>         struct lock_class_stats *stats;
>         unsigned int depth;
>         int i, point;
> +       spinlock_t * lock_ptr;
> +       unsigned long hungry;
> +
> +       if (lock->isspinlock){
> +               lock_ptr = container_of(lock,spinlock_t,dep_map);
> +
> +#if (NR_CPUS < 256)
> +               hungry=(unsigned char)((lock_ptr->raw_lock.slock>>8&0xff)-(lock_ptr->raw_lock.slock&0xff));
> +
> +#else
> +               hungry=(unsigned short)((lock_ptr->raw_lock.slock>>16&0xffff)-(lock_ptr->raw_lock.slock&0xffff));
> +
> +#endif

Prime example of uglyness that is unacceptable.

> +       }
> 
>         depth = curr->lockdep_depth;
>         if (DEBUG_LOCKS_WARN_ON(!depth))
> @@ -3030,9 +3044,13 @@ found_it:
>                 stats->contention_point[point]++;
>         if (lock->cpu != smp_processor_id())
>                 stats->bounces[bounce_contended + !!hlock->read]++;
> +       if (lock->isspinlock){
> +               stats->bounces[bounce_hungry]+=hungry+1;
> +       }
>         put_lock_stats(stats);

superfluous braces.


> diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
> index 9c4b025..2ec34ec 100644
> --- a/lib/spinlock_debug.c
> +++ b/lib/spinlock_debug.c
> @@ -27,6 +27,10 @@ void __spin_lock_init(spinlock_t *lock, const char *name,
>         lock->magic = SPINLOCK_MAGIC;
>         lock->owner = SPINLOCK_OWNER_INIT;
>         lock->owner_cpu = -1;
> +       lock->dep_map.isspinlock = 0;
> +#if ((defined(CONFIG_X86) || defined(CONFIG_X86_64))&& defined(CONFIG_LOCK_STAT))
> +       lock->dep_map.isspinlock = 1;
> +#endif

sorry, no, imagine the utter mess this will become when some other arch
adds ticket locks too...


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2.6.28-rc4]lock_stat: Add "con-hungry" to show that how many person-time fight for the ticket spinlock
  2008-11-18 16:20 ` Peter Zijlstra
@ 2008-11-19  5:18   ` Yang Xi
  2008-11-19 16:39     ` Peter Zijlstra
  0 siblings, 1 reply; 10+ messages in thread
From: Yang Xi @ 2008-11-19  5:18 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: linux-kernel, mingo

Thanks. This version should be better. I added __ticket_spin_nm_contended to
x86/include/asm/spinlock.h; it returns the number of threads waiting for or
holding the ticket spinlock (note: the number includes the holder). If the
spinlock is a ticket lock, "spin_nm_contended" maps to
__ticket_spin_nm_contended; otherwise it is 0.

Signed-off-by: Yang Xi <hiyangxi@gmail.com>

diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index d17c919..88c3774 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -172,6 +172,13 @@ static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
        return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1;
 }

+static inline int __ticket_spin_nm_contended(raw_spinlock_t *lock)
+{
+       int tmp = ACCESS_ONCE(lock->slock);
+
+       return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) + 1;
+}
+
 #ifdef CONFIG_PARAVIRT
 /*
  * Define virtualization-friendly old-style lock byte lock, for use in
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 331e5f1..5bc0a2f 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -136,6 +136,7 @@ enum bounce_type {
        bounce_acquired_read,
        bounce_contended_write,
        bounce_contended_read,
+       bounce_hungry,
        nr_bounce_types,

        bounce_acquired = bounce_acquired_write,
@@ -165,6 +166,7 @@ struct lockdep_map {
        const char                      *name;
 #ifdef CONFIG_LOCK_STAT
        int                             cpu;
+       unsigned int                    isspinlock:1;
 #endif
 };

diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index e0c0fcc..322190d 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -127,6 +127,12 @@ do {								\
 #define spin_is_contended(lock)	__raw_spin_is_contended(&(lock)->raw_lock)
 #endif

+#ifdef TICKET_SHIFT
+#define spin_nm_contended(lock) __ticket_spin_nm_contended(&(lock)->raw_lock)
+#else
+#define spin_nm_contended(lock) (0)
+#endif
+
 /**
  * spin_unlock_wait - wait until the spinlock gets unlocked
  * @lock: the spinlock in question.
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 06e1571..5fe9e8a 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3000,7 +3000,14 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
        struct lock_class_stats *stats;
        unsigned int depth;
        int i, point;
-
+       spinlock_t * lock_ptr;
+       unsigned long hungry = 0;
+
+       if (lock->isspinlock) {
+               lock_ptr = container_of(lock,spinlock_t,dep_map);
+               hungry = spin_nm_contended(lock_ptr);
+       }
+
        depth = curr->lockdep_depth;
        if (DEBUG_LOCKS_WARN_ON(!depth))
                return;
@@ -3029,10 +3036,13 @@ found_it:
        if (point < ARRAY_SIZE(stats->contention_point))
                stats->contention_point[point]++;
        if (lock->cpu != smp_processor_id())
-               stats->bounces[bounce_contended + !!hlock->read]++;
+               stats->bounces[bounce_contended + !!hlock->read]++;
+       stats->bounces[bounce_hungry]+=hungry;
+
        put_lock_stats(stats);
 }

+
 static void
 __lock_acquired(struct lockdep_map *lock)
 {
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index 20dbcbf..2fc3ca0 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -534,7 +534,8 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
                else
                        seq_printf(m, "%40s:", name);

-               seq_printf(m, "%14lu ", stats->bounces[bounce_contended_write]);
+               seq_printf(m, "%14lu ", stats->bounces[bounce_contended_write]);
+               seq_printf(m, "%14lu ", stats->bounces[bounce_hungry]);
                seq_lock_time(m, &stats->write_waittime);
                seq_printf(m, " %14lu ", stats->bounces[bounce_acquired_write]);
                seq_lock_time(m, &stats->write_holdtime);
@@ -584,10 +585,11 @@ static void seq_header(struct seq_file *m)
 {
        seq_printf(m, "lock_stat version 0.2\n");
        seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1));
-       seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s "
+       seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s %14s "
                        "%14s %14s\n",
                        "class name",
                        "con-bounces",
+                       "con-hungry",
                        "contentions",
                        "waittime-min",
                        "waittime-max",
@@ -597,7 +599,7 @@ static void seq_header(struct seq_file *m)
                        "holdtime-min",
                        "holdtime-max",
                        "holdtime-total");
-       seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1));
+       seq_line(m, '-', 0, 40 + 1 + 11 * (14 + 1));
        seq_printf(m, "\n");
 }

diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
index 9c4b025..5c6e06f 100644
--- a/lib/spinlock_debug.c
+++ b/lib/spinlock_debug.c
@@ -27,6 +27,7 @@ void __spin_lock_init(spinlock_t *lock, const char *name,
        lock->magic = SPINLOCK_MAGIC;
        lock->owner = SPINLOCK_OWNER_INIT;
        lock->owner_cpu = -1;
+       lock->dep_map.isspinlock = 1;
 }

 EXPORT_SYMBOL(__spin_lock_init);

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 2.6.28-rc4]lock_stat: Add "con-hungry" to show that how many person-time fight for the ticket spinlock
  2008-11-19  5:18   ` Yang Xi
@ 2008-11-19 16:39     ` Peter Zijlstra
  2008-11-20  8:09       ` Yang Xi
  0 siblings, 1 reply; 10+ messages in thread
From: Peter Zijlstra @ 2008-11-19 16:39 UTC (permalink / raw)
  To: Yang Xi; +Cc: linux-kernel, mingo

On Wed, 2008-11-19 at 13:18 +0800, Yang Xi wrote:
> Thanks. This version should be better. I added __ticket_spin_nm_contended to
> x86/include/asm/spinlock.h; it returns the number of threads waiting for or
> holding the ticket spinlock (note: the number includes the holder). If the
> spinlock is a ticket lock, "spin_nm_contended" maps to
> __ticket_spin_nm_contended; otherwise it is 0.

Much better indeed, still some comments though :-)

> Signed-off-by: Yang Xi <hiyangxi@gmail.com>
> 
> diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
> index d17c919..88c3774 100644
> --- a/arch/x86/include/asm/spinlock.h
> +++ b/arch/x86/include/asm/spinlock.h
> @@ -172,6 +172,13 @@ static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
>         return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1;
>  }
> 
> +static inline int __ticket_spin_nm_contended(raw_spinlock_t *lock)
> +{
> +       int tmp = ACCESS_ONCE(lock->slock);
> +
> +       return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) + 1;
> +}
> +
>  #ifdef CONFIG_PARAVIRT
>  /*
>   * Define virtualization-friendly old-style lock byte lock, for use in
> diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
> index 331e5f1..5bc0a2f 100644
> --- a/include/linux/lockdep.h
> +++ b/include/linux/lockdep.h
> @@ -136,6 +136,7 @@ enum bounce_type {
>         bounce_acquired_read,
>         bounce_contended_write,
>         bounce_contended_read,
> +       bounce_hungry,
>         nr_bounce_types,
> 
>         bounce_acquired = bounce_acquired_write,
> @@ -165,6 +166,7 @@ struct lockdep_map {
>         const char                      *name;
>  #ifdef CONFIG_LOCK_STAT
>         int                             cpu;
> +       unsigned int                    isspinlock:1;

I of course meant folding cpu and isspinlock into a combined bitfield
(sorry for not being more clear), thereby saving space, this still takes
2*sizeof(int).

We can safely take some bits from the cpu number as there currently are
no plans for a 2g cpu machine, right SGI? :-)
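
To make the space argument concrete, a small user-space mock-up comparing
the two layouts (structure and field names here are illustrative stand-ins,
not the kernel definitions):

#include <stdio.h>

/* int cpu plus a separate one-bit field, as in the v2 patch: still 2 ints. */
struct lockstat_fields_v2 {
	int		cpu;
	unsigned int	isspinlock:1;
};

/* one int carved into a 31-bit cpu number plus a 1-bit flag */
struct lockstat_fields_packed {
	unsigned int	cpu:31;
	unsigned int	isspinlock:1;
};

int main(void)
{
	printf("v2: %zu bytes, packed: %zu bytes\n",
	       sizeof(struct lockstat_fields_v2),
	       sizeof(struct lockstat_fields_packed));	/* typically 8 vs 4 */
	return 0;
}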

>  #endif
>  };
> 
> diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
> index e0c0fcc..322190d 100644
> --- a/include/linux/spinlock.h
> +++ b/include/linux/spinlock.h
> @@ -127,6 +127,12 @@ do {								\
>  #define spin_is_contended(lock)	__raw_spin_is_contended(&(lock)->raw_lock)
>  #endif
> 
> +#ifdef TICKET_SHIFT
> +#define spin_nm_contended(lock) __ticket_spin_nm_contended(&(lock)->raw_lock)
> +#else
> +#define spin_nm_contended(lock) (0)
> +#endif

This is a bit icky still, I don't think TICKET_SHIFT is necessarily the
best macro to check on (other ticket lock implementations might not
define it).

A possible solution is to introduce a Kconfig variable HAVE_TICKET_LOCK
and select that from x86.

Also, may I suggest another name, spin_nr_contended() perhaps?

>  /**
>   * spin_unlock_wait - wait until the spinlock gets unlocked
>   * @lock: the spinlock in question.
> diff --git a/kernel/lockdep.c b/kernel/lockdep.c
> index 06e1571..5fe9e8a 100644
> --- a/kernel/lockdep.c
> +++ b/kernel/lockdep.c
> @@ -3000,7 +3000,14 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
>         struct lock_class_stats *stats;
>         unsigned int depth;
>         int i, point;
> -
> +       spinlock_t * lock_ptr;
> +       unsigned long hungry = 0;

This violates coding style, please run checkpatch.

> +
> +       if (lock->isspinlock) {
> +               lock_ptr = container_of(lock,spinlock_t,dep_map);
> +               hungry = spin_nm_contended(lock_ptr);
> +       }
> +
>         depth = curr->lockdep_depth;
>         if (DEBUG_LOCKS_WARN_ON(!depth))
>                 return;

Hth,

Peter


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2.6.28-rc4]lock_stat: Add "con-hungry" to show that how many person-time fight for the ticket spinlock
  2008-11-19 16:39     ` Peter Zijlstra
@ 2008-11-20  8:09       ` Yang Xi
  2008-11-23  1:40         ` Peter Zijlstra
  0 siblings, 1 reply; 10+ messages in thread
From: Yang Xi @ 2008-11-20  8:09 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: linux-kernel, mingo, chyyuu

> I of course meant folding cpu and isspinlock into a combined bitfield
> (sorry for not being more clear), thereby saving space, this still takes
> 2*sizeof(int).
>
> We can safely take some bits from the cpu number as there currently are
> no plans for a 2g cpu machine, right SGI? :-)
Thanks, OK: 31 bits are enough for the cpu field at this stage, leaving 1 bit
for isticketspinlock. Here is the new version; more comments are welcome :).

Signed-off-by: Yang Xi  <hiyangxi@gmail.com>

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4cf0ab1..20fee72 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -34,6 +34,7 @@ config X86
        select HAVE_ARCH_TRACEHOOK
        select HAVE_GENERIC_DMA_COHERENT if X86_32
        select HAVE_EFFICIENT_UNALIGNED_ACCESS
+       select HAVE_TICKET_SPINLOCK

 config ARCH_DEFCONFIG
        string
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index d17c919..da9cffc 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -172,6 +172,13 @@ static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
        return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1;
 }

+static inline int __ticket_spin_nr_contended(raw_spinlock_t *lock)
+{
+       int tmp = ACCESS_ONCE(lock->slock);
+
+       return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) + 1;
+}
+
 #ifdef CONFIG_PARAVIRT
 /*
  * Define virtualization-friendly old-style lock byte lock, for use in
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 331e5f1..3b88f0c 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -136,6 +136,7 @@ enum bounce_type {
        bounce_acquired_read,
        bounce_contended_write,
        bounce_contended_read,
+       bounce_hungry,
        nr_bounce_types,

        bounce_acquired = bounce_acquired_write,
@@ -164,7 +165,8 @@ struct lockdep_map {
        struct lock_class               *class_cache;
        const char                      *name;
 #ifdef CONFIG_LOCK_STAT
-       int                             cpu;
+       unsigned int                    cpu:31;
+       unsigned int                    isticketspinlock:1;
 #endif
 };

diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index e0c0fcc..e81c956 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -127,6 +127,12 @@ do {								\
 #define spin_is_contended(lock)	__raw_spin_is_contended(&(lock)->raw_lock)
 #endif

+#ifdef CONFIG_HAVE_TICKET_SPINLOCK
+#define spin_nr_contended(lock) __ticket_spin_nr_contended(&(lock)->raw_lock)
+#else
+#define spin_nr_contended(lock) (0)
+#endif
+
 /**
  * spin_unlock_wait - wait until the spinlock gets unlocked
  * @lock: the spinlock in question.
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 06e1571..28cd04e 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3000,6 +3000,13 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
        struct lock_class_stats *stats;
        unsigned int depth;
        int i, point;
+       spinlock_t *lock_ptr;
+       unsigned long hungry = 0;
+
+       if (lock->isticketspinlock) {
+               lock_ptr = container_of(lock, spinlock_t, dep_map);
+               hungry = spin_nr_contended(lock_ptr);
+       }

        depth = curr->lockdep_depth;
        if (DEBUG_LOCKS_WARN_ON(!depth))
@@ -3030,9 +3037,12 @@ found_it:
                stats->contention_point[point]++;
        if (lock->cpu != smp_processor_id())
                stats->bounces[bounce_contended + !!hlock->read]++;
+       stats->bounces[bounce_hungry] += hungry;
+
        put_lock_stats(stats);
 }

+
 static void
 __lock_acquired(struct lockdep_map *lock)
 {
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index 20dbcbf..585601f 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -535,6 +535,7 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
                        seq_printf(m, "%40s:", name);

                seq_printf(m, "%14lu ", stats->bounces[bounce_contended_write]);
+               seq_printf(m, "%14lu ", stats->bounces[bounce_hungry]);
                seq_lock_time(m, &stats->write_waittime);
                seq_printf(m, " %14lu ", stats->bounces[bounce_acquired_write]);
                seq_lock_time(m, &stats->write_holdtime);
@@ -584,11 +585,12 @@ static void seq_header(struct seq_file *m)
 {
        seq_printf(m, "lock_stat version 0.2\n");
        seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1));
-       seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s "
+       seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s %14s "
                        "%14s %14s\n",
                        "class name",
                        "con-bounces",
-                       "contentions",
+                       "con-hungry",
+                       "contentions",
                        "waittime-min",
                        "waittime-max",
                        "waittime-total",
@@ -597,7 +599,7 @@ static void seq_header(struct seq_file *m)
                        "holdtime-min",
                        "holdtime-max",
                        "holdtime-total");
-       seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1));
+       seq_line(m, '-', 0, 40 + 1 + 11 * (14 + 1));
        seq_printf(m, "\n");
 }

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index b0f239e..b9eb62a 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -318,6 +318,9 @@ config RT_MUTEX_TESTER
        help
          This option enables a rt-mutex tester.

+config HAVE_TICKET_SPINLOCK
+       bool
+
 config DEBUG_SPINLOCK
        bool "Spinlock and rw-lock debugging: basic checks"
        depends on DEBUG_KERNEL
diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
index 9c4b025..bba8d3e 100644
--- a/lib/spinlock_debug.c
+++ b/lib/spinlock_debug.c
@@ -27,6 +27,7 @@ void __spin_lock_init(spinlock_t *lock, const char *name,
        lock->magic = SPINLOCK_MAGIC;
        lock->owner = SPINLOCK_OWNER_INIT;
        lock->owner_cpu = -1;
+       lock->dep_map.isticketspinlock = 1;
 }

 EXPORT_SYMBOL(__spin_lock_init);

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 2.6.28-rc4]lock_stat: Add "con-hungry" to show that how many person-time fight for the ticket spinlock
  2008-11-20  8:09       ` Yang Xi
@ 2008-11-23  1:40         ` Peter Zijlstra
  2008-11-23  8:23           ` Yang Xi
  2008-12-26  7:24           ` Yang Xi
  0 siblings, 2 replies; 10+ messages in thread
From: Peter Zijlstra @ 2008-11-23  1:40 UTC (permalink / raw)
  To: Yang Xi; +Cc: linux-kernel, mingo, chyyuu

On Thu, 2008-11-20 at 16:09 +0800, Yang Xi wrote:
> > I of course meant folding cpu and isspinlock into a combined bitfield
> > (sorry for not being more clear), thereby saving space, this still takes
> > 2*sizeof(int).
> >
> > We can safely take some bits from the cpu number as there currently are
> > no plans for a 2g cpu machine, right SGI? :-)
> Thanks, OK: 31 bits are enough for the cpu field at this stage, leaving 1 bit
> for isticketspinlock. Here is the new version; more comments are welcome :).

Looks good, one more question :-)

> --- a/include/linux/spinlock.h
> +++ b/include/linux/spinlock.h
> @@ -127,6 +127,12 @@ do {								\
>  #define spin_is_contended(lock)	__raw_spin_is_contended(&(lock)->raw_lock)
>  #endif
> 
> +#ifdef CONFIG_HAVE_TICKET_SPINLOCK
> +#define spin_nr_contended(lock) __ticket_spin_nr_contended(&(lock)->raw_lock)
> +#else
> +#define spin_nr_contended(lock) (0)
> +#endif
> +

Does it make sense to make the alternative case return
spin_is_contended()?


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2.6.28-rc4]lock_stat: Add "con-hungry" to show that how many person-time fight for the ticket spinlock
  2008-11-23  1:40         ` Peter Zijlstra
@ 2008-11-23  8:23           ` Yang Xi
  2008-12-26  7:24           ` Yang Xi
  1 sibling, 0 replies; 10+ messages in thread
From: Yang Xi @ 2008-11-23  8:23 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: linux-kernel, mingo, chyyuu, Fu, Michael

On Sun, Nov 23, 2008 at 9:40 AM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> On Thu, 2008-11-20 at 16:09 +0800, Yang Xi wrote:
>> > I of course meant folding cpu and isspinlock into a combined bitfield
>> > (sorry for not being more clear), thereby saving space, this still takes
>> > 2*sizeof(int).
>> >
>> > We can safely take some bits from the cpu number as there currently are
>> > no plans for a 2g cpu machine, right SGI? :-)
>> Thanks, OK: 31 bits are enough for the cpu field at this stage, leaving 1 bit
>> for isticketspinlock. Here is the new version; more comments are welcome :).
>
> Looks good, one more question :-)
>
>> --- a/include/linux/spinlock.h
>> +++ b/include/linux/spinlock.h
>> @@ -127,6 +127,12 @@ do {								\
>>  #define spin_is_contended(lock)	__raw_spin_is_contended(&(lock)->raw_lock)
>>  #endif
>>
>> +#ifdef CONFIG_HAVE_TICKET_SPINLOCK
>> +#define spin_nr_contended(lock) __ticket_spin_nr_contended(&(lock)->raw_lock)
>> +#else
>> +#define spin_nr_contended(lock) (0)
>> +#endif
>> +
>
> Does it make sense to make the alternative case return
> spin_is_contended()?
You mean do it like this?
#ifdef CONFIG_HAVE_TICKET_SPINLOCK
#define spin_nr_contended(lock) __ticket_spin_nr_contended(&(lock)->raw_lock)
#else
#define spin_nr_contended(lock) (spin_is_contended(lock) ? 1: 0)
#endif

If we do it like this, then on architectures that do not have ticket
spinlocks we can still learn what fraction of contentions involve more than
two threads (con-hungry / contentions). But I do not know whether anyone is
interested in that number.
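
As a purely illustrative example: if a lock class on such an architecture
showed contentions = 10000 and con-hungry = 2500, roughly a quarter of the
recorded contention events still found the lock contended at the time
__lock_contended() ran.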

Best regards

>
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2.6.28-rc4]lock_stat: Add "con-hungry" to show that how many person-time fight for the ticket spinlock
  2008-11-23  1:40         ` Peter Zijlstra
  2008-11-23  8:23           ` Yang Xi
@ 2008-12-26  7:24           ` Yang Xi
  2008-12-26  8:34             ` Ingo Molnar
  1 sibling, 1 reply; 10+ messages in thread
From: Yang Xi @ 2008-12-26  7:24 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: linux-kernel, mingo, chyyuu

* Add max-hungry to capture the maximum number of threads seen fighting over
a ticket lock.
* For non-ticket-lock implementations, spin_nr_contended(lock) is
(spin_is_contended(lock) ? 1 : 0), which means "con-hungry" counts how many
times more than two threads were waiting for the spinlock to become free.

Because lock-stat and lockdep are so heavy, the statistics are not very
accurate :(
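
As a purely illustrative example of the two new columns: if successive
contention events on a lock class observe queue depths of 2, 5 and 3,
con-hungry accumulates 10 while max-hungry ends up as 5.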

Here is the patch.

Signed-off-by: Yangxi <hiyangxi@gmail.com>
---

 arch/x86/Kconfig                |    1 +
 arch/x86/include/asm/spinlock.h |    7 +++++++
 include/linux/lockdep.h         |    5 ++++-
 include/linux/spinlock.h        |    6 ++++++
 kernel/lockdep.c                |   24 +++++++++++++++++++++---
 kernel/lockdep_proc.c           |   10 +++++++---
 lib/Kconfig.debug               |    3 +++
 lib/spinlock_debug.c            |    1 +
 8 files changed, 50 insertions(+), 7 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4cf0ab1..20fee72 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -34,6 +34,7 @@ config X86
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_GENERIC_DMA_COHERENT if X86_32
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
+	select HAVE_TICKET_SPINLOCK

 config ARCH_DEFCONFIG
 	string
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index d17c919..da9cffc 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -172,6 +172,13 @@ static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
 	return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1;
 }

+static inline int __ticket_spin_nr_contended(raw_spinlock_t *lock)
+{
+	int tmp = ACCESS_ONCE(lock->slock);
+
+	return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) + 1;
+}
+
 #ifdef CONFIG_PARAVIRT
 /*
  * Define virtualization-friendly old-style lock byte lock, for use in
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 331e5f1..c45efc2 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -136,6 +136,8 @@ enum bounce_type {
 	bounce_acquired_read,
 	bounce_contended_write,
 	bounce_contended_read,
+	bounce_hungry,
+	bounce_max_hungry,
 	nr_bounce_types,

 	bounce_acquired = bounce_acquired_write,
@@ -164,7 +166,8 @@ struct lockdep_map {
 	struct lock_class		*class_cache;
 	const char			*name;
 #ifdef CONFIG_LOCK_STAT
-	int				cpu;
+	unsigned int			cpu:31;
+	unsigned int                    isticketspinlock:1;
 #endif
 };

diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index e0c0fcc..c6be4bc 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -127,6 +127,12 @@ do {								\
 #define spin_is_contended(lock)	__raw_spin_is_contended(&(lock)->raw_lock)
 #endif

+#ifdef CONFIG_HAVE_TICKET_SPINLOCK
+#define spin_nr_contended(lock) __ticket_spin_nr_contended(&(lock)->raw_lock)
+#else
+#define spin_nr_contended(lock) (spin_is_contended(lock) ? 1 : 0)
+#endif
+
 /**
  * spin_unlock_wait - wait until the spinlock gets unlocked
  * @lock: the spinlock in question.
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 06e1571..8483e1e 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -191,8 +191,14 @@ struct lock_class_stats lock_stats(struct lock_class *class)
 		lock_time_add(&pcs->read_holdtime, &stats.read_holdtime);
 		lock_time_add(&pcs->write_holdtime, &stats.write_holdtime);

-		for (i = 0; i < ARRAY_SIZE(stats.bounces); i++)
+		for (i = 0; i < ARRAY_SIZE(stats.bounces); i++) {
+			if (i == bounce_max_hungry) {
+				if (stats.bounces[i] < pcs->bounces[i])
+					stats.bounces[i] = pcs->bounces[i];
+				continue;
+			}
 			stats.bounces[i] += pcs->bounces[i];
+		}
 	}

 	return stats;
@@ -2588,7 +2594,6 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,

 	if (check == 2 && !mark_irqflags(curr, hlock))
 		return 0;
-
 	/* mark it as used: */
 	if (!mark_lock(curr, hlock, LOCK_USED))
 		return 0;
@@ -2623,7 +2628,6 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,

 	if (!validate_chain(curr, lock, hlock, chain_head, chain_key))
 		return 0;
-
 	curr->curr_chain_key = chain_key;
 	curr->lockdep_depth++;
 	check_chain_key(curr);
@@ -3000,6 +3004,13 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
 	struct lock_class_stats *stats;
 	unsigned int depth;
 	int i, point;
+	spinlock_t *lock_ptr;
+	unsigned long hungry = 0;
+
+	if (lock->isticketspinlock) {
+		lock_ptr = container_of(lock, spinlock_t, dep_map);
+		hungry = spin_nr_contended(lock_ptr);
+	}

 	depth = curr->lockdep_depth;
 	if (DEBUG_LOCKS_WARN_ON(!depth))
@@ -3030,9 +3041,16 @@ found_it:
 		stats->contention_point[point]++;
 	if (lock->cpu != smp_processor_id())
 		stats->bounces[bounce_contended + !!hlock->read]++;
+	stats->bounces[bounce_hungry] += hungry;
+	if (lock->isticketspinlock) {
+		if (stats->bounces[bounce_max_hungry] < hungry)
+			stats->bounces[bounce_max_hungry] = hungry;
+	}
+
 	put_lock_stats(stats);
 }

+
 static void
 __lock_acquired(struct lockdep_map *lock)
 {
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index 20dbcbf..d5f94e5 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -535,6 +535,8 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
 			seq_printf(m, "%40s:", name);

 		seq_printf(m, "%14lu ", stats->bounces[bounce_contended_write]);
+		seq_printf(m, "%14lu ", stats->bounces[bounce_hungry]);
+		seq_printf(m, "%14lu ", stats->bounces[bounce_max_hungry]);
 		seq_lock_time(m, &stats->write_waittime);
 		seq_printf(m, " %14lu ", stats->bounces[bounce_acquired_write]);
 		seq_lock_time(m, &stats->write_holdtime);
@@ -583,11 +585,13 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
 static void seq_header(struct seq_file *m)
 {
 	seq_printf(m, "lock_stat version 0.2\n");
-	seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1));
-	seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s "
+	seq_line(m, '-', 0, 40 + 1 + 12 * (14 + 1));
+	seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s %14s %14s "
 			"%14s %14s\n",
 			"class name",
 			"con-bounces",
+			"con-hungry",
+			"max-hungry",
 			"contentions",
 			"waittime-min",
 			"waittime-max",
@@ -597,7 +601,7 @@ static void seq_header(struct seq_file *m)
 			"holdtime-min",
 			"holdtime-max",
 			"holdtime-total");
-	seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1));
+	seq_line(m, '-', 0, 40 + 1 + 12 * (14 + 1));
 	seq_printf(m, "\n");
 }

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index b0f239e..b9eb62a 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -318,6 +318,9 @@ config RT_MUTEX_TESTER
 	help
 	  This option enables a rt-mutex tester.

+config HAVE_TICKET_SPINLOCK
+       bool
+
 config DEBUG_SPINLOCK
 	bool "Spinlock and rw-lock debugging: basic checks"
 	depends on DEBUG_KERNEL
diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
index 9c4b025..bba8d3e 100644
--- a/lib/spinlock_debug.c
+++ b/lib/spinlock_debug.c
@@ -27,6 +27,7 @@ void __spin_lock_init(spinlock_t *lock, const char *name,
 	lock->magic = SPINLOCK_MAGIC;
 	lock->owner = SPINLOCK_OWNER_INIT;
 	lock->owner_cpu = -1;
+	lock->dep_map.isticketspinlock = 1;
 }

 EXPORT_SYMBOL(__spin_lock_init);

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 2.6.28-rc4]lock_stat: Add "con-hungry" to show that how many person-time fight for the ticket spinlock
  2008-12-26  7:24           ` Yang Xi
@ 2008-12-26  8:34             ` Ingo Molnar
  2008-12-26 13:13               ` Yang Xi
  0 siblings, 1 reply; 10+ messages in thread
From: Ingo Molnar @ 2008-12-26  8:34 UTC (permalink / raw)
  To: Yang Xi; +Cc: Peter Zijlstra, linux-kernel, chyyuu


* Yang Xi <yangxilkm@gmail.com> wrote:

> Because lock-stat and lockdep are so heavy, the statistics are not very
> accurate :(

hm, lockdep indeed has to do some non-trivial work - but is that really 
true of pure lockstat too? If you have a workload where you can see that 
it's heavy, you could do an NMI profile on x86 by running kerneltop on 
tip/master:

     http://redhat.com/~mingo/perfcounters/kerneltop.c

You should get a top-alike list of the highest-cost functions. If lockstat 
is heavy, its activities should show up there.

> @@ -34,6 +34,7 @@ config X86
>  	select HAVE_ARCH_TRACEHOOK
>  	select HAVE_GENERIC_DMA_COHERENT if X86_32
>  	select HAVE_EFFICIENT_UNALIGNED_ACCESS
> +	select HAVE_TICKET_SPINLOCK

no fundamental objections against your patch, but i think it needs a 
couple of cleanups first.

For example, this HAVE_TICKET_SPINLOCK distinction is unnecessarily 
exposed to the core kernel, why not just allow architectures to define 
spin_nr_contended():

> +#ifdef CONFIG_HAVE_TICKET_SPINLOCK
> +#define spin_nr_contended(lock) __ticket_spin_nr_contended(&(lock)->raw_lock)
> +#else
> +#define spin_nr_contended(lock) (spin_is_contended(lock) ? 1 : 0)
> +#endif

and do something like this in spinlock.h to give a default definition:

#ifndef spin_nr_contended
# define spin_nr_contended(lock) (spin_is_contended(lock) ? 1 : 0)
#endif

> @@ -2588,7 +2594,6 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
> 
>  	if (check == 2 && !mark_irqflags(curr, hlock))
>  		return 0;
> -
>  	/* mark it as used: */
>  	if (!mark_lock(curr, hlock, LOCK_USED))
>  		return 0;
> @@ -2623,7 +2628,6 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
> 
>  	if (!validate_chain(curr, lock, hlock, chain_head, chain_key))
>  		return 0;
> -
>  	curr->curr_chain_key = chain_key;
>  	curr->lockdep_depth++;
>  	check_chain_key(curr);
> @@ -3000,6 +3004,13 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)

those newlines you removed were there for a reason - they delimit blocks
of code from each other and make return statements more visible.

>  	struct lock_class_stats *stats;
>  	unsigned int depth;
>  	int i, point;
> +	spinlock_t *lock_ptr;
> +	unsigned long hungry = 0;

please keep the local variable definitions in their original style, i.e. a 
reverse christmas tree:

>  	struct lock_class_stats *stats;
> +	unsigned long hungry = 0;
> +	spinlock_t *lock_ptr;
>  	unsigned int depth;
>  	int i, point;

> +
> +	if (lock->isticketspinlock) {
> +		lock_ptr = container_of(lock, spinlock_t, dep_map);
> +		hungry = spin_nr_contended(lock_ptr);
> +	}

do we need the ->isticketspinlock distinction? Cannot we call 
spin_nr_contended() for all spinlocks? (it's just that for ticket 
spinlocks we get a real value out of it - for normal spinlocks we only get 
0/1 out of it. But that is not a problem really.)

also, please rename 'hungry' to something more descriptive: for example 
'nr_contended' fits pretty well?

> @@ -3030,9 +3041,16 @@ found_it:
>  		stats->contention_point[point]++;
>  	if (lock->cpu != smp_processor_id())
>  		stats->bounces[bounce_contended + !!hlock->read]++;
> +	stats->bounces[bounce_hungry] += hungry;
> +	if (lock->isticketspinlock) {
> +		if (stats->bounces[bounce_max_hungry] < hungry)
> +			stats->bounces[bounce_max_hungry] = hungry;
> +	}
> +
>  	put_lock_stats(stats);
>  }
> 
> +
>  static void

spurious newline.

	Ingo

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2.6.28-rc4]lock_stat: Add "con-hungry" to show that how many person-time fight for the ticket spinlock
  2008-12-26  8:34             ` Ingo Molnar
@ 2008-12-26 13:13               ` Yang Xi
  0 siblings, 0 replies; 10+ messages in thread
From: Yang Xi @ 2008-12-26 13:13 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Peter Zijlstra, linux-kernel, chyyuu

> #ifndef spin_nr_contended
> # define spin_nr_contended(lock) (spin_is_contended(lock) ? 1 : 0)
> #endif
I did this, and also renamed spin_nr_contended to spin_nr_contender.
> do we need the ->isticketspinlock distinction? Cannot we call
> spin_nr_contended() for all spinlocks? (it's just that for ticket
> spinlocks we get a real value out of it - for normal spinlocks we only get
> 0/1 out of it. But that is not a problem really.)
Spinlocks are not the only locks that invoke lock_contended(), so we still
have to know that the caller really is a spinlock. I have renamed
isticketspinlock to isspinlock to avoid the confusion.
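
To make the constraint concrete, here is a small user-space mock-up (the
structures and the container_of() macro below are simplified stand-ins, not
the kernel definitions): the dep_map handed to lock_contended() may be
embedded in another lock type such as a mutex, and only when it really sits
inside a spinlock is the container_of() conversion back to the lock
meaningful.

#include <stdio.h>
#include <stddef.h>

struct lockdep_map { unsigned int cpu; };

struct spinlock { unsigned int slock; struct lockdep_map dep_map; };
struct mutex    { long count; struct lockdep_map dep_map; };

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

int main(void)
{
	struct spinlock s = { .slock = 0x0703 };
	struct mutex m = { .count = 1 };

	/* Fine: this dep_map really lives inside a spinlock. */
	printf("%#x\n",
	       container_of(&s.dep_map, struct spinlock, dep_map)->slock);

	/* Bogus: the resulting pointer does not point at a spinlock at all,
	 * so reading ->slock through it would pick up unrelated mutex bytes;
	 * hence the need for a flag before attempting the conversion. */
	struct spinlock *bogus =
		container_of(&m.dep_map, struct spinlock, dep_map);
	(void)bogus;

	return 0;
}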
>
> also, please rename 'hungry' to something more descriptive: for example
> 'nr_contended' fits pretty well?
I changed hungry to contender, con-hungry to nr-contender, and max-hungry
to max-contender.

Here is the patch

Signed-off-by: Yangxi <hiyangxi@gmail.com>
---
 arch/x86/include/asm/spinlock.h |    9 +++++++++
 include/linux/lockdep.h         |    5 ++++-
 include/linux/spinlock.h        |    4 ++++
 kernel/lockdep.c                |   21 ++++++++++++++++++++-
 kernel/lockdep_proc.c           |   10 +++++++---
 lib/spinlock_debug.c            |    1 +
 6 files changed, 45 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index d17c919..5c52960 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -172,6 +172,15 @@ static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
 	return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1;
 }

+static inline int __ticket_spin_nr_contender(raw_spinlock_t *lock)
+{
+	int tmp = ACCESS_ONCE(lock->slock);
+
+	return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) + 1;
+}
+
+#define spin_nr_contender(lock) __ticket_spin_nr_contender(&(lock)->raw_lock)
+
 #ifdef CONFIG_PARAVIRT
 /*
  * Define virtualization-friendly old-style lock byte lock, for use in
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 331e5f1..d777102 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -136,6 +136,8 @@ enum bounce_type {
 	bounce_acquired_read,
 	bounce_contended_write,
 	bounce_contended_read,
+	nr_contender,
+	max_contender,
 	nr_bounce_types,

 	bounce_acquired = bounce_acquired_write,
@@ -164,7 +166,8 @@ struct lockdep_map {
 	struct lock_class		*class_cache;
 	const char			*name;
 #ifdef CONFIG_LOCK_STAT
-	int				cpu;
+	unsigned int			cpu:32;
+	unsigned int                    isspinlock:1;
 #endif
 };

diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index e0c0fcc..54b8a88 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -127,6 +127,10 @@ do {								\
 #define spin_is_contended(lock)	__raw_spin_is_contended(&(lock)->raw_lock)
 #endif

+#ifndef spin_nr_contender
+#define spin_nr_contender(lock) (spin_is_contended(lock) ? 1 : 0)
+#endif
+
 /**
  * spin_unlock_wait - wait until the spinlock gets unlocked
  * @lock: the spinlock in question.
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 06e1571..9a9e7d7 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -191,8 +191,14 @@ struct lock_class_stats lock_stats(struct lock_class *class)
 		lock_time_add(&pcs->read_holdtime, &stats.read_holdtime);
 		lock_time_add(&pcs->write_holdtime, &stats.write_holdtime);

-		for (i = 0; i < ARRAY_SIZE(stats.bounces); i++)
+		for (i = 0; i < ARRAY_SIZE(stats.bounces); i++) {
+			if (i == max_contender) {
+				if (stats.bounces[i] < pcs->bounces[i])
+					stats.bounces[i] = pcs->bounces[i];
+				continue;
+			}
 			stats.bounces[i] += pcs->bounces[i];
+		}
 	}

 	return stats;
@@ -2998,9 +3004,16 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
 	struct task_struct *curr = current;
 	struct held_lock *hlock, *prev_hlock;
 	struct lock_class_stats *stats;
+	unsigned long contender;
+	spinlock_t *lock_ptr;
 	unsigned int depth;
 	int i, point;

+	if (lock->isspinlock) {
+		lock_ptr = container_of(lock, spinlock_t, dep_map);
+		contender = spin_nr_contender(lock_ptr);
+	}
+
 	depth = curr->lockdep_depth;
 	if (DEBUG_LOCKS_WARN_ON(!depth))
 		return;
@@ -3030,6 +3043,12 @@ found_it:
 		stats->contention_point[point]++;
 	if (lock->cpu != smp_processor_id())
 		stats->bounces[bounce_contended + !!hlock->read]++;
+	stats->bounces[nr_contender] += contender;
+	if (lock->isspinlock) {
+		if (stats->bounces[max_contender] < contender)
+			stats->bounces[max_contender] = contender;
+	}
+
 	put_lock_stats(stats);
 }

diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index 20dbcbf..c87b42c 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -535,6 +535,8 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
 			seq_printf(m, "%40s:", name);

 		seq_printf(m, "%14lu ", stats->bounces[bounce_contended_write]);
+		seq_printf(m, "%14lu ", stats->bounces[nr_contender]);
+		seq_printf(m, "%14lu ", stats->bounces[max_contender]);
 		seq_lock_time(m, &stats->write_waittime);
 		seq_printf(m, " %14lu ", stats->bounces[bounce_acquired_write]);
 		seq_lock_time(m, &stats->write_holdtime);
@@ -583,11 +585,13 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
 static void seq_header(struct seq_file *m)
 {
 	seq_printf(m, "lock_stat version 0.2\n");
-	seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1));
-	seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s "
+	seq_line(m, '-', 0, 40 + 1 + 12 * (14 + 1));
+	seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s %14s %14s "
 			"%14s %14s\n",
 			"class name",
 			"con-bounces",
+			"nr-contender",
+			"max-contender",
 			"contentions",
 			"waittime-min",
 			"waittime-max",
@@ -597,7 +601,7 @@ static void seq_header(struct seq_file *m)
 			"holdtime-min",
 			"holdtime-max",
 			"holdtime-total");
-	seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1));
+	seq_line(m, '-', 0, 40 + 1 + 12 * (14 + 1));
 	seq_printf(m, "\n");
 }

diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
index 9c4b025..5c6e06f 100644
--- a/lib/spinlock_debug.c
+++ b/lib/spinlock_debug.c
@@ -27,6 +27,7 @@ void __spin_lock_init(spinlock_t *lock, const char *name,
 	lock->magic = SPINLOCK_MAGIC;
 	lock->owner = SPINLOCK_OWNER_INIT;
 	lock->owner_cpu = -1;
+	lock->dep_map.isspinlock = 1;
 }

 EXPORT_SYMBOL(__spin_lock_init);

^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread

Thread overview: 10+ messages
2008-11-18 11:00 [PATCH 2.6.28-rc4]lock_stat: Add "con-hungry" to show that how many person-time fight for the ticket spinlock Yang Xi
2008-11-18 16:20 ` Peter Zijlstra
2008-11-19  5:18   ` Yang Xi
2008-11-19 16:39     ` Peter Zijlstra
2008-11-20  8:09       ` Yang Xi
2008-11-23  1:40         ` Peter Zijlstra
2008-11-23  8:23           ` Yang Xi
2008-12-26  7:24           ` Yang Xi
2008-12-26  8:34             ` Ingo Molnar
2008-12-26 13:13               ` Yang Xi
