All of lore.kernel.org
 help / color / mirror / Atom feed
From: Waiman Long <waiman.long@hp.com>
To: Peter Zijlstra <peterz@infradead.org>
Cc: linux-arch@vger.kernel.org,
	Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>,
	Oleg Nesterov <oleg@redhat.com>,
	kvm@vger.kernel.org, Scott J Norton <scott.norton@hp.com>,
	x86@kernel.org, Paolo Bonzini <paolo.bonzini@gmail.com>,
	linux-kernel@vger.kernel.org,
	virtualization@lists.linux-foundation.org,
	Ingo Molnar <mingo@redhat.com>,
	David Vrabel <david.vrabel@citrix.com>,
	"H. Peter Anvin" <hpa@zytor.com>,
	xen-devel@lists.xenproject.org,
	Thomas Gleixner <tglx@linutronix.de>,
	"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Boris Ostrovsky <boris.ostrovsky@oracle.com>,
	Douglas Hatch <doug.hatch@hp.com>
Subject: Re: [PATCH v12 09/11] pvqspinlock, x86: Add para-virtualization support
Date: Mon, 27 Oct 2014 13:15:53 -0400	[thread overview]
Message-ID: <544E7DC9.5020903__5012.37018402225$1414430281$gmane$org@hp.com> (raw)
In-Reply-To: <20141024220423.GB10501@worktop.programming.kicks-ass.net>

On 10/24/2014 06:04 PM, Peter Zijlstra wrote:
> On Fri, Oct 24, 2014 at 04:53:27PM -0400, Waiman Long wrote:
>> The additional register pressure may just cause a few more register moves
>> which should be negligible in the overall performance . The additional
>> icache pressure, however, may have some impact on performance. I was trying
>> to balance the performance of the pv and non-pv versions so that we won't
>> penalize the pv code too much for a bit more performance in the non-pv code.
>> Doing it your way will add a lot of function call and register
>> saving/restoring to the pv code.
> If people care about performance they should not be using virt crap :-)
>
> I only really care about bare metal.

Yes, I am aware of that. However, the whole point of doing PV spinlock 
is to improve performance in a virtual guest.

Anyway, I had done some measurements. In my test system, the 
queue_spin_lock_slowpath() function has a text size of about 400 bytes 
without PV, but 1120 bytes with PV. I made some changes to create 
separate versions of PV and non-PV slowpath functions as shown by the 
diff below. The text size is now about 430 bytes for the non-PV version 
and 925 bytes for the PV version. The overall object size increases by a 
bit more than 200 bytes, but the icache footprint should be reduced no 
matter which version is used.

-Longman

----------------------------------------

diff --git a/arch/x86/include/asm/pvqspinlock.h 
b/arch/x86/include/asm/pvqspinlo
index d424252..241bf30 100644
--- a/arch/x86/include/asm/pvqspinlock.h
+++ b/arch/x86/include/asm/pvqspinlock.h
@@ -79,9 +79,6 @@ static inline void pv_init_node(struct mcs_spinlock *node)

         BUILD_BUG_ON(sizeof(struct pv_qnode) > 5*sizeof(struct 
mcs_spinlock));

-       if (!pv_enabled())
-               return;
-
         pn->cpustate = PV_CPU_ACTIVE;
         pn->mayhalt  = false;
         pn->mycpu    = smp_processor_id();
@@ -132,9 +129,6 @@ static inline bool pv_link_and_wait_node(u32 old, 
struct mcs
         struct pv_qnode *ppn, *pn = (struct pv_qnode *)node;
         unsigned int count;

-       if (!pv_enabled())
-               return false;
-
         if (!(old & _Q_TAIL_MASK)) {
                 node->locked = true;    /* At queue head now */
                 goto ret;
@@ -236,9 +230,6 @@ pv_wait_head(struct qspinlock *lock, struct 
mcs_spinlock *no
  {
         struct pv_qnode *pn = (struct pv_qnode *)node;

-       if (!pv_enabled())
-               return smp_load_acquire(&lock->val.counter);
-
         for (;;) {
                 unsigned int count;
                 s8 oldstate;
@@ -328,8 +319,6 @@ static inline void pv_wait_check(struct qspinlock *lock,
         struct pv_qnode *pnxt = (struct pv_qnode *)next;
         struct pv_qnode *pcur = (struct pv_qnode *)node;

-       if (!pv_enabled())
-               return;
         /*
          * Clear the locked and head values of lock holder
          */
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 1662dbd..05aea57 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -16,6 +16,7 @@
   * Authors: Waiman Long <waiman.long@hp.com>
   *          Peter Zijlstra <pzijlstr@redhat.com>
   */
+#ifndef _GEN_PV_LOCK_SLOWPATH
  #include <linux/smp.h>
  #include <linux/bug.h>
  #include <linux/cpumask.h>
@@ -271,19 +272,37 @@ void queue_spin_unlock_slowpath(struct qspinlock 
*lock)
  }
  EXPORT_SYMBOL(queue_spin_unlock_slowpath);

-#else
+static void pv_queue_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+
+#else  /* CONFIG_PARAVIRT_SPINLOCKS */
+
+static inline void pv_queue_spin_lock_slowpath(struct qspinlock *lock, 
u32 val)
+       { }

-static inline void pv_init_node(struct mcs_spinlock *node)     { }
-static inline void pv_wait_check(struct qspinlock *lock,
-                                struct mcs_spinlock *node,
-                                struct mcs_spinlock *next)     { }
-static inline bool pv_link_and_wait_node(u32 old, struct mcs_spinlock 
*node)
+#endif /* CONFIG_PARAVIRT_SPINLOCKS */
+
+/*
+ * Dummy PV functions for bare-metal slowpath code
+ */
+static inline void nopv_init_node(struct mcs_spinlock *node)   { }
+static inline void nopv_wait_check(struct qspinlock *lock,
+                                  struct mcs_spinlock *node,
+                                  struct mcs_spinlock *next)   { }
+static inline bool nopv_link_and_wait_node(u32 old, struct mcs_spinlock 
*node)
                    { return false; }
-static inline int  pv_wait_head(struct qspinlock *lock,
+static inline int  nopv_wait_head(struct qspinlock *lock,
                                 struct mcs_spinlock *node)
                    { return smp_load_acquire(&lock->val.counter); }
+static inline bool return_true(void)   { return true;  }
+static inline bool return_false(void)  { return false; }

-#endif /* CONFIG_PARAVIRT_SPINLOCKS */
+#define pv_init_node           nopv_init_node
+#define pv_wait_check          nopv_wait_check
+#define pv_link_and_wait_node  nopv_link_and_wait_node
+#define pv_wait_head           nopv_wait_head
+#define in_pv_code             return_false
+
+#endif /* _GEN_PV_LOCK_SLOWPATH */

  /**
   * queue_spin_lock_slowpath - acquire the queue spinlock
@@ -306,7 +325,11 @@ static inline int  pv_wait_head(struct qspinlock *lock,
   * contended             :    (*,x,y) +--> (*,0,0) ---> (*,0,1) -'  :
   *   queue               :         ^--'                             :
   */
+#ifdef _GEN_PV_LOCK_SLOWPATH
+static void pv_queue_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+#else
  void queue_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+#endif
  {
         struct mcs_spinlock *prev, *next, *node;
         u32 new, old, tail;
@@ -314,7 +337,12 @@ void queue_spin_lock_slowpath(struct qspinlock 
*lock, u32 v

         BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));

-       if (pv_enabled())
+       if (pv_enabled()) {
+               pv_queue_spin_lock_slowpath(lock, val);
+               return;
+       }
+
+       if (in_pv_code())
                 goto queue;

         if (virt_queue_spin_lock(lock))
@@ -474,3 +502,23 @@ release:
         this_cpu_dec(mcs_nodes[0].count);
  }
  EXPORT_SYMBOL(queue_spin_lock_slowpath);
+
+#if !defined(_GEN_PV_LOCK_SLOWPATH) && defined(CONFIG_PARAVIRT_SPINLOCKS)
+/*
+ * Generate the PV version of the queue_spin_lock_slowpath function
+ */
+#undef pv_init_node
+#undef pv_wait_check
+#undef pv_link_and_wait_node
+#undef pv_wait_head
+#undef EXPORT_SYMBOL
+#undef in_pv_code
+
+#define _GEN_PV_LOCK_SLOWPATH
+#define EXPORT_SYMBOL(x)
+#define in_pv_code     return_true
+#define pv_enabled     return_false
+
+#include "qspinlock.c"
+
+#endif

  parent reply	other threads:[~2014-10-27 17:16 UTC|newest]

Thread overview: 108+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-10-16 18:10 [PATCH v12 00/11] qspinlock: a 4-byte queue spinlock with PV support Waiman Long
2014-10-16 18:10 ` Waiman Long
2014-10-16 18:10 ` [PATCH v12 01/11] qspinlock: A simple generic 4-byte queue spinlock Waiman Long
2014-10-16 18:10 ` Waiman Long
2014-10-16 18:10   ` Waiman Long
2014-10-16 18:10   ` Waiman Long
2014-10-16 18:10   ` Waiman Long
2014-10-16 18:10 ` [PATCH v12 02/11] qspinlock, x86: Enable x86-64 to use " Waiman Long
2014-10-16 18:10   ` Waiman Long
2014-10-16 18:10   ` Waiman Long
2014-10-16 18:10   ` Waiman Long
2014-10-16 18:10 ` Waiman Long
2014-10-16 18:10 ` [PATCH v12 03/11] qspinlock: Add pending bit Waiman Long
2014-10-16 18:10   ` Waiman Long
2014-10-16 18:10   ` Waiman Long
2014-10-16 18:10   ` Waiman Long
2014-10-16 18:10 ` Waiman Long
2014-10-16 18:10 ` [PATCH v12 04/11] qspinlock: Extract out code snippets for the next patch Waiman Long
2014-10-16 18:10   ` Waiman Long
2014-10-16 18:10   ` Waiman Long
2014-10-16 18:10 ` Waiman Long
2014-10-16 18:10 ` Waiman Long
2014-10-16 18:10 ` [PATCH v12 05/11] qspinlock: Optimize for smaller NR_CPUS Waiman Long
2014-10-16 18:10 ` Waiman Long
2014-10-16 18:10   ` Waiman Long
2014-10-16 18:10   ` Waiman Long
2014-10-16 18:10 ` Waiman Long
2014-10-16 18:10 ` [PATCH v12 06/11] qspinlock: Use a simple write to grab the lock Waiman Long
2014-10-16 18:10 ` Waiman Long
2014-10-16 18:10 ` Waiman Long
2014-10-16 18:10   ` Waiman Long
2014-10-16 18:10   ` Waiman Long
2014-10-16 18:10 ` [PATCH v12 07/11] qspinlock: Revert to test-and-set on hypervisors Waiman Long
2014-10-16 18:10 ` Waiman Long
2014-10-16 18:10   ` Waiman Long
2014-10-16 18:10   ` Waiman Long
2014-10-16 18:10   ` Waiman Long
2014-10-16 18:10 ` [PATCH v12 08/11] qspinlock, x86: Rename paravirt_ticketlocks_enabled Waiman Long
2014-10-16 18:10 ` Waiman Long
2014-10-16 18:10 ` Waiman Long
2014-10-16 18:10   ` Waiman Long
2014-10-16 18:10   ` Waiman Long
2014-10-16 18:10 ` [PATCH v12 09/11] pvqspinlock, x86: Add para-virtualization support Waiman Long
2014-10-16 18:10 ` Waiman Long
2014-10-16 18:10 ` Waiman Long
2014-10-24  8:47   ` Peter Zijlstra
2014-10-24  8:47   ` Peter Zijlstra
2014-10-24  8:47   ` Peter Zijlstra
2014-10-24 20:53     ` Waiman Long
2014-10-24 20:53       ` Waiman Long
2014-10-24 22:04       ` Peter Zijlstra
2014-10-24 22:04         ` Peter Zijlstra
2014-10-25  4:30         ` Mike Galbraith
2014-10-25  4:30         ` Mike Galbraith
2014-10-25  4:30         ` Mike Galbraith
2014-10-27 17:15         ` Waiman Long [this message]
2014-10-27 17:15         ` Waiman Long
2014-10-27 17:15           ` Waiman Long
2014-10-27 17:27           ` Peter Zijlstra
2014-10-27 20:50             ` Waiman Long
2014-10-27 20:50             ` Waiman Long
2014-10-27 20:50               ` Waiman Long
2014-10-27 17:27           ` Peter Zijlstra
2014-10-27 17:27           ` Peter Zijlstra
2014-10-24 22:04       ` Peter Zijlstra
2014-10-24 20:53     ` Waiman Long
2014-10-24  8:54   ` Peter Zijlstra
2014-10-24  8:54     ` Peter Zijlstra
2014-10-27 17:38     ` Waiman Long
2014-10-27 17:38       ` Waiman Long
2014-10-27 18:02       ` Konrad Rzeszutek Wilk
2014-10-27 18:02         ` Konrad Rzeszutek Wilk
2014-10-27 20:55         ` Waiman Long
2014-10-27 20:55         ` Waiman Long
2014-10-27 20:55         ` Waiman Long
2014-11-26  0:33         ` Waiman Long
2014-11-26  0:33         ` Waiman Long
2014-11-26  0:33         ` Waiman Long
2014-12-01 16:51           ` Konrad Rzeszutek Wilk
2014-12-01 16:51           ` Konrad Rzeszutek Wilk
2014-12-01 16:51             ` Konrad Rzeszutek Wilk
2014-10-27 18:02       ` Konrad Rzeszutek Wilk
2014-10-27 18:04       ` Peter Zijlstra
2014-10-27 18:04       ` Peter Zijlstra
2014-10-27 18:04       ` Peter Zijlstra
2014-10-27 21:22         ` Waiman Long
2014-10-27 21:22         ` Waiman Long
2014-10-27 21:22         ` Waiman Long
2014-10-29 19:05           ` Waiman Long
2014-10-29 19:05           ` Waiman Long
2014-10-29 19:05             ` Waiman Long
2014-10-29 20:25             ` Waiman Long
2014-10-29 20:25             ` Waiman Long
2014-10-29 20:25             ` Waiman Long
2014-10-27 17:38     ` Waiman Long
2014-10-24  8:54   ` Peter Zijlstra
2014-10-16 18:10 ` [PATCH v12 10/11] pvqspinlock, x86: Enable PV qspinlock for KVM Waiman Long
2014-10-16 18:10 ` Waiman Long
2014-10-16 18:10 ` Waiman Long
2014-10-16 18:10 ` [PATCH v12 11/11] pvqspinlock, x86: Enable PV qspinlock for XEN Waiman Long
2014-10-16 18:10 ` Waiman Long
2014-10-16 18:10 ` Waiman Long
2014-10-24  8:57 ` [PATCH v12 00/11] qspinlock: a 4-byte queue spinlock with PV support Peter Zijlstra
2014-10-24  8:57 ` Peter Zijlstra
2014-10-24  8:57   ` Peter Zijlstra
2014-10-27 18:00   ` Waiman Long
2014-10-27 18:00   ` Waiman Long
2014-10-27 18:00     ` Waiman Long

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='544E7DC9.5020903__5012.37018402225$1414430281$gmane$org@hp.com' \
    --to=waiman.long@hp.com \
    --cc=boris.ostrovsky@oracle.com \
    --cc=david.vrabel@citrix.com \
    --cc=doug.hatch@hp.com \
    --cc=hpa@zytor.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=oleg@redhat.com \
    --cc=paolo.bonzini@gmail.com \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=peterz@infradead.org \
    --cc=raghavendra.kt@linux.vnet.ibm.com \
    --cc=scott.norton@hp.com \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=virtualization@lists.linux-foundation.org \
    --cc=x86@kernel.org \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.