* Linux spin lock enhancement on xen
@ 2010-08-17  1:33 Mukesh Rathor
  2010-08-17  7:33 ` Keir Fraser
                   ` (3 more replies)
  0 siblings, 4 replies; 23+ messages in thread
From: Mukesh Rathor @ 2010-08-17  1:33 UTC (permalink / raw)
  To: Xen-devel

[-- Attachment #1: Type: text/plain, Size: 859 bytes --]

Hi guys,

Check out the attached patches. I changed the spin lock semantics so the
lock contains the vcpu id of the vcpu holding it. This then tells xen
to make that vcpu runnable if not already running:

Linux:
   spin_lock()
       if (try_lock() == failed)
           loop X times
           if (try_lock() == failed)
               sched_op_yield_to(vcpu_num of holder)
               start again;
           endif
       endif

Xen:
     sched_op_yield_to:
          if (vcpu_running(vcpu_num arg))
              do nothing
          else
              vcpu_kick(vcpu_num arg)
              do_yield()
          endif
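
In plain C, the lock-word convention and the guest slow path come down to
roughly the following. This is an illustrative sketch only; the attached
patch does it with inline assembly, and my_vcpu_id() and COUNTMAX here
just stand in for read_pda(cpunumber) and the spin limit:

    /* Lock word: -1 == unlocked, otherwise the vcpu id of the holder. */

    /* Returns -1 if we took the lock, otherwise the vcpu id of the
     * holder observed after COUNTMAX unsuccessful spins. */
    static int attempt_lock(volatile int *slock, int my_vcpu)
    {
        int spins = 0;

        for (;;) {
            int old = *slock;

            if (old < 0) {
                /* Looks free: try to claim it with our vcpu id. */
                if (cmpxchg(slock, old, my_vcpu) == old)
                    return -1;                  /* acquired */
                continue;                       /* lost the race, retry */
            }
            if (++spins >= COUNTMAX)
                return old;                     /* give up, report the holder */
            cpu_relax();                        /* pause */
        }
    }

    static void spin_lock(volatile int *slock)
    {
        int holder;

        /* Spin for a while; if we give up, tell xen whom to run instead. */
        while ((holder = attempt_lock(slock, my_vcpu_id())) != -1)
            HYPERVISOR_yield_to(holder);        /* the new SCHEDOP_yield_to */
    }

    static void spin_unlock(volatile int *slock)
    {
        *slock = -1;                            /* back to "unlocked" */
    }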


In my worst case test scenario, I get about 20-36% improvement when the
system is two to three times over provisioned. 

Please provide any feedback. I would like to submit official patch for
SCHEDOP_yield_to in xen.

thanks,
Mukesh


[-- Attachment #2: spin-lin.diff --]
[-- Type: text/x-patch, Size: 5303 bytes --]

diff --git a/Makefile b/Makefile
index 8b0b42f..d8d1dbb 100644
--- a/Makefile
+++ b/Makefile
@@ -303,7 +303,8 @@ LINUXINCLUDE    := -Iinclude \
                    $(if $(KBUILD_SRC),-Iinclude2 -I$(srctree)/include) \
 		   -include include/linux/autoconf.h
 
-CPPFLAGS        := -D__KERNEL__ $(LINUXINCLUDE)
+# CPPFLAGS        := -D__KERNEL__ $(LINUXINCLUDE) 
+CPPFLAGS        := -D__KERNEL__ $(LINUXINCLUDE) -D _XEN_SPIN_LOCK
 
 CFLAGS          := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
                    -fno-strict-aliasing -fno-common -Wstrict-prototypes -Wundef -Werror-implicit-function-declaration -fno-delete-null-pointer-checks
diff --git a/include/asm-i386/mach-xen/asm/hypervisor.h b/include/asm-i386/mach-xen/asm/hypervisor.h
index 89cde62..a3806f1 100644
--- a/include/asm-i386/mach-xen/asm/hypervisor.h
+++ b/include/asm-i386/mach-xen/asm/hypervisor.h
@@ -143,6 +143,15 @@ HYPERVISOR_yield(
 }
 
 static inline int
+HYPERVISOR_yield_to(uint vcpu)
+{
+        struct sched_yield_to yield_to = { .version = 1, .vcpu_id = vcpu };
+	int rc = HYPERVISOR_sched_op(SCHEDOP_yield_to, &yield_to);
+        /* TBD: compat */
+	return rc;
+}
+
+static inline int
 HYPERVISOR_block(
 	void)
 {
diff --git a/include/asm-x86_64/spinlock.h b/include/asm-x86_64/spinlock.h
index a8e3d89..c76e20f 100644
--- a/include/asm-x86_64/spinlock.h
+++ b/include/asm-x86_64/spinlock.h
@@ -16,6 +16,81 @@
  * (the type definitions are in asm/spinlock_types.h)
  */
 
+#ifdef _XEN_SPIN_LOCK
+#include <asm/hypervisor.h>
+
+#define __raw_spin_is_locked(x) \
+		(*(volatile signed int *)(&(x)->slock) >= 0)
+
+static inline int _attempt_raw_spin_lock(raw_spinlock_t *lock)
+{
+    const int COUNTMAX = 10000, myid=read_pda(cpunumber);
+    int oldval;
+
+    asm volatile 
+        ("1:  movsxl %1, %%rax              \n"
+         "    cmpq $0, %%rax             \n"
+         "    jge 4f                      \n"
+         "2:                              \n"
+         LOCK_PREFIX " cmpxchgl %k2, %1   \n"
+         "    jnz 4f                      \n"
+         "3:  /* exit */                  \n" 
+         LOCK_SECTION_START("")
+         "4:  xor %%rdx, %%rdx            \n"
+         "6:  inc %%rdx                   \n"
+         "    cmpl %k3,   %%edx           \n"
+         "    jge 3b                      \n"
+         "    pause                       \n"
+         "    movsxl %1, %%rax              \n"
+         "    cmpq $0, %%rax             \n"
+         "    jge 6b                      \n"
+         "    jmp 2b                       \n"
+        LOCK_SECTION_END
+
+         : "=&a" (oldval)
+         : "m" (lock->slock), "c" (myid), "g" (COUNTMAX)
+         : "rdx", "memory", "cc"
+        );
+        return oldval;
+}
+
+static inline void __raw_spin_lock(raw_spinlock_t *lock)
+{
+    int rc, old_lock_holder;
+
+    do {
+        old_lock_holder = _attempt_raw_spin_lock(lock);
+
+        if (old_lock_holder >= 0)
+            if ((rc=HYPERVISOR_yield_to(old_lock_holder)) != 0)
+                printk("XEN: Yield failed. rc:%d\n", rc);
+    } while (old_lock_holder != -1);
+}
+
+#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
+
+static inline int __raw_spin_trylock(raw_spinlock_t *lock)
+{
+        int oldval, myid = read_pda(cpunumber);
+
+        __asm__ __volatile__ (
+                "movl $-1, %%eax                \n"
+                LOCK_PREFIX " cmpxchgl %k2, %1  \n"
+                : "=&a" (oldval) 
+                : "m" (lock->slock), "c" (myid)
+                : "memory", "cc"
+        );
+
+	return (oldval == -1);
+}
+
+static inline void __raw_spin_unlock(raw_spinlock_t *lock)
+{
+	__asm__ __volatile__ ("movl $-1, %0" : "=m"(lock->slock) : : "memory");
+}
+
+#else
+
 #define __raw_spin_is_locked(x) \
 		(*(volatile signed int *)(&(x)->slock) <= 0)
 
@@ -64,6 +139,8 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock)
 	);
 }
 
+#endif
+
 #define __raw_spin_unlock_wait(lock) \
 	do { while (__raw_spin_is_locked(lock)) cpu_relax(); } while (0)
 
@@ -124,4 +201,5 @@ static inline void __raw_write_unlock(raw_rwlock_t *rw)
 				: "=m" (rw->lock) : : "memory");
 }
 
+
 #endif /* __ASM_SPINLOCK_H */
diff --git a/include/asm-x86_64/spinlock_types.h b/include/asm-x86_64/spinlock_types.h
index 59efe84..6fb8da0 100644
--- a/include/asm-x86_64/spinlock_types.h
+++ b/include/asm-x86_64/spinlock_types.h
@@ -9,7 +9,11 @@ typedef struct {
 	volatile unsigned int slock;
 } raw_spinlock_t;
 
+#ifdef _XEN_SPIN_LOCK
+#define __RAW_SPIN_LOCK_UNLOCKED	{ -1 }
+#else
 #define __RAW_SPIN_LOCK_UNLOCKED	{ 1 }
+#endif
 
 typedef struct {
 	volatile unsigned int lock;
diff --git a/include/xen/interface/sched.h b/include/xen/interface/sched.h
index abf11cc..dc60001 100644
--- a/include/xen/interface/sched.h
+++ b/include/xen/interface/sched.h
@@ -90,6 +90,17 @@ DEFINE_XEN_GUEST_HANDLE(sched_remote_shutdown_t);
 #define SHUTDOWN_suspend    2  /* Clean up, save suspend info, kill.         */
 #define SHUTDOWN_crash      3  /* Tell controller we've crashed.             */
 
+
+/*
+ * Voluntarily yield the CPU to another given vcpu
+ * @arg == vcpu info.
+ */
+#define SCHEDOP_yield_to      5
+struct sched_yield_to {
+    unsigned int version;
+    unsigned int vcpu_id;
+};
+
 #endif /* __XEN_PUBLIC_SCHED_H__ */
 
 /*

[-- Attachment #3: spin-xen.diff --]
[-- Type: text/x-patch, Size: 1540 bytes --]

diff -r c840095b9359 xen/common/schedule.c
--- a/xen/common/schedule.c	Mon Jul 26 03:55:45 2010 -0700
+++ b/xen/common/schedule.c	Mon Aug 16 18:33:07 2010 -0700
@@ -627,6 +627,30 @@ ret_t do_sched_op(int cmd, XEN_GUEST_HAN
         break;
     }
 
+    case SCHEDOP_yield_to:
+    {
+        struct sched_yield_to yld_s;
+        struct vcpu *vp;
+        struct domain *dp = current->domain;
+
+        ret = -EFAULT;
+        if ( copy_from_guest(&yld_s, arg, 1) )
+            break;
+
+        ret = -EINVAL;
+        if (is_idle_vcpu(current) || yld_s.vcpu_id >= dp->max_vcpus)
+            break;
+
+        vp = dp->vcpu[yld_s.vcpu_id];
+        if (!vp->is_running) {
+            vcpu_kick(dp->vcpu[yld_s.vcpu_id]);
+            ret = do_yield();
+        } else
+            ret = 0;
+
+        break;
+    }
+
     case SCHEDOP_block:
     {
         ret = do_block();
diff -r c840095b9359 xen/include/public/sched.h
--- a/xen/include/public/sched.h	Mon Jul 26 03:55:45 2010 -0700
+++ b/xen/include/public/sched.h	Mon Aug 16 18:33:07 2010 -0700
@@ -108,6 +108,17 @@ DEFINE_XEN_GUEST_HANDLE(sched_remote_shu
 #define SHUTDOWN_suspend    2  /* Clean up, save suspend info, kill.         */
 #define SHUTDOWN_crash      3  /* Tell controller we've crashed.             */
 
+
+/*
+ * Voluntarily yield the CPU to another given vcpu
+ * @arg == vcpu info.
+ */
+#define SCHEDOP_yield_to      5
+struct sched_yield_to {
+    unsigned int version;
+    unsigned int vcpu_id;
+};
+
 #endif /* __XEN_PUBLIC_SCHED_H__ */
 
 /*



* Re: Linux spin lock enhancement on xen
  2010-08-17  1:33 Linux spin lock enhancement on xen Mukesh Rathor
@ 2010-08-17  7:33 ` Keir Fraser
  2010-08-17  7:53 ` Jan Beulich
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 23+ messages in thread
From: Keir Fraser @ 2010-08-17  7:33 UTC (permalink / raw)
  To: Mukesh Rathor, Xen-devel; +Cc: Jeremy Fitzhardinge

How does this compare with Jeremy's existing paravirtualised spinlocks in
pv_ops? They required no hypervisor changes. Cc'ing Jeremy.

 -- Keir

On 17/08/2010 02:33, "Mukesh Rathor" <mukesh.rathor@oracle.com> wrote:

> Hi guys,
> 
> Check out the attached patches. I changed the spin lock semantics so the
> lock contains the vcpu id of the vcpu holding it. This then tells xen
> to make that vcpu runnable if not already running:
> 
> Linux:
>    spin_lock()
>        if (try_lock() == failed)
>            loop X times
>            if (try_lock() == failed)
>                sched_op_yield_to(vcpu_num of holder)
>                start again;
>            endif
>        endif
> 
> Xen:
>      sched_op_yield_to:
>           if (vcpu_running(vcpu_num arg))
>               do nothing
>           else
>               vcpu_kick(vcpu_num arg)
>               do_yield()
>           endif
> 
> 
> In my worst case test scenario, I get about 20-36% improvement when the
> system is two to three times over provisioned.
> 
> Please provide any feedback. I would like to submit official patch for
> SCHEDOP_yield_to in xen.
> 
> thanks,
> Mukesh
> 


* Re: Linux spin lock enhancement on xen
  2010-08-17  1:33 Linux spin lock enhancement on xen Mukesh Rathor
  2010-08-17  7:33 ` Keir Fraser
@ 2010-08-17  7:53 ` Jan Beulich
  2010-08-18  1:58   ` Mukesh Rathor
  2010-08-17 14:34 ` Ky Srinivasan
  2010-08-17 17:43 ` Jeremy Fitzhardinge
  3 siblings, 1 reply; 23+ messages in thread
From: Jan Beulich @ 2010-08-17  7:53 UTC (permalink / raw)
  To: Mukesh Rathor; +Cc: Xen-devel

>>> On 17.08.10 at 03:33, Mukesh Rathor <mukesh.rathor@oracle.com> wrote:

A mere vcpu_kick()+do_yield() seems pretty simplistic to me - if the
current vCPU still has higher priority than the one kicked you'll achieve
nothing. Instead, I think you really want to offer the current vCPU's
time slice to the target, making sure the target yields back as soon as
it released the lock (thus transferring the borrowed time slice back to
where it belongs).

And then, without using ticket locks, you likely increase unfairness
(as any other actively running vCPU going for the same lock will
have much better chances of acquiring it than the vCPU that
originally tried to and yielded), including the risk of starvation.

Still, I'm glad to see we're not the only ones wanting a directed
yield capability in Xen.

>+struct sched_yield_to {
>+    unsigned int version;
>+    unsigned int vcpu_id;
>+};

Why do you need a version field here, especially as it doesn't
appear to get read by the hypervisor?

Jan


* Re: Linux spin lock enhancement on xen
  2010-08-17  1:33 Linux spin lock enhancement on xen Mukesh Rathor
  2010-08-17  7:33 ` Keir Fraser
  2010-08-17  7:53 ` Jan Beulich
@ 2010-08-17 14:34 ` Ky Srinivasan
  2010-08-18  1:58   ` Mukesh Rathor
  2010-08-17 17:43 ` Jeremy Fitzhardinge
  3 siblings, 1 reply; 23+ messages in thread
From: Ky Srinivasan @ 2010-08-17 14:34 UTC (permalink / raw)
  To: Xen-devel, Mukesh Rathor



>>> On 8/16/2010 at  9:33 PM, in message
<20100816183357.08623c4c@mantra.us.oracle.com>, Mukesh Rathor
<mukesh.rathor@oracle.com> wrote: 
> Hi guys,
> 
> Check out the attached patches. I changed the spin lock semantics so the
> lock contains the vcpu id of the vcpu holding it. This then tells xen
> to make that vcpu runnable if not already running:
> 
> Linux:
>    spin_lock()
>        if (try_lock() == failed)
>            loop X times
>            if (try_lock() == failed)
>                sched_op_yield_to(vcpu_num of holder)
>                start again;
>            endif
>        endif
> 
> Xen:
>      sched_op_yield_to:
>           if (vcpu_running(vcpu_num arg))
>               do nothing
>           else
>               vcpu_kick(vcpu_num arg)
>               do_yield()
>           endif
> 
> 
> In my worst case test scenario, I get about 20-36% improvement when the
> system is two to three times over provisioned. 
> 
> Please provide any feedback. I would like to submit official patch for
> SCHEDOP_yield_to in xen.
While I agree that a directed yield is a useful construct, I am not sure
how this protocol would deal with ticket spin locks, as you would want
to implement some form of priority inheritance - if the vcpu you are
yielding to is currently blocked on another (ticket) spin lock, you
would want to yield to the owner of that other spin lock. Clearly, this
dependency information is only available in the guest, and that is where
we would need to implement this logic. I think Jan's "enlightened" spin
locks implemented this kind of logic.

Perhaps another way to deal with this generic problem of inopportune
guest preemption might be to coordinate guest preemption - allow the
guest to notify the hypervisor that it is in a critical section. If the
no-preempt guest state is set, the hypervisor can choose to defer the
preemption by giving the guest vcpu in question an additional time
quantum to run. In this case, the hypervisor would post the fact that a
preemption is pending on the guest, and the guest vcpu is expected to
relinquish control to the hypervisor as part of exiting the critical
section. Since guest preemption is not a "correctness" issue, the
hypervisor can choose not to honor the "no-preempt" state the guest may
post if it detects that the guest is buggy (or malicious).

Much of what we have been discussing with "enlightened" spin locks is
how to recover from the situation that results when we have an
inopportune guest preemption. The coordinated preemption protocol
described here attempts to avoid getting into such pathological
situations in the first place. If I recall correctly, I think there were
some patches for doing this form of preemption management.
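
For concreteness, the guest side of such a coordinated-preemption
protocol might look roughly like this; the shared fields and their names
are hypothetical, purely to make the idea concrete:

    /* Hypothetical per-vcpu flags, shared between the guest and the hypervisor. */
    struct preempt_ctrl {
        volatile unsigned int no_preempt;       /* guest: "I'm in a critical section" */
        volatile unsigned int preempt_pending;  /* Xen: "I deferred a preemption for you" */
    };

    static void critical_enter(struct preempt_ctrl *pc)
    {
        pc->no_preempt = 1;                     /* ask the hypervisor to defer preemption */
        barrier();
    }

    static void critical_exit(struct preempt_ctrl *pc)
    {
        barrier();
        pc->no_preempt = 0;
        if (pc->preempt_pending)                /* a preemption was deferred for us ... */
            HYPERVISOR_yield();                 /* ... so give the cpu back right away */
    }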

Regards,

K. Y
 


> 
> thanks,
> Mukesh


* Re: Linux spin lock enhancement on xen
  2010-08-17  1:33 Linux spin lock enhancement on xen Mukesh Rathor
                   ` (2 preceding siblings ...)
  2010-08-17 14:34 ` Ky Srinivasan
@ 2010-08-17 17:43 ` Jeremy Fitzhardinge
  2010-08-18  1:58   ` Mukesh Rathor
  3 siblings, 1 reply; 23+ messages in thread
From: Jeremy Fitzhardinge @ 2010-08-17 17:43 UTC (permalink / raw)
  To: Mukesh Rathor; +Cc: Xen-devel

 On 08/16/2010 06:33 PM, Mukesh Rathor wrote:
> In my worst case test scenario, I get about 20-36% improvement when the
> system is two to three times over provisioned. 
>
> Please provide any feedback. I would like to submit official patch for
> SCHEDOP_yield_to in xen.

This approach only works for old-style spinlocks.  Ticketlocks also have
the problem of making sure the next vcpu gets scheduled on unlock.

Have you looked at the pv spinlocks I have upstream in the pvops
kernels, which use the (existing) poll hypercall to block the waiting
vcpu until the lock is free?
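
Roughly, the mechanism is the sketch below. The array names are
illustrative stand-ins for the real per-cpu variables, not the actual
arch/x86/xen/spinlock.c code:

    static raw_spinlock_t *spinning[NR_CPUS];       /* what each vcpu waits on */
    static evtchn_port_t lock_kick_port[NR_CPUS];   /* per-vcpu "kick me" event channel */

    static void spin_lock_slow(raw_spinlock_t *lock)
    {
        int cpu = smp_processor_id();
        evtchn_port_t port = lock_kick_port[cpu];
        struct sched_poll poll;

        spinning[cpu] = lock;                       /* advertise what we block on */
        /* (the real code re-checks the lock here to close the wakeup race) */
        set_xen_guest_handle(poll.ports, &port);
        poll.nr_ports = 1;
        poll.timeout = 0;                           /* block until the port is kicked */
        HYPERVISOR_sched_op(SCHEDOP_poll, &poll);   /* existing hypercall, no Xen change */
        spinning[cpu] = NULL;
    }

    static void spin_unlock_slow(raw_spinlock_t *lock)
    {
        int cpu;

        /* Kick a vcpu recorded as waiting on this particular lock. */
        for_each_online_cpu(cpu)
            if (spinning[cpu] == lock) {
                notify_remote_via_evtchn(lock_kick_port[cpu]);
                break;                              /* wake just one waiter */
            }
    }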

    J


* Re: Linux spin lock enhancement on xen
  2010-08-17 17:43 ` Jeremy Fitzhardinge
@ 2010-08-18  1:58   ` Mukesh Rathor
  2010-08-18 16:37     ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 23+ messages in thread
From: Mukesh Rathor @ 2010-08-18  1:58 UTC (permalink / raw)
  To: Jeremy Fitzhardinge; +Cc: Keir, Xen-devel, Fraser

On Tue, 17 Aug 2010 10:43:04 -0700
Jeremy Fitzhardinge <jeremy@goop.org> wrote:

>  On 08/16/2010 06:33 PM, Mukesh Rathor wrote:
> > In my worst case test scenario, I get about 20-36% improvement when
> > the system is two to three times over provisioned. 
> >
> > Please provide any feedback. I would like to submit official patch
> > for SCHEDOP_yield_to in xen.
> 
> This approach only works for old-style spinlocks.  Ticketlocks also
> have the problem of making sure the next vcpu gets scheduled on
> unlock.

Well, unfortunately, looks like old-style spinlocks are gonna be around 
for a very long time. I've heard there are customers still on EL3!


> Have you looked at the pv spinlocks I have upstream in the pvops
> kernels, which use the (existing) poll hypercall to block the waiting
> vcpu until the lock is free?
>     J

>How does this compare with Jeremy's existing paravirtualised spinlocks
>in pv_ops? They required no hypervisor changes. Cc'ing Jeremy.
> -- Keir

Yeah, I looked at it today. What pv-ops is doing is forcing a yield
via a fake irq/event channel poll, after storing the lock pointer in
a per-cpu area. The unlocker then IPIs the waiting vcpus. The lock
holder may not be running, though, and there is no hint to the hypervisor
to run it. So you may have many waiters come and leave for no
reason.

To me this is more of an overhead than needed in a guest. In my
approach, the hypervisor is hinted exactly which vcpu is the 
lock holder. Often many VCPUs are pinned to a set of physical cpus
due to licensing and other reasons. So this really helps a vcpu
that is holding a spin lock, wanting to do some possibly real
time work, get scheduled and move on. Moreover, number of vcpus is
going up pretty fast.

Thanks,
Mukesh


* Re: Linux spin lock enhancement on xen
  2010-08-17  7:53 ` Jan Beulich
@ 2010-08-18  1:58   ` Mukesh Rathor
  0 siblings, 0 replies; 23+ messages in thread
From: Mukesh Rathor @ 2010-08-18  1:58 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Xen-devel

On Tue, 17 Aug 2010 08:53:32 +0100
"Jan Beulich" <JBeulich@novell.com> wrote:

> >>> On 17.08.10 at 03:33, Mukesh Rathor <mukesh.rathor@oracle.com>
> >>> wrote:
> 
> A mere vcpu_kick()+do_yield() seems pretty simplistic to me - if the
> current vCPU still has higher priority than the one kicked you'll
> achieve nothing. Instead, I think you really want to offer the
> current vCPU's time slice to the target, making sure the target
> yields back as soon as it released the lock (thus transferring the
> borrowed time slice back to where it belongs).

True, that is a phase II enhancement.

> And then, without using ticket locks, you likely increase unfairness
> (as any other actively running vCPU going for the same lock will
> have much better chances of acquiring it than the vCPU that
> originally tried to and yielded), including the risk of starvation.

Please see other thread on my thoughts on this.

> Still, I'm glad to see we're not the only ones wanting a directed
> yield capability in Xen.
> 
> >+struct sched_yield_to {
> >+    unsigned int version;
> >+    unsigned int vcpu_id;
> >+};
> 
> Why do you need a version field here, especially as it doesn't
> appear to get read by the hypervisor?

No reason, just forgot to remove it.

thanks,
Mukesh


* Re: Linux spin lock enhancement on xen
  2010-08-17 14:34 ` Ky Srinivasan
@ 2010-08-18  1:58   ` Mukesh Rathor
  0 siblings, 0 replies; 23+ messages in thread
From: Mukesh Rathor @ 2010-08-18  1:58 UTC (permalink / raw)
  To: Ky Srinivasan; +Cc: Jan, Xen-devel

On Tue, 17 Aug 2010 08:34:49 -0600
"Ky Srinivasan" <ksrinivasan@novell.com> wrote:
..
> While I agree that a directed yield is a useful construct, I am not
> sure how this protocol would deal with ticket spin locks as you would
> want to implement some form of priority inheritance - if the vcpu you
> are yielding to is currently blocked on another (ticket) spin lock,
> you would want to yield to the owner of that other spin lock.
> Clearly, this dependency information is only available in the guest
> and that is where we would need to implement this logic. I think
> Jan's "enlightened" spin locks implemented this kind of logic.
 
Frankly, I'm opposed to ticket spin locks. IMO, starvation and fairness
are scheduler problems, not spin lock problems. If a vcpu has higher
priority, it is for a reason, and I'd like it to get prioritized.
Imagine a cluster stack in a 128-vcpu environment: the thread doing
heartbeat definitely needs the priority it deserves.

Having said that, my proposal can be enhanced to take ticket spin
locks into consideration by having unlock make sure the next vcpu
in line gets a temporary priority boost.

thanks,
Mukesh


* Re: Linux spin lock enhancement on xen
  2010-08-18  1:58   ` Mukesh Rathor
@ 2010-08-18 16:37     ` Jeremy Fitzhardinge
  2010-08-18 17:09       ` Keir Fraser
  2010-08-19  2:52       ` Mukesh Rathor
  0 siblings, 2 replies; 23+ messages in thread
From: Jeremy Fitzhardinge @ 2010-08-18 16:37 UTC (permalink / raw)
  To: Mukesh Rathor; +Cc: Xen-devel, Keir Fraser

 On 08/17/2010 06:58 PM, Mukesh Rathor wrote:
>> How does this compare with Jeremy's existing paravirtualised spinlocks
>> in pv_ops? They required no hypervisor changes. Cc'ing Jeremy.
>> -- Keir
> Yeah, I looked at it today. What pv-ops is doing is forcing a yield
> via a fake irq/event channel poll, after storing the lock pointer in
> a per-cpu area. The unlocker then IPIs the waiting vcpus. The lock
> holder may not be running, though, and there is no hint to the hypervisor
> to run it. So you may have many waiters come and leave for no
> reason.

(They don't leave for no reason; they leave when they're told they can
take the lock next.)

I don't see why the guest should micromanage Xen's scheduler decisions. 
If a VCPU is waiting for another VCPU and can put itself to sleep in the
meantime, then its up to Xen to take advantage of that newly freed PCPU
to schedule something.  It may decide to run something in your domain
that's runnable, or it may decide to run something else.  There's no
reason why the spinlock holder is the best VCPU to run overall, or even
the best VCPU in your domain.

My view is you should just put any VCPU which has nothing to do to
sleep, and let Xen sort out the scheduling of the remainder.

> To me this is more of an overhead than needed in a guest. In my
> approach, the hypervisor is hinted exactly which vcpu is the 
> lock holder.

The slow path should be rare.  In general locks should be taken
uncontended, or with brief contention.  Locks should be held for a short
period of time, so risk of being preempted while holding the lock should
be low.  The effects of the preemption are pretty disastrous, so we need
to handle it, but the slow path will be rare, so the time spent handling
it is not a critical factor (and can be compensated for by tuning the
timeout before dropping into the slow path).

>  Often many VCPUs are pinned to a set of physical cpus
> due to licensing and other reasons. So this really helps a vcpu
> that is holding a spin lock, wanting to do some possibly real
> time work, get scheduled and move on.

I'm not sure I understand this point.  If you're pinning vcpus to pcpus,
then presumably you're not going to share a pcpu among many, or any
vcpus, so the lock holder will be able to run any time it wants.  And a
directed yield will only help if the lock waiter is sharing the same
pcpu as the lock holder, so it can hand over its timeslice (since making
the directed yield preempt something already running in order to run
your target vcpu seems rude and ripe for abuse).

>  Moreover, number of vcpus is
> going up pretty fast.

Presumably the number of pcpus are also going up, so the amount of
per-pcpu overcommit is about the same.

    J


* Re: Linux spin lock enhancement on xen
  2010-08-18 16:37     ` Jeremy Fitzhardinge
@ 2010-08-18 17:09       ` Keir Fraser
  2010-08-19  2:52         ` Mukesh Rathor
  2010-08-24  8:08         ` George Dunlap
  2010-08-19  2:52       ` Mukesh Rathor
  1 sibling, 2 replies; 23+ messages in thread
From: Keir Fraser @ 2010-08-18 17:09 UTC (permalink / raw)
  To: Jeremy Fitzhardinge, Mukesh Rathor; +Cc: Xen-devel

On 18/08/2010 17:37, "Jeremy Fitzhardinge" <jeremy@goop.org> wrote:

> I don't see why the guest should micromanage Xen's scheduler decisions.
> If a VCPU is waiting for another VCPU and can put itself to sleep in the
> meantime, then its up to Xen to take advantage of that newly freed PCPU
> to schedule something.  It may decide to run something in your domain
> that's runnable, or it may decide to run something else.  There's no
> reason why the spinlock holder is the best VCPU to run overall, or even
> the best VCPU in your domain.
> 
> My view is you should just put any VCPU which has nothing to do to
> sleep, and let Xen sort out the scheduling of the remainder.

Yeah, I'm no fan of yield or yield-to type operations. I'd reserve the right
to implement both of them as no-op.

 -- Keir


* Re: Linux spin lock enhancement on xen
  2010-08-18 17:09       ` Keir Fraser
@ 2010-08-19  2:52         ` Mukesh Rathor
  2010-08-24  8:08         ` George Dunlap
  1 sibling, 0 replies; 23+ messages in thread
From: Mukesh Rathor @ 2010-08-19  2:52 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Jeremy Fitzhardinge, Xen-devel

On Wed, 18 Aug 2010 18:09:22 +0100
Keir Fraser <keir.fraser@eu.citrix.com> wrote:

> On 18/08/2010 17:37, "Jeremy Fitzhardinge" <jeremy@goop.org> wrote:
> 
> > I don't see why the guest should micromanage Xen's scheduler
> > decisions. If a VCPU is waiting for another VCPU and can put itself
> > to sleep in the meantime, then its up to Xen to take advantage of
> > that newly freed PCPU to schedule something.  It may decide to run
> > something in your domain that's runnable, or it may decide to run
> > something else.  There's no reason why the spinlock holder is the
> > best VCPU to run overall, or even the best VCPU in your domain.
> > 
> > My view is you should just put any VCPU which has nothing to do to
> > sleep, and let Xen sort out the scheduling of the remainder.
> 
> Yeah, I'm no fan of yield or yield-to type operations. I'd reserve
> the right to implement both of them as no-op.
> 
>  -- Keir
> 

I think making them advisory makes sense. Ultimately xen decides. 

thanks,
Mukesh


* Re: Linux spin lock enhancement on xen
  2010-08-18 16:37     ` Jeremy Fitzhardinge
  2010-08-18 17:09       ` Keir Fraser
@ 2010-08-19  2:52       ` Mukesh Rathor
  2010-08-23 21:33         ` Jeremy Fitzhardinge
  1 sibling, 1 reply; 23+ messages in thread
From: Mukesh Rathor @ 2010-08-19  2:52 UTC (permalink / raw)
  To: Jeremy Fitzhardinge; +Cc: Keir, Xen-devel, Fraser

On Wed, 18 Aug 2010 09:37:17 -0700
Jeremy Fitzhardinge <jeremy@goop.org> wrote:

> (They don't leave for no reason; they leave when they're told they can
> take the lock next.)
> 
> I don't see why the guest should micromanage Xen's scheduler
> decisions. If a VCPU is waiting for another VCPU and can put itself
> to sleep in the meantime, then its up to Xen to take advantage of
> that newly freed PCPU to schedule something.  It may decide to run
> something in your domain that's runnable, or it may decide to run
> something else.  There's no reason why the spinlock holder is the
> best VCPU to run overall, or even the best VCPU in your domain.
> 
> My view is you should just put any VCPU which has nothing to do to
> sleep, and let Xen sort out the scheduling of the remainder.

Agree for the most part. But if we can spare the cost of a vcpu coming
on a cpu, realizing it has nothing to do and putting itself to sleep, by a
simple solution, we've just saved cycles. Often we are looking for tiny
gains in the benchmarks against competition. 

Yes, we don't want to micromanage xen's scheduler. But if a guest knows
something that the scheduler does not, and has no way of knowing it,
then it would be nice to be able to exploit that. I didn't think a vcpu
telling xen that it's not making forward progress was intrusive.

Another approach, perhaps better, is a hypercall that allows a vcpu's
priority to be temporarily boosted.  What do you guys think about that?
This would be akin to a system call allowing a process to boost its
priority, or to some kernels where a thread holding a lock gets a
temporary bump in priority because a waiter tells the kernel to.
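
Purely to illustrate what that interface could look like (nothing like
this exists today; the sub-op number and the fields are made up):

    /* Hypothetical interface, for discussion only. */
    #define SCHEDOP_temp_boost    6             /* made-up sub-op number */
    struct sched_temp_boost {
        unsigned int vcpu_id;                   /* vcpu whose priority to bump */
        unsigned int max_usecs;                 /* cap on how long the boost lasts */
    };

    static inline int HYPERVISOR_temp_boost(unsigned int vcpu, unsigned int usecs)
    {
        struct sched_temp_boost b = { .vcpu_id = vcpu, .max_usecs = usecs };
        return HYPERVISOR_sched_op(SCHEDOP_temp_boost, &b);
    }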


> I'm not sure I understand this point.  If you're pinning vcpus to
> pcpus, then presumably you're not going to share a pcpu among many,
> or any vcpus, so the lock holder will be able to run any time it
> wants.  And a directed yield will only help if the lock waiter is
> sharing the same pcpu as the lock holder, so it can hand over its
> timeslice (since making the directed yield preempt something already
> running in order to run your target vcpu seems rude and ripe for
> abuse).

No: if a customer licenses 4 cpus and runs a guest with 12 vcpus,
you now have 12 vcpus confined to the 4 physical cpus.

> Presumably the number of pcpus are also going up, so the amount of
> per-pcpu overcommit is about the same.

Unless vcpus are going up faster than pcpus :)


Thanks,
Mukesh


* Re: Linux spin lock enhancement on xen
  2010-08-19  2:52       ` Mukesh Rathor
@ 2010-08-23 21:33         ` Jeremy Fitzhardinge
  0 siblings, 0 replies; 23+ messages in thread
From: Jeremy Fitzhardinge @ 2010-08-23 21:33 UTC (permalink / raw)
  To: Mukesh Rathor; +Cc: Xen-devel, Keir Fraser

 On 08/18/2010 07:52 PM, Mukesh Rathor wrote:
>> My view is you should just put any VCPU which has nothing to do to
>> sleep, and let Xen sort out the scheduling of the remainder.
> Agree for the most part. But if we can spare the cost of a vcpu coming
> on a cpu, realizing it has nothing to do and putting itself to sleep, by a
> simple solution, we've just saved cycles. Often we are looking for tiny
> gains in the benchmarks against competition. 

Well, how does your proposal compare to mine?  Is it more efficient?

> Yes, we don't want to micromanage xen's scheduler. But if a guest knows
> something that the scheduler does not, and has no way of knowing it,
> then it would be nice to be able to exploit that. I didn't think a vcpu
> telling xen that it's not making forward progress was intrusive.

Well, blocking on an event channel is a good hint.  And what's more, it
allows the guest even more control because it can choose which vcpu to
wake up when.

> Another approach, perhaps better, is a hypercall that allows a vcpu's
> priority to be temporarily boosted.  What do you guys think about that?
> This would be akin to a system call allowing a process to boost its
> priority, or to some kernels where a thread holding a lock gets a
> temporary bump in priority because a waiter tells the kernel to.

That kind of thing has many pitfalls - not least, how do you make sure
it doesn't get abused?  A "proper" mechanism to deal with this is to expose
some kind of complete vcpu blocking dependency graph to Xen to inform
its scheduling decisions, but that's probably overkill...

>> I'm not sure I understand this point.  If you're pinning vcpus to
>> pcpus, then presumably you're not going to share a pcpu among many,
>> or any vcpus, so the lock holder will be able to run any time it
>> wants.  And a directed yield will only help if the lock waiter is
>> sharing the same pcpu as the lock holder, so it can hand over its
>> timeslice (since making the directed yield preempt something already
>> running in order to run your target vcpu seems rude and ripe for
>> abuse).
> No: if a customer licenses 4 cpus and runs a guest with 12 vcpus,
> you now have 12 vcpus confined to the 4 physical cpus.

In one domain?  Why would they do that?

    J


* Re: Linux spin lock enhancement on xen
  2010-08-18 17:09       ` Keir Fraser
  2010-08-19  2:52         ` Mukesh Rathor
@ 2010-08-24  8:08         ` George Dunlap
  2010-08-24  8:20           ` Keir Fraser
                             ` (2 more replies)
  1 sibling, 3 replies; 23+ messages in thread
From: George Dunlap @ 2010-08-24  8:08 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Jeremy Fitzhardinge, Xen-devel

Wow, I totally missed this thread.

A couple of thoughts;

Complicated solutions for the scheduler are a really bad idea.  It's
hard enough to predict and debug the side-effects of simple
mechanisms; a complex mechanism is doomed to failure at the outset.

I agree with Jeremy, that the guest shouldn't tell Xen to run a
specific VCPU.  At most it should be something along the lines of, "If
you're going to run any vcpu from this domain, please run vcpu X."

Jeremy, do you think that changes to the HV are necessary, or do you
think that the existing solution is sufficient?  It seems to me like
hinting to the HV to do a directed yield makes more sense than making
the same thing happen via blocking and event channels.  OTOH, that
gives the guest a lot more control over when and how things happen.

Mukesh, did you see the patch by Xiantao Zhang a few days ago,
regarding what to do on an HVM pause instruction?  I thought the
solution he had was interesting: when yielding due to a spinlock,
rather than going to the back of the queue, just go behind one person.
 I think an implementation of "yield_to" that might make sense in the
credit scheduler is (rough sketch below):
* Put the yielding vcpu behind one cpu
* If the yield-to vcpu is not running, pull it to the front within its
priority.  (I.e., if it's UNDER, put it at the front so it runs next;
if it's OVER, make it the first OVER cpu.)
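
In toy form (nothing here is actual sched_credit.c code; the runqueue is
just an array of pointers, and all the names are made up), those two
rules would be:

    enum toy_pri { UNDER, OVER };

    struct toy_vcpu { int id; enum toy_pri pri; int running; };

    struct toy_runq {
        struct toy_vcpu *q[16];     /* runnable vcpus, front of queue first */
        int len;
    };

    static void rq_remove(struct toy_runq *rq, struct toy_vcpu *v)
    {
        int i, j;

        for (i = 0; i < rq->len; i++)
            if (rq->q[i] == v) {
                for (j = i; j < rq->len - 1; j++)
                    rq->q[j] = rq->q[j + 1];
                rq->len--;
                return;
            }
    }

    static void rq_insert_at(struct toy_runq *rq, int pos, struct toy_vcpu *v)
    {
        int j;

        for (j = rq->len; j > pos; j--)     /* assumes there is room */
            rq->q[j] = rq->q[j - 1];
        rq->q[pos] = v;
        rq->len++;
    }

    static void toy_yield_to(struct toy_runq *rq, struct toy_vcpu *yielder,
                             struct toy_vcpu *target)
    {
        int i;

        /* Rule 1: the yielder goes behind at least one other runnable
         * vcpu, even if that vcpu has lower priority. */
        rq_remove(rq, yielder);
        rq_insert_at(rq, rq->len ? 1 : 0, yielder);

        /* Rule 2: if the target isn't running, pull it to the front of
         * its own priority class: very front if UNDER, first OVER slot
         * otherwise. */
        if (!target->running) {
            rq_remove(rq, target);
            if (target->pri == UNDER) {
                rq_insert_at(rq, 0, target);
            } else {
                for (i = 0; i < rq->len && rq->q[i]->pri == UNDER; i++)
                    ;
                rq_insert_at(rq, i, target);
            }
        }
    }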

Thoughts?

 -George


On Wed, Aug 18, 2010 at 6:09 PM, Keir Fraser <keir.fraser@eu.citrix.com> wrote:
> On 18/08/2010 17:37, "Jeremy Fitzhardinge" <jeremy@goop.org> wrote:
>
>> I don't see why the guest should micromanage Xen's scheduler decisions.
>> If a VCPU is waiting for another VCPU and can put itself to sleep in the
>> meantime, then its up to Xen to take advantage of that newly freed PCPU
>> to schedule something.  It may decide to run something in your domain
>> that's runnable, or it may decide to run something else.  There's no
>> reason why the spinlock holder is the best VCPU to run overall, or even
>> the best VCPU in your domain.
>>
>> My view is you should just put any VCPU which has nothing to do to
>> sleep, and let Xen sort out the scheduling of the remainder.
>
> Yeah, I'm no fan of yield or yield-to type operations. I'd reserve the right
> to implement both of them as no-op.
>
>  -- Keir


* Re: Linux spin lock enhancement on xen
  2010-08-24  8:08         ` George Dunlap
@ 2010-08-24  8:20           ` Keir Fraser
  2010-08-24  8:43             ` George Dunlap
  2010-08-24  8:48             ` Jan Beulich
  2010-08-25  1:03           ` Dong, Eddie
  2010-08-26  2:13           ` Mukesh Rathor
  2 siblings, 2 replies; 23+ messages in thread
From: Keir Fraser @ 2010-08-24  8:20 UTC (permalink / raw)
  To: George Dunlap; +Cc: Jeremy Fitzhardinge, Xen-devel

On 24/08/2010 09:08, "George Dunlap" <dunlapg@umich.edu> wrote:

> Jeremy, do you think that changes to the HV are necessary, or do you
> think that the existing solution is sufficient?  It seems to me like
> hinting to the HV to do a directed yield makes more sense than making
> the same thing happen via blocking and event channels.  OTOH, that
> gives the guest a lot more control over when and how things happen.
> 
> Mukesh, did you see the patch by Xiantao Zhang a few days ago,
> regarding what to do on an HVM pause instruction?

I think there's a difference between providing some kind of yield_to as a
private interface within the hypervisor as some kind of heuristic for
emulating something like PAUSE, versus providing such an operation as a
public guest interface.

It seems to me that Jeremy's spinlock implementation provides all the info a
scheduler would require: vcpus trying to acquire a lock are blocked, the
lock holder wakes just the next vcpu in turn when it releases the lock. The
scheduler at that point may have a decision to make as to whether to run the
lock releaser, or the new lock holder, or both, but how can the guest help
with that when its a system-wide scheduling decision? Obviously the guest
would presumably like all its runnable vcpus to run all of the time!

 - Keir

>  I thought the
> solution he had was interesting: when yielding due to a spinlock,
> rather than going to the back of the queue, just go behind one person.
>  I think an implementation of "yield_to" that might make sense in the
> credit scheduler is:
> * Put the yielding vcpu behind one cpu
> * If the yield-to vcpu is not running, pull it to the front within its
> priority.  (I.e., if it's UNDER, put it at the front so it runs next;
> if it's OVER, make it the first OVER cpu.)


* Re: Linux spin lock enhancement on xen
  2010-08-24  8:20           ` Keir Fraser
@ 2010-08-24  8:43             ` George Dunlap
  2010-08-24  8:48             ` Jan Beulich
  1 sibling, 0 replies; 23+ messages in thread
From: George Dunlap @ 2010-08-24  8:43 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Jeremy Fitzhardinge, Xen-devel

On Tue, Aug 24, 2010 at 9:20 AM, Keir Fraser <keir.fraser@eu.citrix.com> wrote:
> I think there's a difference between providing some kind of yield_to as a
> private interface within the hypervisor as some kind of heuristic for
> emulating something like PAUSE, versus providing such an operation as a
> public guest interface.

I agree that any "yield_to" should be strictly a hint, and not a
guarantee by the HV.  If that's the case, I don't actually see a
difference between a malicous guest knowing that "yield_to" happens to
behave a certain way, and a malicious guest knowing that "PAUSE"
behaves a certain way.

> It seems to me that Jeremy's spinlock implementation provides all the info a
> scheduler would require: vcpus trying to acquire a lock are blocked, the
> lock holder wakes just the next vcpu in turn when it releases the lock. The
> scheduler at that point may have a decision to make as to whether to run the
> lock releaser, or the new lock holder, or both, but how can the guest help
> with that when its a system-wide scheduling decision? Obviously the guest
> would presumably like all its runnable vcpus to run all of the time!

I think that makes sense, but leaves out one important factor: that
the credit scheduler, as it is, is essentially round-robin within a
priority; and round-robin schedulers are known to discriminate against
vcpus that yield in favor of those that burn up their whole timeslice.
 I think it makes sense to give yielding guests a bit of an advantage
to compensate for that.

That said, this whole thing needs measurement: any yield_to
implementation would need to show that:
* The performance is significantly better than either Jeremy's
patches, or simple yield (with, perhaps, boost-peers, as Xiantao
suggests)
* It does not give a spin-locking workload a cpu advantage over other
workloads, such as specjbb (cpu-bound) or scp (very
latency-sensitive).

 -George


* Re: Linux spin lock enhancement on xen
  2010-08-24  8:20           ` Keir Fraser
  2010-08-24  8:43             ` George Dunlap
@ 2010-08-24  8:48             ` Jan Beulich
  2010-08-24  9:09               ` George Dunlap
  1 sibling, 1 reply; 23+ messages in thread
From: Jan Beulich @ 2010-08-24  8:48 UTC (permalink / raw)
  To: Keir Fraser, George Dunlap; +Cc: Jeremy Fitzhardinge, Xen-devel

>>> On 24.08.10 at 10:20, Keir Fraser <keir.fraser@eu.citrix.com> wrote:
> On 24/08/2010 09:08, "George Dunlap" <dunlapg@umich.edu> wrote:
> It seems to me that Jeremy's spinlock implementation provides all the info a
> scheduler would require: vcpus trying to acquire a lock are blocked, the
> lock holder wakes just the next vcpu in turn when it releases the lock. The
> scheduler at that point may have a decision to make as to whether to run the
> lock releaser, or the new lock holder, or both, but how can the guest help
> with that when its a system-wide scheduling decision? Obviously the guest
> would presumably like all its runnable vcpus to run all of the time!

Blocking on an unavailable lock is somewhat different imo: If the blocked
vCPU didn't exhaust its time slice, I think it is very valid for it to
expect to not penalize the whole VM, and rather donate (part of) its
remaining time slice to the lock holder. That keeps other domains
unaffected, while allowing the subject domain to make better use of
its resources.

>>  I thought the
>> solution he had was interesting: when yielding due to a spinlock,
>> rather than going to the back of the queue, just go behind one person.
>>  I think an implementation of "yield_to" that might make sense in the
>> credit scheduler is:
>> * Put the yielding vcpu behind one cpu

Which clearly has the potential of burning more cycles without
allowing the vCPU to actually make progress.

>> * If the yield-to vcpu is not running, pull it to the front within its
>> priority.  (I.e., if it's UNDER, put it at the front so it runs next;
>> if it's OVER, make it the first OVER cpu.)

At the risk of fairness wrt other domains, or even within the
domain. As said above, I think it would be better to temporarily
merge the priorities and location in the run queue of the yielding
and yielded-to vCPU-s, to have the yielded-to one get the
better of both (with a way to revert to the original settings
under the control of the guest, or enforced when the borrowed
time quantum expires).

The one more difficult case I would see in this model is what
needs to happen when the yielding vCPU has event delivery
enabled and receives an event, making it runnable again: In
this situation, the swapping of priority and/or run queue
placement might need to be forcibly reversed immediately,
not so much for fairness reasons than for keeping event
servicing latency reasonable. This includes the fact that in
such a case the vCPU wouldn't be able to do what it wants
with the waited for lock acquired, but would rather run the
event handling code first anyway, and hence the need for
boosting the lock holder went away.

Jan


* Re: Linux spin lock enhancement on xen
  2010-08-24  8:48             ` Jan Beulich
@ 2010-08-24  9:09               ` George Dunlap
  2010-08-24 13:25                 ` Jan Beulich
  0 siblings, 1 reply; 23+ messages in thread
From: George Dunlap @ 2010-08-24  9:09 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Jeremy Fitzhardinge, Xen-devel, Keir Fraser

On Tue, Aug 24, 2010 at 9:48 AM, Jan Beulich <JBeulich@novell.com> wrote:
>>>  I thought the
>>> solution he had was interesting: when yielding due to a spinlock,
>>> rather than going to the back of the queue, just go behind one person.
>>>  I think an implementation of "yield_to" that might make sense in the
>>> credit scheduler is:
>>> * Put the yielding vcpu behind one cpu
>
> Which clearly has the potential of burning more cycles without
> allowing the vCPU to actually make progress.

I think you may misunderstand; the yielding vcpu goes behind at least
one vcpu on the runqueue, even if the next vcpu is lower priority.  If
there's another vcpu on the runqueue, the other vcpu always runs.

I posted some scheduler patches implementing this yield a week or two
ago, and included some numbers.  The numbers were with Windows Server
2008, which has queued spinlocks (equivalent of ticketed spinlocks).
The throughput remained high even when highly over-committed.  So a
simple yield does have a significant effect.  In the unlikely even
that it is scheduled again, it will simply yield again when it sees
that it's still waiting for the spinlock.

In fact, undirected-yield is one of yield-to's competitors: I don't
think we should accept a "yield-to" patch unless it has significant
performance gains over undirected-yield.

> At the risk of fairness wrt other domains, or even within the
> domain. As said above, I think it would be better to temporarily
> merge the priorities and location in the run queue of the yielding
> and yielded-to vCPU-s, to have the yielded-to one get the
> better of both (with a way to revert to the original settings
> under the control of the guest, or enforced when the borrowed
> time quantum expires).

I think doing tricks with priorities is too complicated.  Complicated
mechanisms are very difficult to predict and prone to nasty,
hard-to-debug corner cases.  I don't think it's worth exploring this
kind of solution until it's clear that a simple solution cannot get
reasonable performance.  And I would oppose accepting any
priority-inheritance solution into the tree unless there were
repeatable measurements that showed that it had significant
performance gain over a simpler solution.

 -George


* Re: Linux spin lock enhancement on xen
  2010-08-24  9:09               ` George Dunlap
@ 2010-08-24 13:25                 ` Jan Beulich
  2010-08-24 16:11                   ` George Dunlap
  0 siblings, 1 reply; 23+ messages in thread
From: Jan Beulich @ 2010-08-24 13:25 UTC (permalink / raw)
  To: George Dunlap; +Cc: Jeremy Fitzhardinge, Xen-devel, Keir Fraser

>>> On 24.08.10 at 11:09, George Dunlap <dunlapg@umich.edu> wrote:
> On Tue, Aug 24, 2010 at 9:48 AM, Jan Beulich <JBeulich@novell.com> wrote:
>>>>  I thought the
>>>> solution he had was interesting: when yielding due to a spinlock,
>>>> rather than going to the back of the queue, just go behind one person.
>>>>  I think an implementation of "yield_to" that might make sense in the
>>>> credit scheduler is:
>>>> * Put the yielding vcpu behind one cpu
>>
>> Which clearly has the potential of burning more cycles without
>> allowing the vCPU to actually make progress.
> 
> I think you may misunderstand; the yielding vcpu goes behind at least
> one vcpu on the runqueue, even if the next vcpu is lower priority.  If
> there's another vcpu on the runqueue, the other vcpu always runs.

No, I understood it that way. What I was referring to is (as an
example) the case where two vCPU-s on the same pCPU's run queue
both yield: They will each move after the other in the run queue in
close succession, but neither will really make progress, and neither
will really increase the likelihood of the respective lock holder to
get a chance to run.

> I posted some scheduler patches implementing this yield a week or two
> ago, and included some numbers.  The numbers were with Windows Server
> 2008, which has queued spinlocks (equivalent of ticketed spinlocks).
> The throughput remained high even when highly over-committed.  So a
> simple yield does have a significant effect.  In the unlikely event
> that it is scheduled again, it will simply yield again when it sees
> that it's still waiting for the spinlock.

Immediately, or after a few (hundred) spin cycles?

> In fact, undirected-yield is one of yield-to's competitors: I don't
> think we should accept a "yield-to" patch unless it has significant
> performance gains over undirected-yield.

This position I agree with.

>> At the risk of fairness wrt other domains, or even within the
>> domain. As said above, I think it would be better to temporarily
>> merge the priorities and location in the run queue of the yielding
>> and yielded-to vCPU-s, to have the yielded-to one get the
>> better of both (with a way to revert to the original settings
>> under the control of the guest, or enforced when the borrowed
>> time quantum expires).
> 
> I think doing tricks with priorities is too complicated.  Complicated
> mechanisms are very difficult to predict and prone to nasty,
> hard-to-debug corner cases.  I don't think it's worth exploring this
> kind of solution until it's clear that a simple solution cannot get
> reasonable performance.  And I would oppose accepting any
> priority-inheritance solution into the tree unless there were
> repeatable measurements that showed that it had significant
> performance gain over a simpler solution.

And so I do with this. Apart from suspecting fairness issues with
your yield_to proposal (as I wrote), my point just is - we won't
know if a "complicated" solution outperforms a "simple" one if we
don't try it.

Jan


* Re: Linux spin lock enhancement on xen
  2010-08-24 13:25                 ` Jan Beulich
@ 2010-08-24 16:11                   ` George Dunlap
  2010-08-26 14:08                     ` Tim Deegan
  0 siblings, 1 reply; 23+ messages in thread
From: George Dunlap @ 2010-08-24 16:11 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Jeremy Fitzhardinge, Xen-devel, Keir Fraser

On Tue, Aug 24, 2010 at 2:25 PM, Jan Beulich <JBeulich@novell.com> wrote:
> No, I understood it that way. What I was referring to is (as an
> example) the case where two vCPU-s on the sam pCPU's run queue
> both yield: They will each move after the other in the run queue in
> close succession, but neither will really make progress, and neither
> will really increase the likelihood of the respective lock holder to
> get a chance to run.

Ah, I see.  In order for this to be a waste, it needs to be the case that:
* Two vcpus from different domains grab a spinlock and are then preempted
* Two vcpus from different domains then fail to grab the spinlock
* The two vcpus holding the locks are kept from getting cpu by
{another vcpu, other vcpus} which uses a long time-slice
* The two waiting for the lock share a cpu with each other and no one else

Of course in this situation, it would be nice if Xen could migrate one
of the other vcpus to the cpu of the two yielding vcpus.  That
shouldn't be too hard to implement, at least to see if it has a
measurable impact on aggregate throughput.

> Immediately, or after a few (hundred) spin cycles?

It depends on the implementation.  The Citrix guest tools do binary
patching of spinlock routines for w2k3 and XP; I believe they spin for
1000 cycles or so.  The viridian enlightenments I believe would yield
immediately.  I think the pause instruction causes a yield immediately
as well.

Yielding immediately when the host is not overloaded is actually
probably not optimal: if the vcpu holding the lock is currently
running, it's likely that by the time the vcpu makes it to the
scheduler, the lock it's waiting for has already been released.
(Which is part of the reason it's a spinlock and not a semaphore.)

> And so I do with this. Apart from suspecting fairness issues with
> your yield_to proposal (as I wrote), my point just is - we won't
> know if a "complicated" solution outperforms a "simple" one if we
> don't try it.

Are you volunteering? :-)

 -George


* RE: Linux spin lock enhancement on xen
  2010-08-24  8:08         ` George Dunlap
  2010-08-24  8:20           ` Keir Fraser
@ 2010-08-25  1:03           ` Dong, Eddie
  2010-08-26  2:13           ` Mukesh Rathor
  2 siblings, 0 replies; 23+ messages in thread
From: Dong, Eddie @ 2010-08-25  1:03 UTC (permalink / raw)
  To: George Dunlap, Keir Fraser; +Cc: Jeremy Fitzhardinge, Xen-devel, Dong, Eddie

George Dunlap wrote:
> Wow, I totally missed this thread.
> 
> A couple of thoughts;
> 
> Complicated solutions for the scheduler are a really bad idea.  It's
> hard enough to predict and debug the side-effects of simple
> mechanisms; a complex mechanism is doomed to failure at the outset.
> 
> I agree with Jeremy, that the guest shouldn't tell Xen to run a
> specific VCPU.  At most it should be something along the lines of, "If
> you're going to run any vcpu from this domain, please run vcpu X."
> 
> Jeremy, do you think that changes to the HV are necessary, or do you
> think that the existing solution is sufficient?  It seems to me like
> hinting to the HV to do a directed yield makes more sense than making
> the same thing happen via blocking and event channels.  OTOH, that
> gives the guest a lot more control over when and how things happen.
> 
> Mukesh, did you see the patch by Xiantao Zhang a few days ago,
> regarding what to do on an HVM pause instruction?  I thought the
> solution he had was interesting: when yielding due to a spinlock,
> rather than going to the back of the queue, just go behind one person.
>  I think an implementation of "yield_to" that might make sense in the
> credit scheduler is:
> * Put the yielding vcpu behind one cpu
> * If the yield-to vcpu is not running, pull it to the front within its
> priority.  (I.e., if it's UNDER, put it at the front so it runs next;
> if it's OVER, make it the first OVER cpu.)
> 
> Thoughts?
> 

What Xiantao (and I, internally) proposed is to implement temporary
coscheduling to address spin-lock issues, for FIFO spin locks as well as
ordinary spin locks, utilizing the PLE exit (it can of course work with
PV spin locks too). Here is our thinking (please refer to Xiantao's mail
as well):

There are two typical solutions to improve spin-lock efficiency in
virtualization: A) lock holder preemption avoidance (or co-scheduling),
and B) helping locks, which donate the spinning CPU cycles for overall
system utilization.

#A solves the spin-lock issue best; however, it requires either hardware
assistance to detect the lock holder, which is impractical, or
coscheduling, which is hard to implement efficiently and sacrifices a
lot of scheduler flexibility. Neither Xen nor KVM has implemented that.

#B (the current Xen policy with PLE yielding) may help system
performance, but it may not help the performance of the spinning guest.
In some cases the guest may even get worse due to the long wait (yield)
on the spin lock. In other cases it may get back additional CPU cycles
(and performance) from the VMM scheduler, complementing its earlier CPU
cycle donation. In general, #B may help system performance when the
system is indeed overcommitted, but it can also hurt single-guest
"speed".

An additional issue with #B is that it may hurt FIFO spin locks (ticket
spin locks in Linux and queued spin locks in Windows since Windows
2000), where by OS design only the first-in waiting VCPU is able to take
the lock. Current PLE cannot tell which VCPU is the next (first-in)
waiter and which one is the lock holder.

[Proposed optimization]
Lock holder preemption avoidance is the right way to fully utilize the
hardware PLE capability; the current solution simply hurts performance,
and we need to improve it along the lines of solution #A.

Given that current hardware is unable to tell which VCPU is the lock
holder or which one is the next (first-in) waiter, coscheduling may be
the choice. However, coscheduling has many side effects as well
(reportedly another company using co-scheduling is going to give it up).
This proposal is to do temporary coscheduling on top of the existing VMM
scheduling. The details are:

When one or more VCPUs of a guest are waiting for a spin lock, we
temporarily increase the priority of all VCPUs of the same guest so that
they get scheduled in for a short period. The period is kept small to
limit the impact of the "coscheduling" on the overall VMM scheduler. The
current Xen patch simply "boosts" those VCPUs, which already shows a
great gain, but the parameters of this algorithm can likely be tuned
further.

I believe this will be a perfect solution to the spin-lock issue with
PLE for now (when the VCPU count is not dramatically large).
vConsolidate (a mix of Linux and Windows guests) shows a 19%
consolidation performance gain, which is almost too good to believe, but
it is true :)  We are investigating more workloads and will post a new
patch soon.

Of course, if a PV guest is running in a PVM container, the PV'ed spin
lock is still needed. But I doubt its necessity if PVM is running on top
of an HVM container :)


Thx, Eddie


* Re: Linux spin lock enhancement on xen
  2010-08-24  8:08         ` George Dunlap
  2010-08-24  8:20           ` Keir Fraser
  2010-08-25  1:03           ` Dong, Eddie
@ 2010-08-26  2:13           ` Mukesh Rathor
  2 siblings, 0 replies; 23+ messages in thread
From: Mukesh Rathor @ 2010-08-26  2:13 UTC (permalink / raw)
  To: George Dunlap; +Cc: Jeremy Fitzhardinge, Xen-devel, Keir Fraser


>  I think an implementation of "yield_to" that might make sense in the
> credit scheduler is:
> * Put the yielding vcpu behind one cpu
> * If the yield-to vcpu is not running, pull it to the front within its
> priority.  (I.e., if it's UNDER, put it at the front so it runs next;
> if it's OVER, make it the first OVER cpu.)
> 

Yup, I second it.

thanks,
Mukesh


* Re: Linux spin lock enhancement on xen
  2010-08-24 16:11                   ` George Dunlap
@ 2010-08-26 14:08                     ` Tim Deegan
  0 siblings, 0 replies; 23+ messages in thread
From: Tim Deegan @ 2010-08-26 14:08 UTC (permalink / raw)
  To: George Dunlap; +Cc: Jeremy Fitzhardinge, Xen-devel, Keir Fraser, Jan Beulich

At 17:11 +0100 on 24 Aug (1282669892), George Dunlap wrote:
> It depends on the implementation.  The Citrix guest tools do binary
> patching of spinlock routines for w2k3 and XP; I believe they spin for
> 1000 cycles or so.  The viridian enlightenments I believe would yield
> immediately.

IIRC the Viridian interface includes a parameter that the hypervisor
passes to the guest to tell it how long to spin for before yielding. 

>  I think the pause instruction causes a yield immediately
> as well.

This is where the PLE hardware assist comes in - it effectively does the
same as the Viridian interfaces by counting PAUSEs.

FWIW (and I am definitely not a scheduler expert) I'm against anything
that gives a priority boost to a domain's VCPUs based on perceived
locking behaviour, and in favour of keeping things dead simple.
Targeted scheduler "improvements" have bitten us more than once.  When
George's scheduler regression tests can give us a more rounded picture
of the overall effect of scheduler tweaks (esp. on fairness) maybe that
will change.

Cheers,

Tim.

> Yielding immediately when the host is not overloaded is actually
> probably not optimal: if the vcpu holding the lock is currently
> running, it's likely that by the time the vcpu makes it to the
> scheduler, the lock it's waiting for has already been released.
> (Which is part of the reason it's a spinlock and not a semaphore.)
> 
> > And so I do with this. Apart from suspecting fairness issues with
> > your yield_to proposal (as I wrote), my point just is - we won't
> > know if a "complicated" solution outperforms a "simple" one if we
> > don't try it.
> 
> Are you volunteering? :-)
> 
>  -George
> 

-- 
Tim Deegan <Tim.Deegan@citrix.com>
Principal Software Engineer, XenServer Engineering
Citrix Systems UK Ltd.  (Company #02937203, SL9 0BG)
