From: David Vrabel
Subject: [PATCHv15 1/2] gnttab: use per-VCPU maptrack free lists
Date: Thu, 18 Jun 2015 18:18:32 +0100
Message-ID: <1434647913-16028-2-git-send-email-david.vrabel@citrix.com>
References: <1434647913-16028-1-git-send-email-david.vrabel@citrix.com>
In-Reply-To: <1434647913-16028-1-git-send-email-david.vrabel@citrix.com>
To: xen-devel@lists.xenproject.org
Cc: Keir Fraser, Ian Campbell, Tim Deegan, David Vrabel, Jan Beulich, Malcolm Crossley
List-Id: xen-devel@lists.xenproject.org

From: Malcolm Crossley

Performance analysis of aggregate network throughput with many VMs
shows that performance is significantly limited by contention on the
maptrack lock when obtaining/releasing maptrack handles from the free
list.

Instead of a single free list, use a per-VCPU list. This avoids any
contention when obtaining a handle. Handles must be released back to
their original list, and since this may occur on a different VCPU
there is some contention on the destination VCPU's free list tail
pointer (but this is much better than a per-domain lock).

Increase the default maximum number of maptrack frames four-fold
because: a) struct grant_mapping is now 16 bytes (instead of 8); and
b) a guest may not evenly distribute all the grant map operations
across the VCPUs (meaning some VCPUs need more maptrack entries than
others).

Signed-off-by: Malcolm Crossley
Signed-off-by: David Vrabel
---
v15:
- Alloc maptrack array in grant_table_create().
- Init per-VCPU state when VCPUs are allocated.

v14:
- When adding a new frame, use first entry and don't add it to free list.
- v -> curr and d -> currd (where appropriate).

v13:
- When adding new frames, update v->maptrack_head last.
- Each VCPU no longer gets a reserved number of frames -- they're
  allocated first-come, first-served.
- Use original gt->maptrack_limit (instead of gt->maptrack_pages).

v11:
- Allocate maptrack array with vzalloc() since it may be >1 page.

v10:
- Divide max_maptrack_frames evenly amongst the VCPUs.
- Increase default max_maptrack_frames to compensate.
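The idea, in outline: each VCPU owns a singly-linked free list in which
only the owning VCPU pops handles from the head, while any VCPU may
return a handle by appending it to the owner's tail with cmpxchg.
Keeping at least one entry on the list at all times means a release only
ever touches the old tail entry and the tail pointer, never the head.
The standalone sketch below illustrates this; all names are illustrative
and C11 atomics stand in for Xen's read_atomic()/write_atomic()/cmpxchg(),
so this is a sketch of the technique, not code from the patch:

/*
 * Minimal sketch of the per-VCPU free-list scheme (hypothetical names).
 * Build with: cc -std=c11 sketch.c
 */
#include <stdatomic.h>
#include <stdio.h>

#define TAIL_SENTINEL (~0u)     /* plays the role of MAPTRACK_TAIL */
#define NR_ENTRIES    8

struct entry {
    _Atomic unsigned int next;  /* index of next free entry, or sentinel */
};

struct freelist {
    unsigned int head;          /* popped only by the owning "VCPU" */
    _Atomic unsigned int tail;  /* appended to by any "VCPU" via CAS */
};

static struct entry entries[NR_ENTRIES];

/*
 * Pop a handle from the head.  The final entry is never handed out, so
 * a concurrent put_handle() always has a predecessor whose .next it can
 * safely update (cf. __get_maptrack_handle() in the patch).
 */
static int get_handle(struct freelist *fl)
{
    unsigned int head = fl->head, next;

    if (head == TAIL_SENTINEL)
        return -1;                       /* list never populated */
    next = atomic_load(&entries[head].next);
    if (next == TAIL_SENTINEL)
        return -1;                       /* keep one entry in reserve */
    fl->head = next;
    return (int)head;
}

/*
 * Return a handle to the tail of its owner's list.  Unlike get_handle()
 * this may run on any "VCPU", hence the CAS loop on the tail pointer
 * (cf. put_maptrack_handle() in the patch).
 */
static void put_handle(struct freelist *fl, unsigned int handle)
{
    unsigned int prev_tail;

    /* 1. The returned entry becomes the new list terminator. */
    atomic_store(&entries[handle].next, TAIL_SENTINEL);

    /* 2. Swing the tail pointer to it. */
    prev_tail = atomic_load(&fl->tail);
    while (!atomic_compare_exchange_weak(&fl->tail, &prev_tail, handle))
        ;   /* prev_tail is reloaded on failure */

    /* 3. Link the old tail entry to the new one. */
    atomic_store(&entries[prev_tail].next, handle);
}

int main(void)
{
    struct freelist fl = { .head = 0 };
    unsigned int i;
    int h;

    /* Chain all entries together; the last one is the initial tail. */
    for (i = 0; i < NR_ENTRIES - 1; i++)
        atomic_init(&entries[i].next, i + 1);
    atomic_init(&entries[NR_ENTRIES - 1].next, TAIL_SENTINEL);
    atomic_init(&fl.tail, NR_ENTRIES - 1);

    h = get_handle(&fl);
    printf("got handle %d\n", h);
    put_handle(&fl, (unsigned int)h);
    printf("head now %u, tail now %u\n", fl.head,
           (unsigned int)atomic_load(&fl.tail));
    return 0;
}

The list is transiently disconnected between steps 2 and 3 of the
release, which is exactly why both the sketch and __get_maptrack_handle()
refuse to hand out the final free entry.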
---
 xen/common/domain.c           |    2 +
 xen/common/grant_table.c      |  126 +++++++++++++++++++++++++++--------------
 xen/include/xen/grant_table.h |    4 +-
 xen/include/xen/sched.h       |    4 ++
 4 files changed, 92 insertions(+), 44 deletions(-)

diff --git a/xen/common/domain.c b/xen/common/domain.c
index b0e83f5..3bc52e6 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -127,6 +127,8 @@ struct vcpu *alloc_vcpu(
 
     tasklet_init(&v->continue_hypercall_tasklet, NULL, 0);
 
+    grant_table_init_vcpu(v);
+
     if ( !zalloc_cpumask_var(&v->cpu_hard_affinity) ||
          !zalloc_cpumask_var(&v->cpu_hard_affinity_tmp) ||
          !zalloc_cpumask_var(&v->cpu_hard_affinity_saved) ||
diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
index a011276..a371822 100644
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -37,6 +37,7 @@
 #include <xen/iommu.h>
 #include <xen/paging.h>
 #include <xen/keyhandler.h>
+#include <xen/vmap.h>
 #include <xsm/xsm.h>
 #include <asm/flushtlb.h>
@@ -57,7 +58,7 @@ integer_param("gnttab_max_frames", max_grant_frames);
  * New options allow to set max_maptrack_frames and
  * map_grant_table_frames independently.
  */
-#define DEFAULT_MAX_MAPTRACK_FRAMES 256
+#define DEFAULT_MAX_MAPTRACK_FRAMES 1024
 
 static unsigned int __read_mostly max_maptrack_frames;
 integer_param("gnttab_max_maptrack_frames", max_maptrack_frames);
@@ -121,6 +122,8 @@ struct grant_mapping {
     u32      ref;           /* grant ref */
     u16      flags;         /* 0-4: GNTMAP_* ; 5-15: unused */
     domid_t  domid;         /* granting domain */
+    u32      vcpu;          /* vcpu which created the grant mapping */
+    u32      pad;           /* round size to a power of 2 */
 };
 
 #define MAPTRACK_PER_PAGE (PAGE_SIZE / sizeof(struct grant_mapping))
@@ -289,62 +292,103 @@ double_gt_unlock(struct grant_table *lgt, struct grant_table *rgt)
 
 static inline int
 __get_maptrack_handle(
-    struct grant_table *t)
+    struct grant_table *t,
+    struct vcpu *v)
 {
-    unsigned int h;
-    if ( unlikely((h = t->maptrack_head) == MAPTRACK_TAIL) )
+    unsigned int head, next;
+
+    /* No maptrack pages allocated for this VCPU yet? */
+    head = v->maptrack_head;
+    if ( unlikely(head == MAPTRACK_TAIL) )
+        return -1;
+
+    /*
+     * Always keep one entry in the free list to make it easier to add
+     * free entries to the tail.
+     */
+    next = read_atomic(&maptrack_entry(t, head).ref);
+    if ( unlikely(next == MAPTRACK_TAIL) )
         return -1;
-    t->maptrack_head = maptrack_entry(t, h).ref;
-    return h;
+
+    v->maptrack_head = next;
+
+    return head;
 }
 
 static inline void
 put_maptrack_handle(
     struct grant_table *t, int handle)
 {
-    spin_lock(&t->maptrack_lock);
-    maptrack_entry(t, handle).ref = t->maptrack_head;
-    t->maptrack_head = handle;
-    spin_unlock(&t->maptrack_lock);
+    struct domain *currd = current->domain;
+    struct vcpu *v;
+    unsigned int prev_tail, cur_tail;
+
+    /* 1. Set entry to be a tail. */
+    maptrack_entry(t, handle).ref = MAPTRACK_TAIL;
+
+    /* 2. Add entry to the tail of the list on the original VCPU. */
+    v = currd->vcpu[maptrack_entry(t,handle).vcpu];
+
+    cur_tail = read_atomic(&v->maptrack_tail);
+    do {
+        prev_tail = cur_tail;
+        cur_tail = cmpxchg(&v->maptrack_tail, prev_tail, handle);
+    } while ( cur_tail != prev_tail );
+
+    /* 3. Update the old tail entry to point to the new entry. */
+    write_atomic(&maptrack_entry(t, prev_tail).ref, handle);
 }
 
 static inline int
 get_maptrack_handle(
     struct grant_table *lgt)
 {
+    struct vcpu          *curr = current;
     int                   i;
     grant_handle_t        handle;
    struct grant_mapping *new_mt;
-    unsigned int          new_mt_limit, nr_frames;
+
+    handle = __get_maptrack_handle(lgt, curr);
+    if ( likely(handle != -1) )
+        return handle;
 
     spin_lock(&lgt->maptrack_lock);
 
-    while ( unlikely((handle = __get_maptrack_handle(lgt)) == -1) )
+    if ( nr_maptrack_frames(lgt) >= max_maptrack_frames )
     {
-        nr_frames = nr_maptrack_frames(lgt);
-        if ( nr_frames >= max_maptrack_frames )
-            break;
+        spin_unlock(&lgt->maptrack_lock);
+        return -1;
+    }
 
-        new_mt = alloc_xenheap_page();
-        if ( !new_mt )
-            break;
+    new_mt = alloc_xenheap_page();
+    if ( !new_mt )
+    {
+        spin_unlock(&lgt->maptrack_lock);
+        return -1;
+    }
+    clear_page(new_mt);
 
-        clear_page(new_mt);
+    /*
+     * Use the first new entry and add the remaining entries to the
+     * head of the free list.
+     */
+    handle = lgt->maptrack_limit;
 
-        new_mt_limit = lgt->maptrack_limit + MAPTRACK_PER_PAGE;
+    for ( i = 0; i < MAPTRACK_PER_PAGE; i++ )
+    {
+        new_mt[i].ref = handle + i + 1;
+        new_mt[i].vcpu = curr->vcpu_id;
+    }
+    new_mt[i - 1].ref = curr->maptrack_head;
 
-        for ( i = 1; i < MAPTRACK_PER_PAGE; i++ )
-            new_mt[i - 1].ref = lgt->maptrack_limit + i;
-        new_mt[i - 1].ref = lgt->maptrack_head;
-        lgt->maptrack_head = lgt->maptrack_limit;
+    /* Set tail directly if this is the first page for this VCPU. */
+    if ( curr->maptrack_tail == MAPTRACK_TAIL )
+        curr->maptrack_tail = handle + MAPTRACK_PER_PAGE - 1;
 
-        lgt->maptrack[nr_frames] = new_mt;
-        smp_wmb();
-        lgt->maptrack_limit      = new_mt_limit;
+    curr->maptrack_head = handle + 1;
 
-        gdprintk(XENLOG_INFO, "Increased maptrack size to %u frames\n",
-                 nr_frames + 1);
-    }
+    lgt->maptrack[nr_maptrack_frames(lgt)] = new_mt;
+    lgt->maptrack_limit += MAPTRACK_PER_PAGE;
 
     spin_unlock(&lgt->maptrack_lock);
@@ -3048,16 +3092,9 @@ grant_table_create(
     }
 
     /* Tracking of mapped foreign frames table */
-    if ( (t->maptrack = xzalloc_array(struct grant_mapping *,
-                                      max_maptrack_frames)) == NULL )
+    t->maptrack = vzalloc(max_maptrack_frames * sizeof(*t->maptrack));
+    if ( t->maptrack == NULL )
         goto no_mem_2;
-    if ( (t->maptrack[0] = alloc_xenheap_page()) == NULL )
-        goto no_mem_3;
-    clear_page(t->maptrack[0]);
-    t->maptrack_limit = MAPTRACK_PER_PAGE;
-    for ( i = 1; i < MAPTRACK_PER_PAGE; i++ )
-        t->maptrack[0][i - 1].ref = i;
-    t->maptrack[0][i - 1].ref = MAPTRACK_TAIL;
 
     /* Shared grant table. */
     if ( (t->shared_raw = xzalloc_array(void *, max_grant_frames)) == NULL )
@@ -3089,8 +3126,7 @@ grant_table_create(
         free_xenheap_page(t->shared_raw[i]);
     xfree(t->shared_raw);
  no_mem_3:
-    free_xenheap_page(t->maptrack[0]);
-    xfree(t->maptrack);
+    vfree(t->maptrack);
  no_mem_2:
     for ( i = 0;
           i < num_act_frames_from_sha_frames(INITIAL_NR_GRANT_FRAMES); i++ )
@@ -3225,7 +3261,7 @@ grant_table_destroy(
 
     for ( i = 0; i < nr_maptrack_frames(t); i++ )
         free_xenheap_page(t->maptrack[i]);
-    xfree(t->maptrack);
+    vfree(t->maptrack);
 
     for ( i = 0; i < nr_active_grant_frames(t); i++ )
         free_xenheap_page(t->active[i]);
@@ -3239,6 +3275,12 @@ grant_table_destroy(
     d->grant_table = NULL;
 }
 
+void grant_table_init_vcpu(struct vcpu *v)
+{
+    v->maptrack_head = MAPTRACK_TAIL;
+    v->maptrack_tail = MAPTRACK_TAIL;
+}
+
 static void gnttab_usage_print(struct domain *rd)
 {
     int first = 1;
diff --git a/xen/include/xen/grant_table.h b/xen/include/xen/grant_table.h
index 9536331..9c7b5a3 100644
--- a/xen/include/xen/grant_table.h
+++ b/xen/include/xen/grant_table.h
@@ -73,9 +73,8 @@ struct grant_table {
     grant_status_t       **status;
     /* Active grant table. */
     struct active_grant_entry **active;
-    /* Mapping tracking table. */
+    /* Mapping tracking table per vcpu. */
     struct grant_mapping **maptrack;
-    unsigned int maptrack_head;
     unsigned int maptrack_limit;
     /* Lock protecting the maptrack page list, head, and limit */
     spinlock_t maptrack_lock;
@@ -89,6 +88,7 @@ int grant_table_create(
     struct domain *d);
 void grant_table_destroy(
     struct domain *d);
+void grant_table_init_vcpu(struct vcpu *v);
 
 /* Domain death release of granted mappings of other domains' memory. */
 void
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 604d047..d810e1c 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -219,6 +219,10 @@ struct vcpu
     /* VCPU paused by system controller. */
     int              controller_pause_count;
 
+    /* Maptrack */
+    unsigned int     maptrack_head;
+    unsigned int     maptrack_tail;
+
     /* IRQ-safe virq_lock protects against delivering VIRQ to stale evtchn. */
     evtchn_port_t    virq_to_evtchn[NR_VIRQS];
     spinlock_t       virq_lock;
-- 
1.7.10.4