All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Vrabel <david.vrabel@citrix.com>
To: xen-devel@lists.xenproject.org
Cc: Keir Fraser <keir@xen.org>,
	Ian Campbell <ian.campbell@citrix.com>, Tim Deegan <tim@xen.org>,
	David Vrabel <david.vrabel@citrix.com>,
	Jan Beulich <jbeulich@suse.com>,
	Malcolm Crossley <malcolm.crossley@citrix.com>
Subject: [PATCHv15 1/2] gnttab: use per-VCPU maptrack free lists
Date: Thu, 18 Jun 2015 18:18:32 +0100	[thread overview]
Message-ID: <1434647913-16028-2-git-send-email-david.vrabel@citrix.com> (raw)
In-Reply-To: <1434647913-16028-1-git-send-email-david.vrabel@citrix.com>

From: Malcolm Crossley <malcolm.crossley@citrix.com>

Performance analysis of aggregate network throughput with many VMs
shows that performance is signficantly limited by contention on the
maptrack lock when obtaining/releasing maptrack handles from the free
list.

Instead of a single free list use a per-VCPU list. This avoids any
contention when obtaining a handle.  Handles must be released back to
their original list and since this may occur on a different VCPU there
is some contention on the destination VCPU's free list tail pointer
(but this is much better than a per-domain lock).

Increase the default maximum number of maptrack frames by 4 times
because: a) struct grant_mapping is now 16 bytes (instead of 8); and
b) a guest may not evenly distribute all the grant map operations
across the VCPUs (meaning some VCPUs need more maptrack entries than
others).

Signed-off-by: Malcolm Crossley <malcolm.crossley@citrix.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
---
v15:
- Alloc maptrack array in grant_table_create().
- Init per-VCPU state when VCPUs are allocated.

v14:
- When adding a new frame, use first entry and don't add it to free
  list.
- v -> curr and d -> currd (where appropriate).

v13:
- When adding new frames, update v->maptrack_head last.
- Each VCPU no longer gets a reserved number of frames -- they're
  allocated first-come, first-served.
- Use original gt->maptrack_limit (instead of gt->maptrack_pages).

v11:
- Allocate maptrack array with vzalloc() since it may be >1 page.

v10:
- Divide max_maptrack_frames evenly amongst the VCPUs.
- Increase default max_maptrack_frames to compensate.
---
 xen/common/domain.c           |    2 +
 xen/common/grant_table.c      |  126 +++++++++++++++++++++++++++--------------
 xen/include/xen/grant_table.h |    4 +-
 xen/include/xen/sched.h       |    4 ++
 4 files changed, 92 insertions(+), 44 deletions(-)

diff --git a/xen/common/domain.c b/xen/common/domain.c
index b0e83f5..3bc52e6 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -127,6 +127,8 @@ struct vcpu *alloc_vcpu(
 
     tasklet_init(&v->continue_hypercall_tasklet, NULL, 0);
 
+    grant_table_init_vcpu(v);
+
     if ( !zalloc_cpumask_var(&v->cpu_hard_affinity) ||
          !zalloc_cpumask_var(&v->cpu_hard_affinity_tmp) ||
          !zalloc_cpumask_var(&v->cpu_hard_affinity_saved) ||
diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
index a011276..a371822 100644
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -37,6 +37,7 @@
 #include <xen/iommu.h>
 #include <xen/paging.h>
 #include <xen/keyhandler.h>
+#include <xen/vmap.h>
 #include <xsm/xsm.h>
 #include <asm/flushtlb.h>
 
@@ -57,7 +58,7 @@ integer_param("gnttab_max_frames", max_grant_frames);
  * New options allow to set max_maptrack_frames and
  * map_grant_table_frames independently.
  */
-#define DEFAULT_MAX_MAPTRACK_FRAMES 256
+#define DEFAULT_MAX_MAPTRACK_FRAMES 1024
 
 static unsigned int __read_mostly max_maptrack_frames;
 integer_param("gnttab_max_maptrack_frames", max_maptrack_frames);
@@ -121,6 +122,8 @@ struct grant_mapping {
     u32      ref;           /* grant ref */
     u16      flags;         /* 0-4: GNTMAP_* ; 5-15: unused */
     domid_t  domid;         /* granting domain */
+    u32      vcpu;          /* vcpu which created the grant mapping */
+    u32      pad;           /* round size to a power of 2 */
 };
 
 #define MAPTRACK_PER_PAGE (PAGE_SIZE / sizeof(struct grant_mapping))
@@ -289,62 +292,103 @@ double_gt_unlock(struct grant_table *lgt, struct grant_table *rgt)
 
 static inline int
 __get_maptrack_handle(
-    struct grant_table *t)
+    struct grant_table *t,
+    struct vcpu *v)
 {
-    unsigned int h;
-    if ( unlikely((h = t->maptrack_head) == MAPTRACK_TAIL) )
+    unsigned int head, next;
+
+    /* No maptrack pages allocated for this VCPU yet? */
+    head = v->maptrack_head;
+    if ( unlikely(head == MAPTRACK_TAIL) )
+        return -1;
+
+    /*
+     * Always keep one entry in the free list to make it easier to add
+     * free entries to the tail.
+     */
+    next = read_atomic(&maptrack_entry(t, head).ref);
+    if ( unlikely(next == MAPTRACK_TAIL) )
         return -1;
-    t->maptrack_head = maptrack_entry(t, h).ref;
-    return h;
+
+    v->maptrack_head = next;
+
+    return head;
 }
 
 static inline void
 put_maptrack_handle(
     struct grant_table *t, int handle)
 {
-    spin_lock(&t->maptrack_lock);
-    maptrack_entry(t, handle).ref = t->maptrack_head;
-    t->maptrack_head = handle;
-    spin_unlock(&t->maptrack_lock);
+    struct domain *currd = current->domain;
+    struct vcpu *v;
+    unsigned int prev_tail, cur_tail;
+
+    /* 1. Set entry to be a tail. */
+    maptrack_entry(t, handle).ref = MAPTRACK_TAIL;
+
+    /* 2. Add entry to the tail of the list on the original VCPU. */
+    v = currd->vcpu[maptrack_entry(t,handle).vcpu];
+
+    cur_tail = read_atomic(&v->maptrack_tail);
+    do {
+        prev_tail = cur_tail;
+        cur_tail = cmpxchg(&v->maptrack_tail, prev_tail, handle);
+    } while ( cur_tail != prev_tail );
+
+    /* 3. Update the old tail entry to point to the new entry. */
+    write_atomic(&maptrack_entry(t, prev_tail).ref, handle);
 }
 
 static inline int
 get_maptrack_handle(
     struct grant_table *lgt)
 {
+    struct vcpu          *curr = current;
     int                   i;
     grant_handle_t        handle;
     struct grant_mapping *new_mt;
-    unsigned int          new_mt_limit, nr_frames;
+
+    handle = __get_maptrack_handle(lgt, curr);
+    if ( likely(handle != -1) )
+        return handle;
 
     spin_lock(&lgt->maptrack_lock);
 
-    while ( unlikely((handle = __get_maptrack_handle(lgt)) == -1) )
+    if ( nr_maptrack_frames(lgt) >= max_maptrack_frames )
     {
-        nr_frames = nr_maptrack_frames(lgt);
-        if ( nr_frames >= max_maptrack_frames )
-            break;
+        spin_unlock(&lgt->maptrack_lock);
+        return -1;
+    }
 
-        new_mt = alloc_xenheap_page();
-        if ( !new_mt )
-            break;
+    new_mt = alloc_xenheap_page();
+    if ( !new_mt )
+    {
+        spin_unlock(&lgt->maptrack_lock);
+        return -1;
+    }
+    clear_page(new_mt);
 
-        clear_page(new_mt);
+    /*
+     * Use the first new entry and add the remaining entries to the
+     * head of the free list.
+     */
+    handle = lgt->maptrack_limit;
 
-        new_mt_limit = lgt->maptrack_limit + MAPTRACK_PER_PAGE;
+    for ( i = 0; i < MAPTRACK_PER_PAGE; i++ )
+    {
+        new_mt[i].ref = handle + i + 1;
+        new_mt[i].vcpu = curr->vcpu_id;
+    }
+    new_mt[i - 1].ref = curr->maptrack_head;
 
-        for ( i = 1; i < MAPTRACK_PER_PAGE; i++ )
-            new_mt[i - 1].ref = lgt->maptrack_limit + i;
-        new_mt[i - 1].ref = lgt->maptrack_head;
-        lgt->maptrack_head = lgt->maptrack_limit;
+    /* Set tail directly if this is the first page for this VCPU. */
+    if ( curr->maptrack_tail == MAPTRACK_TAIL )
+        curr->maptrack_tail = handle + MAPTRACK_PER_PAGE - 1;
 
-        lgt->maptrack[nr_frames] = new_mt;
-        smp_wmb();
-        lgt->maptrack_limit      = new_mt_limit;
+    curr->maptrack_head = handle + 1;
 
-        gdprintk(XENLOG_INFO, "Increased maptrack size to %u frames\n",
-                 nr_frames + 1);
-    }
+    lgt->maptrack[nr_maptrack_frames(lgt)] = new_mt;
+    lgt->maptrack_limit += MAPTRACK_PER_PAGE;
 
     spin_unlock(&lgt->maptrack_lock);
 
@@ -3048,16 +3092,9 @@ grant_table_create(
     }
 
     /* Tracking of mapped foreign frames table */
-    if ( (t->maptrack = xzalloc_array(struct grant_mapping *,
-                                      max_maptrack_frames)) == NULL )
+    t->maptrack = vzalloc(max_maptrack_frames * sizeof(*t->maptrack));
+    if ( t->maptrack == NULL )
         goto no_mem_2;
-    if ( (t->maptrack[0] = alloc_xenheap_page()) == NULL )
-        goto no_mem_3;
-    clear_page(t->maptrack[0]);
-    t->maptrack_limit = MAPTRACK_PER_PAGE;
-    for ( i = 1; i < MAPTRACK_PER_PAGE; i++ )
-        t->maptrack[0][i - 1].ref = i;
-    t->maptrack[0][i - 1].ref = MAPTRACK_TAIL;
 
     /* Shared grant table. */
     if ( (t->shared_raw = xzalloc_array(void *, max_grant_frames)) == NULL )
@@ -3089,8 +3126,7 @@ grant_table_create(
         free_xenheap_page(t->shared_raw[i]);
     xfree(t->shared_raw);
  no_mem_3:
-    free_xenheap_page(t->maptrack[0]);
-    xfree(t->maptrack);
+    vfree(t->maptrack);
  no_mem_2:
     for ( i = 0;
           i < num_act_frames_from_sha_frames(INITIAL_NR_GRANT_FRAMES); i++ )
@@ -3225,7 +3261,7 @@ grant_table_destroy(
 
     for ( i = 0; i < nr_maptrack_frames(t); i++ )
         free_xenheap_page(t->maptrack[i]);
-    xfree(t->maptrack);
+    vfree(t->maptrack);
 
     for ( i = 0; i < nr_active_grant_frames(t); i++ )
         free_xenheap_page(t->active[i]);
@@ -3239,6 +3275,12 @@ grant_table_destroy(
     d->grant_table = NULL;
 }
 
+void grant_table_init_vcpu(struct vcpu *v)
+{
+    v->maptrack_head = MAPTRACK_TAIL;
+    v->maptrack_tail = MAPTRACK_TAIL;
+}
+
 static void gnttab_usage_print(struct domain *rd)
 {
     int first = 1;
diff --git a/xen/include/xen/grant_table.h b/xen/include/xen/grant_table.h
index 9536331..9c7b5a3 100644
--- a/xen/include/xen/grant_table.h
+++ b/xen/include/xen/grant_table.h
@@ -73,9 +73,8 @@ struct grant_table {
     grant_status_t       **status;
     /* Active grant table. */
     struct active_grant_entry **active;
-    /* Mapping tracking table. */
+    /* Mapping tracking table per vcpu. */
     struct grant_mapping **maptrack;
-    unsigned int          maptrack_head;
     unsigned int          maptrack_limit;
     /* Lock protecting the maptrack page list, head, and limit */
     spinlock_t            maptrack_lock;
@@ -89,6 +88,7 @@ int grant_table_create(
     struct domain *d);
 void grant_table_destroy(
     struct domain *d);
+void grant_table_init_vcpu(struct vcpu *v);
 
 /* Domain death release of granted mappings of other domains' memory. */
 void
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 604d047..d810e1c 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -219,6 +219,10 @@ struct vcpu
     /* VCPU paused by system controller. */
     int              controller_pause_count;
 
+    /* Maptrack */
+    unsigned int     maptrack_head;
+    unsigned int     maptrack_tail;
+
     /* IRQ-safe virq_lock protects against delivering VIRQ to stale evtchn. */
     evtchn_port_t    virq_to_evtchn[NR_VIRQS];
     spinlock_t       virq_lock;
-- 
1.7.10.4

  reply	other threads:[~2015-06-18 17:49 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-06-18 17:18 [PATCHv15 0/2] gnttab: Improve scaleability David Vrabel
2015-06-18 17:18 ` David Vrabel [this message]
2015-06-18 17:18 ` [PATCHv15 2/2] gnttab: steal maptrack entries from other VCPUs David Vrabel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1434647913-16028-2-git-send-email-david.vrabel@citrix.com \
    --to=david.vrabel@citrix.com \
    --cc=ian.campbell@citrix.com \
    --cc=jbeulich@suse.com \
    --cc=keir@xen.org \
    --cc=malcolm.crossley@citrix.com \
    --cc=tim@xen.org \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.