* [PATCHv15 1/2] gnttab: use per-VCPU maptrack free lists
2015-06-18 17:18 [PATCHv15 0/2] gnttab: Improve scaleability David Vrabel
@ 2015-06-18 17:18 ` David Vrabel
2015-06-18 17:18 ` [PATCHv15 2/2] gnttab: steal maptrack entries from other VCPUs David Vrabel
1 sibling, 0 replies; 3+ messages in thread
From: David Vrabel @ 2015-06-18 17:18 UTC (permalink / raw)
To: xen-devel
Cc: Keir Fraser, Ian Campbell, Tim Deegan, David Vrabel, Jan Beulich,
Malcolm Crossley
From: Malcolm Crossley <malcolm.crossley@citrix.com>
Performance analysis of aggregate network throughput with many VMs
shows that performance is signficantly limited by contention on the
maptrack lock when obtaining/releasing maptrack handles from the free
list.
Instead of a single free list use a per-VCPU list. This avoids any
contention when obtaining a handle. Handles must be released back to
their original list and since this may occur on a different VCPU there
is some contention on the destination VCPU's free list tail pointer
(but this is much better than a per-domain lock).
Increase the default maximum number of maptrack frames by 4 times
because: a) struct grant_mapping is now 16 bytes (instead of 8); and
b) a guest may not evenly distribute all the grant map operations
across the VCPUs (meaning some VCPUs need more maptrack entries than
others).
Signed-off-by: Malcolm Crossley <malcolm.crossley@citrix.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
---
v15:
- Alloc maptrack array in grant_table_create().
- Init per-VCPU state when VCPUs are allocated.
v14:
- When adding a new frame, use first entry and don't add it to free
list.
- v -> curr and d -> currd (where appropriate).
v13:
- When adding new frames, update v->maptrack_head last.
- Each VCPU no longer gets a reserved number of frames -- they're
allocated first-come, first-served.
- Use original gt->maptrack_limit (instead of gt->maptrack_pages).
v11:
- Allocate maptrack array with vzalloc() since it may be >1 page.
v10:
- Divide max_maptrack_frames evenly amongst the VCPUs.
- Increase default max_maptrack_frames to compensate.
---
xen/common/domain.c | 2 +
xen/common/grant_table.c | 126 +++++++++++++++++++++++++++--------------
xen/include/xen/grant_table.h | 4 +-
xen/include/xen/sched.h | 4 ++
4 files changed, 92 insertions(+), 44 deletions(-)
diff --git a/xen/common/domain.c b/xen/common/domain.c
index b0e83f5..3bc52e6 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -127,6 +127,8 @@ struct vcpu *alloc_vcpu(
tasklet_init(&v->continue_hypercall_tasklet, NULL, 0);
+ grant_table_init_vcpu(v);
+
if ( !zalloc_cpumask_var(&v->cpu_hard_affinity) ||
!zalloc_cpumask_var(&v->cpu_hard_affinity_tmp) ||
!zalloc_cpumask_var(&v->cpu_hard_affinity_saved) ||
diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
index a011276..a371822 100644
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -37,6 +37,7 @@
#include <xen/iommu.h>
#include <xen/paging.h>
#include <xen/keyhandler.h>
+#include <xen/vmap.h>
#include <xsm/xsm.h>
#include <asm/flushtlb.h>
@@ -57,7 +58,7 @@ integer_param("gnttab_max_frames", max_grant_frames);
* New options allow to set max_maptrack_frames and
* map_grant_table_frames independently.
*/
-#define DEFAULT_MAX_MAPTRACK_FRAMES 256
+#define DEFAULT_MAX_MAPTRACK_FRAMES 1024
static unsigned int __read_mostly max_maptrack_frames;
integer_param("gnttab_max_maptrack_frames", max_maptrack_frames);
@@ -121,6 +122,8 @@ struct grant_mapping {
u32 ref; /* grant ref */
u16 flags; /* 0-4: GNTMAP_* ; 5-15: unused */
domid_t domid; /* granting domain */
+ u32 vcpu; /* vcpu which created the grant mapping */
+ u32 pad; /* round size to a power of 2 */
};
#define MAPTRACK_PER_PAGE (PAGE_SIZE / sizeof(struct grant_mapping))
@@ -289,62 +292,103 @@ double_gt_unlock(struct grant_table *lgt, struct grant_table *rgt)
static inline int
__get_maptrack_handle(
- struct grant_table *t)
+ struct grant_table *t,
+ struct vcpu *v)
{
- unsigned int h;
- if ( unlikely((h = t->maptrack_head) == MAPTRACK_TAIL) )
+ unsigned int head, next;
+
+ /* No maptrack pages allocated for this VCPU yet? */
+ head = v->maptrack_head;
+ if ( unlikely(head == MAPTRACK_TAIL) )
+ return -1;
+
+ /*
+ * Always keep one entry in the free list to make it easier to add
+ * free entries to the tail.
+ */
+ next = read_atomic(&maptrack_entry(t, head).ref);
+ if ( unlikely(next == MAPTRACK_TAIL) )
return -1;
- t->maptrack_head = maptrack_entry(t, h).ref;
- return h;
+
+ v->maptrack_head = next;
+
+ return head;
}
static inline void
put_maptrack_handle(
struct grant_table *t, int handle)
{
- spin_lock(&t->maptrack_lock);
- maptrack_entry(t, handle).ref = t->maptrack_head;
- t->maptrack_head = handle;
- spin_unlock(&t->maptrack_lock);
+ struct domain *currd = current->domain;
+ struct vcpu *v;
+ unsigned int prev_tail, cur_tail;
+
+ /* 1. Set entry to be a tail. */
+ maptrack_entry(t, handle).ref = MAPTRACK_TAIL;
+
+ /* 2. Add entry to the tail of the list on the original VCPU. */
+ v = currd->vcpu[maptrack_entry(t,handle).vcpu];
+
+ cur_tail = read_atomic(&v->maptrack_tail);
+ do {
+ prev_tail = cur_tail;
+ cur_tail = cmpxchg(&v->maptrack_tail, prev_tail, handle);
+ } while ( cur_tail != prev_tail );
+
+ /* 3. Update the old tail entry to point to the new entry. */
+ write_atomic(&maptrack_entry(t, prev_tail).ref, handle);
}
static inline int
get_maptrack_handle(
struct grant_table *lgt)
{
+ struct vcpu *curr = current;
int i;
grant_handle_t handle;
struct grant_mapping *new_mt;
- unsigned int new_mt_limit, nr_frames;
+
+ handle = __get_maptrack_handle(lgt, curr);
+ if ( likely(handle != -1) )
+ return handle;
spin_lock(&lgt->maptrack_lock);
- while ( unlikely((handle = __get_maptrack_handle(lgt)) == -1) )
+ if ( nr_maptrack_frames(lgt) >= max_maptrack_frames )
{
- nr_frames = nr_maptrack_frames(lgt);
- if ( nr_frames >= max_maptrack_frames )
- break;
+ spin_unlock(&lgt->maptrack_lock);
+ return -1;
+ }
- new_mt = alloc_xenheap_page();
- if ( !new_mt )
- break;
+ new_mt = alloc_xenheap_page();
+ if ( !new_mt )
+ {
+ spin_unlock(&lgt->maptrack_lock);
+ return -1;
+ }
+ clear_page(new_mt);
- clear_page(new_mt);
+ /*
+ * Use the first new entry and add the remaining entries to the
+ * head of the free list.
+ */
+ handle = lgt->maptrack_limit;
- new_mt_limit = lgt->maptrack_limit + MAPTRACK_PER_PAGE;
+ for ( i = 0; i < MAPTRACK_PER_PAGE; i++ )
+ {
+ new_mt[i].ref = handle + i + 1;
+ new_mt[i].vcpu = curr->vcpu_id;
+ }
+ new_mt[i - 1].ref = curr->maptrack_head;
- for ( i = 1; i < MAPTRACK_PER_PAGE; i++ )
- new_mt[i - 1].ref = lgt->maptrack_limit + i;
- new_mt[i - 1].ref = lgt->maptrack_head;
- lgt->maptrack_head = lgt->maptrack_limit;
+ /* Set tail directly if this is the first page for this VCPU. */
+ if ( curr->maptrack_tail == MAPTRACK_TAIL )
+ curr->maptrack_tail = handle + MAPTRACK_PER_PAGE - 1;
- lgt->maptrack[nr_frames] = new_mt;
- smp_wmb();
- lgt->maptrack_limit = new_mt_limit;
+ curr->maptrack_head = handle + 1;
- gdprintk(XENLOG_INFO, "Increased maptrack size to %u frames\n",
- nr_frames + 1);
- }
+ lgt->maptrack[nr_maptrack_frames(lgt)] = new_mt;
+ lgt->maptrack_limit += MAPTRACK_PER_PAGE;
spin_unlock(&lgt->maptrack_lock);
@@ -3048,16 +3092,9 @@ grant_table_create(
}
/* Tracking of mapped foreign frames table */
- if ( (t->maptrack = xzalloc_array(struct grant_mapping *,
- max_maptrack_frames)) == NULL )
+ t->maptrack = vzalloc(max_maptrack_frames * sizeof(*t->maptrack));
+ if ( t->maptrack == NULL )
goto no_mem_2;
- if ( (t->maptrack[0] = alloc_xenheap_page()) == NULL )
- goto no_mem_3;
- clear_page(t->maptrack[0]);
- t->maptrack_limit = MAPTRACK_PER_PAGE;
- for ( i = 1; i < MAPTRACK_PER_PAGE; i++ )
- t->maptrack[0][i - 1].ref = i;
- t->maptrack[0][i - 1].ref = MAPTRACK_TAIL;
/* Shared grant table. */
if ( (t->shared_raw = xzalloc_array(void *, max_grant_frames)) == NULL )
@@ -3089,8 +3126,7 @@ grant_table_create(
free_xenheap_page(t->shared_raw[i]);
xfree(t->shared_raw);
no_mem_3:
- free_xenheap_page(t->maptrack[0]);
- xfree(t->maptrack);
+ vfree(t->maptrack);
no_mem_2:
for ( i = 0;
i < num_act_frames_from_sha_frames(INITIAL_NR_GRANT_FRAMES); i++ )
@@ -3225,7 +3261,7 @@ grant_table_destroy(
for ( i = 0; i < nr_maptrack_frames(t); i++ )
free_xenheap_page(t->maptrack[i]);
- xfree(t->maptrack);
+ vfree(t->maptrack);
for ( i = 0; i < nr_active_grant_frames(t); i++ )
free_xenheap_page(t->active[i]);
@@ -3239,6 +3275,12 @@ grant_table_destroy(
d->grant_table = NULL;
}
+void grant_table_init_vcpu(struct vcpu *v)
+{
+ v->maptrack_head = MAPTRACK_TAIL;
+ v->maptrack_tail = MAPTRACK_TAIL;
+}
+
static void gnttab_usage_print(struct domain *rd)
{
int first = 1;
diff --git a/xen/include/xen/grant_table.h b/xen/include/xen/grant_table.h
index 9536331..9c7b5a3 100644
--- a/xen/include/xen/grant_table.h
+++ b/xen/include/xen/grant_table.h
@@ -73,9 +73,8 @@ struct grant_table {
grant_status_t **status;
/* Active grant table. */
struct active_grant_entry **active;
- /* Mapping tracking table. */
+ /* Mapping tracking table per vcpu. */
struct grant_mapping **maptrack;
- unsigned int maptrack_head;
unsigned int maptrack_limit;
/* Lock protecting the maptrack page list, head, and limit */
spinlock_t maptrack_lock;
@@ -89,6 +88,7 @@ int grant_table_create(
struct domain *d);
void grant_table_destroy(
struct domain *d);
+void grant_table_init_vcpu(struct vcpu *v);
/* Domain death release of granted mappings of other domains' memory. */
void
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 604d047..d810e1c 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -219,6 +219,10 @@ struct vcpu
/* VCPU paused by system controller. */
int controller_pause_count;
+ /* Maptrack */
+ unsigned int maptrack_head;
+ unsigned int maptrack_tail;
+
/* IRQ-safe virq_lock protects against delivering VIRQ to stale evtchn. */
evtchn_port_t virq_to_evtchn[NR_VIRQS];
spinlock_t virq_lock;
--
1.7.10.4
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCHv15 2/2] gnttab: steal maptrack entries from other VCPUs
2015-06-18 17:18 [PATCHv15 0/2] gnttab: Improve scaleability David Vrabel
2015-06-18 17:18 ` [PATCHv15 1/2] gnttab: use per-VCPU maptrack free lists David Vrabel
@ 2015-06-18 17:18 ` David Vrabel
1 sibling, 0 replies; 3+ messages in thread
From: David Vrabel @ 2015-06-18 17:18 UTC (permalink / raw)
To: xen-devel
Cc: Keir Fraser, Tim Deegan, David Vrabel, Jan Beulich, Ian Campbell
If a guest is not evenly grant mapping across its VCPUs one of the
VCPUs may run out of free maptrack entries even though other VCPUs
have many free.
If this happens, "steal" free entries from other VCPUs. We want to
steal entries such that:
a) We avoid ping-ponging stolen entries between VCPUs.
b) The number of free entries owned by each VCPUs tends (over time) to
the number it uses.
So when stealing, we select a VCPU at random (reducing (a)) and we
transfer the stolen entries to the thief VCPU (aiming for (b)).
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
v15:
- Fix handle check in steal_maptrack_handle.
v14:
- Use get_random().
- v -> curr, d -> currd and const (where appropriate).
- Check for NULL currd->vcpu[i].
v13:
- Steal an extra entry for the sentinel if a VCPU's free list is
unitialized.
v12:
- New.
---
xen/common/grant_table.c | 95 ++++++++++++++++++++++++++++++++++++++--------
1 file changed, 80 insertions(+), 15 deletions(-)
diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
index a371822..ee29780 100644
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -295,26 +295,70 @@ __get_maptrack_handle(
struct grant_table *t,
struct vcpu *v)
{
- unsigned int head, next;
+ unsigned int head, next, prev_head;
- /* No maptrack pages allocated for this VCPU yet? */
- head = v->maptrack_head;
- if ( unlikely(head == MAPTRACK_TAIL) )
- return -1;
+ do {
+ /* No maptrack pages allocated for this VCPU yet? */
+ head = read_atomic(&v->maptrack_head);
+ if ( unlikely(head == MAPTRACK_TAIL) )
+ return -1;
- /*
- * Always keep one entry in the free list to make it easier to add
- * free entries to the tail.
- */
- next = read_atomic(&maptrack_entry(t, head).ref);
- if ( unlikely(next == MAPTRACK_TAIL) )
- return -1;
+ /*
+ * Always keep one entry in the free list to make it easier to
+ * add free entries to the tail.
+ */
+ next = read_atomic(&maptrack_entry(t, head).ref);
+ if ( unlikely(next == MAPTRACK_TAIL) )
+ return -1;
- v->maptrack_head = next;
+ prev_head = head;
+ head = cmpxchg(&v->maptrack_head, prev_head, next);
+ } while ( head != prev_head );
return head;
}
+/*
+ * Try to "steal" a free maptrack entry from another VCPU.
+ *
+ * A stolen entry is transferred to the thief, so the number of
+ * entries for each VCPU should tend to the usage pattern.
+ *
+ * To avoid having to atomically count the number of free entries on
+ * each VCPU and to avoid two VCPU repeatedly stealing entries from
+ * each other, the initial victim VCPU is selected randomly.
+ */
+static int steal_maptrack_handle(struct grant_table *t,
+ const struct vcpu *curr)
+{
+ const struct domain *currd = curr->domain;
+ unsigned int first, i;
+
+ /* Find an initial victim. */
+ first = i = get_random() % currd->max_vcpus;
+
+ do {
+ if ( currd->vcpu[i] )
+ {
+ int handle;
+
+ handle = __get_maptrack_handle(t, currd->vcpu[i]);
+ if ( handle != -1 )
+ {
+ maptrack_entry(t, handle).vcpu = curr->vcpu_id;
+ return handle;
+ }
+ }
+
+ i++;
+ if ( i == currd->max_vcpus )
+ i = 0;
+ } while ( i != first );
+
+ /* No free handles on any VCPU. */
+ return -1;
+}
+
static inline void
put_maptrack_handle(
struct grant_table *t, int handle)
@@ -354,10 +398,31 @@ get_maptrack_handle(
spin_lock(&lgt->maptrack_lock);
+ /*
+ * If we've run out of frames, try stealing an entry from another
+ * VCPU (in case the guest isn't mapping across its VCPUs evenly).
+ */
if ( nr_maptrack_frames(lgt) >= max_maptrack_frames )
{
+ /*
+ * Can drop the lock since no other VCPU can be adding a new
+ * frame once they've run out.
+ */
spin_unlock(&lgt->maptrack_lock);
- return -1;
+
+ /*
+ * Uninitialized free list? Steal an extra entry for the tail
+ * sentinel.
+ */
+ if ( curr->maptrack_tail == MAPTRACK_TAIL )
+ {
+ handle = steal_maptrack_handle(lgt, curr);
+ if ( handle == -1 )
+ return -1;
+ curr->maptrack_tail = handle;
+ write_atomic(&curr->maptrack_head, handle);
+ }
+ return steal_maptrack_handle(lgt, curr);
}
new_mt = alloc_xenheap_page();
@@ -385,7 +450,7 @@ get_maptrack_handle(
if ( curr->maptrack_tail == MAPTRACK_TAIL )
curr->maptrack_tail = handle + MAPTRACK_PER_PAGE - 1;
- curr->maptrack_head = handle + 1;
+ write_atomic(&curr->maptrack_head, handle + 1);
lgt->maptrack[nr_maptrack_frames(lgt)] = new_mt;
lgt->maptrack_limit += MAPTRACK_PER_PAGE;
--
1.7.10.4
^ permalink raw reply related [flat|nested] 3+ messages in thread