From: David Vrabel
Subject: [PATCHv15 1/2] gnttab: use per-VCPU maptrack free lists
Date: Thu, 18 Jun 2015 18:18:32 +0100
Message-ID: <1434647913-16028-2-git-send-email-david.vrabel@citrix.com>
References: <1434647913-16028-1-git-send-email-david.vrabel@citrix.com>
In-Reply-To: <1434647913-16028-1-git-send-email-david.vrabel@citrix.com>
To: xen-devel@lists.xenproject.org
Cc: Keir Fraser, Ian Campbell, Tim Deegan, David Vrabel, Jan Beulich, Malcolm Crossley
List-Id: xen-devel@lists.xenproject.org

From: Malcolm Crossley

Performance analysis of aggregate network throughput with many VMs
shows that performance is significantly limited by contention on the
maptrack lock when obtaining/releasing maptrack handles from the free
list.

Instead of a single free list, use a per-VCPU list. This avoids any
contention when obtaining a handle. Handles must be released back to
their original list, and since this may occur on a different VCPU
there is some contention on the destination VCPU's free list tail
pointer (but this is much better than a per-domain lock).

Increase the default maximum number of maptrack frames four-fold
because: a) struct grant_mapping is now 16 bytes (instead of 8); and
b) a guest may not evenly distribute all the grant map operations
across the VCPUs (meaning some VCPUs need more maptrack entries than
others).

Signed-off-by: Malcolm Crossley
Signed-off-by: David Vrabel
---
v15:
- Alloc maptrack array in grant_table_create().
- Init per-VCPU state when VCPUs are allocated.

v14:
- When adding a new frame, use first entry and don't add it to free list.
- v -> curr and d -> currd (where appropriate).

v13:
- When adding new frames, update v->maptrack_head last.
- Each VCPU no longer gets a reserved number of frames -- they're
  allocated first-come, first-served.
- Use original gt->maptrack_limit (instead of gt->maptrack_pages).

v11:
- Allocate maptrack array with vzalloc() since it may be >1 page.

v10:
- Divide max_maptrack_frames evenly amongst the VCPUs.
- Increase default max_maptrack_frames to compensate.
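The idea, in outline: each VCPU owns a singly-linked free list in which
only the owning VCPU pops handles from the head, while any VCPU may
return a handle by appending it to the owner's tail with cmpxchg.
Keeping at least one entry on the list at all times means a release only
ever touches the old tail entry and the tail pointer, never the head.
The standalone sketch below illustrates this; all names are illustrative
and C11 atomics stand in for Xen's read_atomic()/write_atomic()/cmpxchg(),
so this is a sketch of the technique, not code from the patch:

/*
 * Minimal sketch of the per-VCPU free-list scheme (hypothetical names).
 * Build with: cc -std=c11 sketch.c
 */
#include <stdatomic.h>
#include <stdio.h>

#define TAIL_SENTINEL (~0u)     /* plays the role of MAPTRACK_TAIL */
#define NR_ENTRIES    8

struct entry {
    _Atomic unsigned int next;  /* index of next free entry, or sentinel */
};

struct freelist {
    unsigned int head;          /* popped only by the owning "VCPU" */
    _Atomic unsigned int tail;  /* appended to by any "VCPU" via CAS */
};

static struct entry entries[NR_ENTRIES];

/*
 * Pop a handle from the head.  The final entry is never handed out, so
 * a concurrent put_handle() always has a predecessor whose .next it can
 * safely update (cf. __get_maptrack_handle() in the patch).
 */
static int get_handle(struct freelist *fl)
{
    unsigned int head = fl->head, next;

    if (head == TAIL_SENTINEL)
        return -1;                       /* list never populated */
    next = atomic_load(&entries[head].next);
    if (next == TAIL_SENTINEL)
        return -1;                       /* keep one entry in reserve */
    fl->head = next;
    return (int)head;
}

/*
 * Return a handle to the tail of its owner's list.  Unlike get_handle()
 * this may run on any "VCPU", hence the CAS loop on the tail pointer
 * (cf. put_maptrack_handle() in the patch).
 */
static void put_handle(struct freelist *fl, unsigned int handle)
{
    unsigned int prev_tail;

    /* 1. The returned entry becomes the new list terminator. */
    atomic_store(&entries[handle].next, TAIL_SENTINEL);

    /* 2. Swing the tail pointer to it. */
    prev_tail = atomic_load(&fl->tail);
    while (!atomic_compare_exchange_weak(&fl->tail, &prev_tail, handle))
        ;   /* prev_tail is reloaded on failure */

    /* 3. Link the old tail entry to the new one. */
    atomic_store(&entries[prev_tail].next, handle);
}

int main(void)
{
    struct freelist fl = { .head = 0 };
    unsigned int i;
    int h;

    /* Chain all entries together; the last one is the initial tail. */
    for (i = 0; i < NR_ENTRIES - 1; i++)
        atomic_init(&entries[i].next, i + 1);
    atomic_init(&entries[NR_ENTRIES - 1].next, TAIL_SENTINEL);
    atomic_init(&fl.tail, NR_ENTRIES - 1);

    h = get_handle(&fl);
    printf("got handle %d\n", h);
    put_handle(&fl, (unsigned int)h);
    printf("head now %u, tail now %u\n", fl.head,
           (unsigned int)atomic_load(&fl.tail));
    return 0;
}

The list is transiently disconnected between steps 2 and 3 of the
release, which is exactly why both the sketch and __get_maptrack_handle()
refuse to hand out the final free entry.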
---
 xen/common/domain.c           |    2 +
 xen/common/grant_table.c      |  126 +++++++++++++++++++++++++++--------------
 xen/include/xen/grant_table.h |    4 +-
 xen/include/xen/sched.h       |    4 ++
 4 files changed, 92 insertions(+), 44 deletions(-)

diff --git a/xen/common/domain.c b/xen/common/domain.c
index b0e83f5..3bc52e6 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -127,6 +127,8 @@ struct vcpu *alloc_vcpu(
 
     tasklet_init(&v->continue_hypercall_tasklet, NULL, 0);
 
+    grant_table_init_vcpu(v);
+
     if ( !zalloc_cpumask_var(&v->cpu_hard_affinity) ||
          !zalloc_cpumask_var(&v->cpu_hard_affinity_tmp) ||
          !zalloc_cpumask_var(&v->cpu_hard_affinity_saved) ||
diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
index a011276..a371822 100644
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -37,6 +37,7 @@
 #include <xen/iommu.h>
 #include <xen/paging.h>
 #include <xen/keyhandler.h>
+#include <xen/vmap.h>
 #include <xsm/xsm.h>
 #include <asm/flushtlb.h>
@@ -57,7 +58,7 @@ integer_param("gnttab_max_frames", max_grant_frames);
  * New options allow to set max_maptrack_frames and
  * map_grant_table_frames independently.
  */
-#define DEFAULT_MAX_MAPTRACK_FRAMES 256
+#define DEFAULT_MAX_MAPTRACK_FRAMES 1024
 
 static unsigned int __read_mostly max_maptrack_frames;
 integer_param("gnttab_max_maptrack_frames", max_maptrack_frames);
@@ -121,6 +122,8 @@ struct grant_mapping {
     u32      ref;           /* grant ref */
     u16      flags;         /* 0-4: GNTMAP_* ; 5-15: unused */
     domid_t  domid;         /* granting domain */
+    u32      vcpu;          /* vcpu which created the grant mapping */
+    u32      pad;           /* round size to a power of 2 */
 };
 
 #define MAPTRACK_PER_PAGE (PAGE_SIZE / sizeof(struct grant_mapping))
@@ -289,62 +292,103 @@ double_gt_unlock(struct grant_table *lgt, struct grant_table *rgt)
 
 static inline int
 __get_maptrack_handle(
-    struct grant_table *t)
+    struct grant_table *t,
+    struct vcpu *v)
 {
-    unsigned int h;
-    if ( unlikely((h = t->maptrack_head) == MAPTRACK_TAIL) )
+    unsigned int head, next;
+
+    /* No maptrack pages allocated for this VCPU yet? */
+    head = v->maptrack_head;
+    if ( unlikely(head == MAPTRACK_TAIL) )
+        return -1;
+
+    /*
+     * Always keep one entry in the free list to make it easier to add
+     * free entries to the tail.
+     */
+    next = read_atomic(&maptrack_entry(t, head).ref);
+    if ( unlikely(next == MAPTRACK_TAIL) )
         return -1;
-    t->maptrack_head = maptrack_entry(t, h).ref;
-    return h;
+
+    v->maptrack_head = next;
+
+    return head;
 }
 
 static inline void
 put_maptrack_handle(
     struct grant_table *t, int handle)
 {
-    spin_lock(&t->maptrack_lock);
-    maptrack_entry(t, handle).ref = t->maptrack_head;
-    t->maptrack_head = handle;
-    spin_unlock(&t->maptrack_lock);
+    struct domain *currd = current->domain;
+    struct vcpu *v;
+    unsigned int prev_tail, cur_tail;
+
+    /* 1. Set entry to be a tail. */
+    maptrack_entry(t, handle).ref = MAPTRACK_TAIL;
+
+    /* 2. Add entry to the tail of the list on the original VCPU. */
+    v = currd->vcpu[maptrack_entry(t,handle).vcpu];
+
+    cur_tail = read_atomic(&v->maptrack_tail);
+    do {
+        prev_tail = cur_tail;
+        cur_tail = cmpxchg(&v->maptrack_tail, prev_tail, handle);
+    } while ( cur_tail != prev_tail );
+
+    /* 3. Update the old tail entry to point to the new entry. */
+    write_atomic(&maptrack_entry(t, prev_tail).ref, handle);
 }
 
 static inline int
 get_maptrack_handle(
     struct grant_table *lgt)
 {
+    struct vcpu          *curr = current;
     int                   i;
     grant_handle_t        handle;
    struct grant_mapping *new_mt;
-    unsigned int          new_mt_limit, nr_frames;
+
+    handle = __get_maptrack_handle(lgt, curr);
+    if ( likely(handle != -1) )
+        return handle;
 
     spin_lock(&lgt->maptrack_lock);
 
-    while ( unlikely((handle = __get_maptrack_handle(lgt)) == -1) )
+    if ( nr_maptrack_frames(lgt) >= max_maptrack_frames )
     {
-        nr_frames = nr_maptrack_frames(lgt);
-        if ( nr_frames >= max_maptrack_frames )
-            break;
+        spin_unlock(&lgt->maptrack_lock);
+        return -1;
+    }
 
-        new_mt = alloc_xenheap_page();
-        if ( !new_mt )
-            break;
+    new_mt = alloc_xenheap_page();
+    if ( !new_mt )
+    {
+        spin_unlock(&lgt->maptrack_lock);
+        return -1;
+    }
+    clear_page(new_mt);
 
-        clear_page(new_mt);
+    /*
+     * Use the first new entry and add the remaining entries to the
+     * head of the free list.
+     */
+    handle = lgt->maptrack_limit;
 
-        new_mt_limit = lgt->maptrack_limit + MAPTRACK_PER_PAGE;
+    for ( i = 0; i < MAPTRACK_PER_PAGE; i++ )
+    {
+        new_mt[i].ref = handle + i + 1;
+        new_mt[i].vcpu = curr->vcpu_id;
+    }
+    new_mt[i - 1].ref = curr->maptrack_head;
 
-        for ( i = 1; i < MAPTRACK_PER_PAGE; i++ )
-            new_mt[i - 1].ref = lgt->maptrack_limit + i;
-        new_mt[i - 1].ref = lgt->maptrack_head;
-        lgt->maptrack_head = lgt->maptrack_limit;
+    /* Set tail directly if this is the first page for this VCPU. */
+    if ( curr->maptrack_tail == MAPTRACK_TAIL )
+        curr->maptrack_tail = handle + MAPTRACK_PER_PAGE - 1;
 
-        lgt->maptrack[nr_frames] = new_mt;
-        smp_wmb();
-        lgt->maptrack_limit      = new_mt_limit;
+    curr->maptrack_head = handle + 1;
 
-        gdprintk(XENLOG_INFO, "Increased maptrack size to %u frames\n",
-                 nr_frames + 1);
-    }
+    lgt->maptrack[nr_maptrack_frames(lgt)] = new_mt;
+    lgt->maptrack_limit += MAPTRACK_PER_PAGE;
 
     spin_unlock(&lgt->maptrack_lock);
@@ -3048,16 +3092,9 @@ grant_table_create(
     }
 
     /* Tracking of mapped foreign frames table */
-    if ( (t->maptrack = xzalloc_array(struct grant_mapping *,
-                                      max_maptrack_frames)) == NULL )
+    t->maptrack = vzalloc(max_maptrack_frames * sizeof(*t->maptrack));
+    if ( t->maptrack == NULL )
         goto no_mem_2;
-    if ( (t->maptrack[0] = alloc_xenheap_page()) == NULL )
-        goto no_mem_3;
-    clear_page(t->maptrack[0]);
-    t->maptrack_limit = MAPTRACK_PER_PAGE;
-    for ( i = 1; i < MAPTRACK_PER_PAGE; i++ )
-        t->maptrack[0][i - 1].ref = i;
-    t->maptrack[0][i - 1].ref = MAPTRACK_TAIL;
 
     /* Shared grant table. */
     if ( (t->shared_raw = xzalloc_array(void *, max_grant_frames)) == NULL )
@@ -3089,8 +3126,7 @@ grant_table_create(
         free_xenheap_page(t->shared_raw[i]);
     xfree(t->shared_raw);
  no_mem_3:
-    free_xenheap_page(t->maptrack[0]);
-    xfree(t->maptrack);
+    vfree(t->maptrack);
  no_mem_2:
     for ( i = 0;
           i < num_act_frames_from_sha_frames(INITIAL_NR_GRANT_FRAMES); i++ )
@@ -3225,7 +3261,7 @@ grant_table_destroy(
 
     for ( i = 0; i < nr_maptrack_frames(t); i++ )
         free_xenheap_page(t->maptrack[i]);
-    xfree(t->maptrack);
+    vfree(t->maptrack);
 
     for ( i = 0; i < nr_active_grant_frames(t); i++ )
         free_xenheap_page(t->active[i]);
@@ -3239,6 +3275,12 @@ grant_table_destroy(
     d->grant_table = NULL;
 }
 
+void grant_table_init_vcpu(struct vcpu *v)
+{
+    v->maptrack_head = MAPTRACK_TAIL;
+    v->maptrack_tail = MAPTRACK_TAIL;
+}
+
 static void gnttab_usage_print(struct domain *rd)
 {
     int first = 1;
diff --git a/xen/include/xen/grant_table.h b/xen/include/xen/grant_table.h
index 9536331..9c7b5a3 100644
--- a/xen/include/xen/grant_table.h
+++ b/xen/include/xen/grant_table.h
@@ -73,9 +73,8 @@ struct grant_table {
     grant_status_t       **status;
     /* Active grant table. */
     struct active_grant_entry **active;
-    /* Mapping tracking table. */
+    /* Mapping tracking table per vcpu. */
     struct grant_mapping **maptrack;
-    unsigned int maptrack_head;
     unsigned int maptrack_limit;
     /* Lock protecting the maptrack page list, head, and limit */
     spinlock_t maptrack_lock;
@@ -89,6 +88,7 @@ int grant_table_create(
     struct domain *d);
 void grant_table_destroy(
     struct domain *d);
+void grant_table_init_vcpu(struct vcpu *v);
 
 /* Domain death release of granted mappings of other domains' memory. */
 void
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 604d047..d810e1c 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -219,6 +219,10 @@ struct vcpu
     /* VCPU paused by system controller. */
     int              controller_pause_count;
 
+    /* Maptrack */
+    unsigned int     maptrack_head;
+    unsigned int     maptrack_tail;
+
     /* IRQ-safe virq_lock protects against delivering VIRQ to stale evtchn. */
     evtchn_port_t    virq_to_evtchn[NR_VIRQS];
     spinlock_t       virq_lock;
-- 
1.7.10.4