From mboxrd@z Thu Jan 1 00:00:00 1970 From: Andrew Cooper Subject: Re: [PATCH v5 02/24] xen: make two memory hypercalls vNUMA-aware Date: Fri, 13 Feb 2015 12:00:29 +0000 Message-ID: <54DDE75D.5060400@citrix.com> References: <1423770294-9779-1-git-send-email-wei.liu2@citrix.com> <1423770294-9779-3-git-send-email-wei.liu2@citrix.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <1423770294-9779-3-git-send-email-wei.liu2@citrix.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Wei Liu , xen-devel@lists.xen.org Cc: dario.faggioli@citrix.com, JBeulich@suse.com, ian.jackson@eu.citrix.com, ian.campbell@citrix.com, ufimtseva@gmail.com List-Id: xen-devel@lists.xenproject.org On 12/02/15 19:44, Wei Liu wrote: > Make XENMEM_increase_reservation and XENMEM_populate_physmap > vNUMA-aware. > > That is, if guest requests Xen to allocate memory for specific vnode, > Xen can translate vnode to pnode using vNUMA information of that guest. > > XENMEMF_vnode is introduced for the guest to mark the node number is in > fact virtual node number and should be translated by Xen. > > XENFEAT_memory_op_vnode_supported is introduced to indicate that Xen is > able to translate virtual node to physical node. > > Signed-off-by: Wei Liu > Cc: Jan Beulich > --- > Changes in v5: > 1. New logic in translation function. > > Changes in v3: > 1. Coding style fix. > 2. Remove redundant assignment. > > Changes in v2: > 1. Return start_extent when vnode translation fails. > 2. Expose new feature bit to guest. > 3. Fix typo in comment. > --- > xen/common/kernel.c | 2 +- > xen/common/memory.c | 51 +++++++++++++++++++++++++++++++++++++++---- > xen/include/public/features.h | 3 +++ > xen/include/public/memory.h | 2 ++ > 4 files changed, 53 insertions(+), 5 deletions(-) > > diff --git a/xen/common/kernel.c b/xen/common/kernel.c > index 0d9e519..e5e0050 100644 > --- a/xen/common/kernel.c > +++ b/xen/common/kernel.c > @@ -301,7 +301,7 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg) > switch ( fi.submap_idx ) > { > case 0: > - fi.submap = 0; > + fi.submap = (1U << XENFEAT_memory_op_vnode_supported); > if ( VM_ASSIST(d, VMASST_TYPE_pae_extended_cr3) ) > fi.submap |= (1U << XENFEAT_pae_pgdir_above_4gb); > if ( paging_mode_translate(current->domain) ) > diff --git a/xen/common/memory.c b/xen/common/memory.c > index e84ace9..fa3729b 100644 > --- a/xen/common/memory.c > +++ b/xen/common/memory.c > @@ -692,6 +692,43 @@ out: > return rc; > } > > +static int translate_vnode_to_pnode(struct domain *d, > + struct xen_memory_reservation *r, const struct xen_memory_reservation *r > + struct memop_args *a) > +{ > + int rc = 0; > + unsigned int vnode, pnode; > + > + if ( r->mem_flags & XENMEMF_vnode ) > + { > + a->memflags &= ~MEMF_node(XENMEMF_get_node(r->mem_flags)); > + a->memflags &= ~MEMF_exact_node; This interface feels semantically wrong, especially as the caller sets these fields first, just to have them dropped at this point. A rather more appropriate function would be something along the lines of "construct_memop_from_reservation()" (name subject to improvement) which takes care completely filling 'a' from 'r', doing a v->p translation if needed. ~Andrew > + > + read_lock(&d->vnuma_rwlock); > + if ( d->vnuma ) > + { > + vnode = XENMEMF_get_node(r->mem_flags); > + > + if ( vnode < d->vnuma->nr_vnodes ) > + { > + pnode = d->vnuma->vnode_to_pnode[vnode]; > + > + if ( pnode != NUMA_NO_NODE ) > + { > + a->memflags |= MEMF_node(pnode); > + if ( r->mem_flags & XENMEMF_exact_node_request ) > + a->memflags |= MEMF_exact_node; > + } > + } > + else > + rc = -EINVAL; > + } > + read_unlock(&d->vnuma_rwlock); > + } > + > + return rc; > +} > + > long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) > { > struct domain *d; > @@ -734,10 +771,6 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) > args.memflags = MEMF_bits(address_bits); > } > > - args.memflags |= MEMF_node(XENMEMF_get_node(reservation.mem_flags)); > - if ( reservation.mem_flags & XENMEMF_exact_node_request ) > - args.memflags |= MEMF_exact_node; > - > if ( op == XENMEM_populate_physmap > && (reservation.mem_flags & XENMEMF_populate_on_demand) ) > args.memflags |= MEMF_populate_on_demand; > @@ -747,6 +780,16 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) > return start_extent; > args.domain = d; > > + args.memflags |= MEMF_node(XENMEMF_get_node(reservation.mem_flags)); > + if ( reservation.mem_flags & XENMEMF_exact_node_request ) > + args.memflags |= MEMF_exact_node; > + > + if ( translate_vnode_to_pnode(d, &reservation, &args) ) > + { > + rcu_unlock_domain(d); > + return start_extent; > + } > + > if ( xsm_memory_adjust_reservation(XSM_TARGET, current->domain, d) ) > { > rcu_unlock_domain(d); > diff --git a/xen/include/public/features.h b/xen/include/public/features.h > index 16d92aa..2110b04 100644 > --- a/xen/include/public/features.h > +++ b/xen/include/public/features.h > @@ -99,6 +99,9 @@ > #define XENFEAT_grant_map_identity 12 > */ > > +/* Guest can use XENMEMF_vnode to specify virtual node for memory op. */ > +#define XENFEAT_memory_op_vnode_supported 13 > + > #define XENFEAT_NR_SUBMAPS 1 > > #endif /* __XEN_PUBLIC_FEATURES_H__ */ > diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h > index 595f953..2b5206b 100644 > --- a/xen/include/public/memory.h > +++ b/xen/include/public/memory.h > @@ -55,6 +55,8 @@ > /* Flag to request allocation only from the node specified */ > #define XENMEMF_exact_node_request (1<<17) > #define XENMEMF_exact_node(n) (XENMEMF_node(n) | XENMEMF_exact_node_request) > +/* Flag to indicate the node specified is virtual node */ > +#define XENMEMF_vnode (1<<18) > #endif > > struct xen_memory_reservation {