From mboxrd@z Thu Jan 1 00:00:00 1970 From: George Dunlap Subject: Re: [PATCH 2 of 2 RFC] xl: allow for moving the domain's memory when changing vcpu affinity Date: Fri, 6 Jul 2012 13:53:57 +0100 Message-ID: <4FF6DFE5.7060403@eu.citrix.com> References: <89aba27edf62271a4862.1341568445@Solace> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii"; Format="flowed" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <89aba27edf62271a4862.1341568445@Solace> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Dario Faggioli Cc: "Zhang, Yang Z" , Andre Przywara , Ian Jackson , Ian Campbell , xen-devel List-Id: xen-devel@lists.xenproject.org On 06/07/12 10:54, Dario Faggioli wrote: > By introducing a new '-M' option to the `xl vcpu-pin' command. The actual > memory "movement" is achieved suspending the domain to a ttemporary file and > resuming it with the new vcpu-affinity Hmm... this will work and be reliable, but it seems a bit clunky. Long term we want to be able to do node migration in the background without shutting down a VM, right? If that can be done in the 4.3 timeframe, then it seems unnecessary to implement something like this. -George > . > > Signed-off-by: dario.faggioli > > diff --git a/docs/man/xl.pod.1 b/docs/man/xl.pod.1 > --- a/docs/man/xl.pod.1 > +++ b/docs/man/xl.pod.1 > @@ -606,7 +606,7 @@ after B, go to B sec > Lists VCPU information for a specific domain. If no domain is > specified, VCPU information for all domains will be provided. > > -=item B I I I > +=item B [I] I I I > > Pins the VCPU to only run on the specific CPUs. The keyword > B can be used to apply the I list to all VCPUs in the > @@ -617,6 +617,22 @@ different run state is appropriate. Pin > this, by ensuring certain VCPUs can only run on certain physical > CPUs. > > +B > + > +=over 4 > + > +=item I<-M> > + > +If and only if using this together with I equal to B, not only > +pin the VCPUs to I, it also tries to make sure all the memory for > +the domain comes from the NUMA nodes I belong to. This involves > +suspending and resuming the domain, so some (hopefully small) downtime > +should be expected, as well as the change of the domain id. > + > +=back > + > +=back > + > =item B > > Prints information about guests. This list excludes information about > diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c > --- a/tools/libxl/xl_cmdimpl.c > +++ b/tools/libxl/xl_cmdimpl.c > @@ -4061,7 +4061,52 @@ int main_vcpulist(int argc, char **argv) > return 0; > } > > -static void vcpupin(const char *d, const char *vcpu, char *cpu) > +/* > + * Easiest way for having all the memory allocated in such a way that it > + * reflects the vcpu affinity is to save and re-create the domain (as it > + * happens when migrating and suspend/resumimg). > + */ > +static int vcpupin_move(const char *d, char *cpu) > +{ > + struct domain_create dom_info; > + char savefile[] = "/tmp/vcpupin-autosave.XXXXXX"; > + char extra_config[1024]; > + uint8_t *config_data; > + int config_len; > + int save_fd; > + > + /* Setup a temporary core file for the domain */ > + save_fd = mkostemp(savefile, O_WRONLY|O_CREAT|O_TRUNC); > + if (save_fd< 0) > + return -1; > + > + /* Save and destroy the current incarnation of the domain */ > + MUST(libxl_userdata_retrieve(ctx, domid, "xl",&config_data,&config_len)); > + save_domain_core_writeconfig(save_fd, savefile, config_data, config_len); > + > + MUST(libxl_domain_suspend(ctx, domid, save_fd, LIBXL_SUSPEND_LIVE, NULL)); > + MUST(libxl_domain_destroy(ctx, domid)); > + close(save_fd); > + domid = INVALID_DOMID; > + > + /* Default parameters plus some extra_config for domain re-creation */ > + memset(&dom_info, 0, sizeof(dom_info)); > + dom_info.quiet = 1; > + dom_info.migrate_fd = -1; > + dom_info.monitor = dom_info.daemonize = 1; > + dom_info.incr_generationid = 1; > + dom_info.restore_file = savefile; > + sprintf(extra_config, "cpus=\"%s\"\n", cpu); > + dom_info.extra_config = extra_config; > + > + /* Finally, re-create the domain */ > + MUST(create_domain(&dom_info)); > + unlink(savefile); > + > + return 0; > +} > + > +static void vcpupin(const char *d, const char *vcpu, char *cpu, int move) > { > libxl_vcpuinfo *vcpuinfo; > libxl_cpumap cpumap; > @@ -4078,6 +4123,10 @@ static void vcpupin(const char *d, const > } > vcpuid = -1; > } > + if (vcpuid != -1&& move) { > + fprintf(stderr, "Error: can only move when VCPU is \"all\"\n"); > + return; > + } > > find_domain(d); > > @@ -4094,18 +4143,22 @@ static void vcpupin(const char *d, const > } > } > else { > - if (!(vcpuinfo = libxl_list_vcpu(ctx, domid,&nb_vcpu,&i))) { > - fprintf(stderr, "libxl_list_vcpu failed.\n"); > - goto vcpupin_out1; > - } > - for (i = 0; i< nb_vcpu; i++) { > - if (libxl_set_vcpuaffinity(ctx, domid, vcpuinfo[i].vcpuid, > -&cpumap) == -1) { > - fprintf(stderr, "libxl_set_vcpuaffinity failed" > - " on vcpu `%u'.\n", vcpuinfo[i].vcpuid); > + if (move&& vcpupin_move(d, cpu)) > + fprintf(stderr, "Could not move the domain\n"); > + else { > + if (!(vcpuinfo = libxl_list_vcpu(ctx, domid,&nb_vcpu,&i))) { > + fprintf(stderr, "libxl_list_vcpu failed.\n"); > + goto vcpupin_out1; > } > - } > - libxl_vcpuinfo_list_free(vcpuinfo, nb_vcpu); > + for (i = 0; i< nb_vcpu; i++) { > + if (libxl_set_vcpuaffinity(ctx, domid, vcpuinfo[i].vcpuid, > +&cpumap) == -1) { > + fprintf(stderr, "libxl_set_vcpuaffinity failed" > + " on vcpu `%u'.\n", vcpuinfo[i].vcpuid); > + } > + } > + libxl_vcpuinfo_list_free(vcpuinfo, nb_vcpu); > + } > } > vcpupin_out1: > libxl_cpumap_dispose(&cpumap); > @@ -4115,12 +4168,20 @@ static void vcpupin(const char *d, const > > int main_vcpupin(int argc, char **argv) > { > + int move_memory = 0; > int opt; > > - if ((opt = def_getopt(argc, argv, "", "vcpu-pin", 3)) != -1) > - return opt; > - > - vcpupin(argv[optind], argv[optind+1] , argv[optind+2]); > + if ((opt = def_getopt(argc, argv, "M", "vcpu-pin", 3)) != -1) { > + switch(opt) { > + case 0: case 2: > + return opt; > + case 'M': > + move_memory = 1; > + break; > + } > + } > + > + vcpupin(argv[optind], argv[optind+1] , argv[optind+2], move_memory); > return 0; > } > > diff --git a/tools/libxl/xl_cmdtable.c b/tools/libxl/xl_cmdtable.c > --- a/tools/libxl/xl_cmdtable.c > +++ b/tools/libxl/xl_cmdtable.c > @@ -207,7 +207,10 @@ struct cmd_spec cmd_table[] = { > { "vcpu-pin", > &main_vcpupin, 0, 1, > "Set which CPUs a VCPU can use", > - " ", > + "[options] ", > + "-M Move the memory to th nodes corresponding to CPUs\n" > + " (involves suspending/resuming the domain, so some\n" > + " downtime is to be expected)" > }, > { "vcpu-set", > &main_vcpuset, 0, 1,