From mboxrd@z Thu Jan 1 00:00:00 1970 From: Ian Campbell Subject: Re: [PATCH 4 of 5 V2] tools/libxl: Control network buffering in remus callbacks Date: Wed, 4 Sep 2013 16:19:45 +0100 Message-ID: <1378307985.17510.143.camel@kazak.uk.xensource.com> References: Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Shriram Rajagopalan Cc: Andrew Cooper , Stefano Stabellini , Ian Jackson , xen-devel@lists.xen.org List-Id: xen-devel@lists.xenproject.org On Thu, 2013-08-29 at 15:16 -0700, Shriram Rajagopalan wrote: > # HG changeset patch > # User Shriram Rajagopalan > # Date 1377813001 25200 > # Node ID b8839e0b61c2e15b94a274cfddedf0814d2560de > # Parent 3e8b0aa7cd4a945ec3efe14c969700c9e23ea2cc > tools/libxl: Control network buffering in remus callbacks > > This patch constitutes the core network buffering logic > and does the following: > a) create a new network buffer when the domain is suspended > (remus_domain_suspend_callback) > b) release the previous network buffer pertaining to the > committed checkpoint (remus_checkpoint_dm_saved) > > Signed-off-by: Shriram Rajagopalan Looks ok to me, but I'd like Ian's opinion on the interactions with the async stuff in the previous patch. > > diff -r 3e8b0aa7cd4a -r b8839e0b61c2 tools/libxl/libxl_dom.c > --- a/tools/libxl/libxl_dom.c Thu Aug 29 14:36:36 2013 -0700 > +++ b/tools/libxl/libxl_dom.c Thu Aug 29 14:50:01 2013 -0700 > @@ -1215,8 +1215,24 @@ int libxl__toolstack_save(uint32_t domid > > static int libxl__remus_domain_suspend_callback(void *data) > { > - /* REMUS TODO: Issue disk and network checkpoint reqs. */ > - return libxl__domain_suspend_common_callback(data); > + /* REMUS TODO: Issue disk checkpoint reqs. 
*/ > + libxl__save_helper_state *shs = data; > + libxl__domain_suspend_state *dss = CONTAINER_OF(shs, *dss, shs); > + libxl__remus_ctx *remus_ctx = dss->remus_ctx; > + bool is_suspended; > + STATE_AO_GC(dss->ao); > + > + is_suspended = !!libxl__domain_suspend_common_callback(data); > + > + if (!remus_ctx->netbuf_ctx) return is_suspended; > + > + if (is_suspended) { > + if (libxl__remus_netbuf_start_new_epoch(gc, dss->domid, > + remus_ctx)) > + return !is_suspended; > + } > + > + return is_suspended; > } > > static int libxl__remus_domain_resume_callback(void *data) > @@ -1229,7 +1245,7 @@ static int libxl__remus_domain_resume_ca > if (libxl__domain_resume(gc, dss->domid, /* Fast Suspend */1)) > return 0; > > - /* REMUS TODO: Deal with disk. Start a new network output buffer */ > + /* REMUS TODO: Deal with disk. */ > return 1; > } > > @@ -1256,10 +1272,36 @@ static void libxl__remus_domain_checkpoi > static void remus_checkpoint_dm_saved(libxl__egc *egc, > libxl__domain_suspend_state *dss, int rc) > { > - /* REMUS TODO: Wait for disk and memory ack, release network buffer */ > - /* REMUS TODO: make this asynchronous */ > - assert(!rc); /* REMUS TODO handle this error properly */ > - usleep(dss->remus_ctx->interval * 1000); > + /* > + * REMUS TODO: Wait for disk and explicit memory ack (through restore > + * callback from remote) before releasing network buffer. > + */ > + libxl__remus_ctx *remus_ctx = dss->remus_ctx; > + struct timespec epoch; > + int ret; > + STATE_AO_GC(dss->ao); > + > + if (rc) { > + LOG(ERROR, "Failed to save device model. 
Terminating Remus.."); > + libxl__xc_domain_saverestore_async_callback_done(egc, > + &dss->shs, rc); > + return; > + } > + > + if (remus_ctx->netbuf_ctx) { > + ret = libxl__remus_netbuf_release_prev_epoch(gc, dss->domid, > + remus_ctx); > + if (ret) { > + libxl__xc_domain_saverestore_async_callback_done(egc, > + &dss->shs, > + ret); > + return; > + } > + } > + > + epoch.tv_sec = remus_ctx->interval / 1000; /* interval is in ms */ > + epoch.tv_nsec = remus_ctx->interval * 1000L * 1000L; > + nanosleep(&epoch, 0); > libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, 1); > } >