From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Xen-devel <xen-devel@lists.xen.org>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>,
Ian Jackson <Ian.Jackson@eu.citrix.com>,
Ian Campbell <Ian.Campbell@citrix.com>,
Wei Liu <wei.liu2@citrix.com>
Subject: [PATCH] tools/libxl: Fixes to stream v2 task joining logic
Date: Thu, 23 Jul 2015 12:09:38 +0100 [thread overview]
Message-ID: <1437649778-16728-1-git-send-email-andrew.cooper3@citrix.com> (raw)
During review of the libxl migration v2 series, I changes the task
joining logic, but clearly didn't think the result through
properly. This patch fixes several errors.
1) Do not call check_all_finished() in the success cases of
libxl__xc_domain_{save,restore}_done(). It serves no specific purpose
as the save helper state will report itself as inactive by this point,
and avoids triggering a second stream->completion_callback() in the case
that write_toolstack_record()/stream_continue() record errors
synchronously themselves.
2) Only ever set stream->rc in stream_complete(). The first version of
the migration v2 series had separate rc and joined_rc parameters, where
this logic worked. However when combining the two, the teardown path
fails to trigger if stream_done() records stream->rc itself. A side
effect of this is that stream_done() needs to take an rc parameter.
3) Avoid stacking of check_all_finished() via synchronous teardown of
tasks. If the _abort() functions call back synchronously,
stream->completion_callback() ends up getting called twice, as first and
last check_all_finished() frames observe each task being finished.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
CC: Ian Campbell <Ian.Campbell@citrix.com>
CC: Ian Jackson <Ian.Jackson@eu.citrix.com>
CC: Wei Liu <wei.liu2@citrix.com>
---
I found this while working to fix the toolstack record issue, but am
posting this bugfix ahead of the other work as OSSTest has tripped over
the issue.
---
tools/libxl/libxl_internal.h | 2 ++
tools/libxl/libxl_stream_read.c | 32 ++++++++++++++++++++++++++------
tools/libxl/libxl_stream_write.c | 32 ++++++++++++++++++++++++++------
3 files changed, 54 insertions(+), 12 deletions(-)
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 3c09668..73b4d62 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2996,6 +2996,7 @@ struct libxl__stream_write_state {
int rc;
bool running;
bool in_checkpoint;
+ bool sync_teardown;
libxl__save_helper_state shs;
/* Main stream-writing data. */
@@ -3337,6 +3338,7 @@ struct libxl__stream_read_state {
int rc;
bool running;
bool in_checkpoint;
+ bool sync_teardown;
libxl__save_helper_state shs;
libxl__conversion_helper_state chs;
diff --git a/tools/libxl/libxl_stream_read.c b/tools/libxl/libxl_stream_read.c
index 32a3551..70d3e5f 100644
--- a/tools/libxl/libxl_stream_read.c
+++ b/tools/libxl/libxl_stream_read.c
@@ -112,7 +112,7 @@ static void stream_complete(libxl__egc *egc,
static void checkpoint_done(libxl__egc *egc,
libxl__stream_read_state *stream, int rc);
static void stream_done(libxl__egc *egc,
- libxl__stream_read_state *stream);
+ libxl__stream_read_state *stream, int rc);
static void conversion_done(libxl__egc *egc,
libxl__conversion_helper_state *chs, int rc);
static void check_all_finished(libxl__egc *egc,
@@ -176,6 +176,7 @@ void libxl__stream_read_init(libxl__stream_read_state *stream)
stream->rc = 0;
stream->running = false;
stream->in_checkpoint = false;
+ stream->sync_teardown = false;
libxl__save_helper_init(&stream->shs);
libxl__conversion_helper_init(&stream->chs);
FILLZERO(stream->dc);
@@ -669,9 +670,7 @@ static void stream_complete(libxl__egc *egc,
return;
}
- if (!stream->rc)
- stream->rc = rc;
- stream_done(egc, stream);
+ stream_done(egc, stream, rc);
}
static void checkpoint_done(libxl__egc *egc,
@@ -695,7 +694,7 @@ static void checkpoint_done(libxl__egc *egc,
}
static void stream_done(libxl__egc *egc,
- libxl__stream_read_state *stream)
+ libxl__stream_read_state *stream, int rc)
{
libxl__sr_record_buf *rec, *trec;
@@ -720,7 +719,7 @@ static void stream_done(libxl__egc *egc,
LIBXL_STAILQ_FOREACH_SAFE(rec, &stream->record_queue, entry, trec)
free_record(rec);
- check_all_finished(egc, stream, stream->rc);
+ check_all_finished(egc, stream, rc);
}
void libxl__xc_domain_restore_done(libxl__egc *egc, void *dcs_void,
@@ -744,6 +743,7 @@ void libxl__xc_domain_restore_done(libxl__egc *egc, void *dcs_void,
* libxl records from it.
*/
stream_continue(egc, stream);
+ return;
err:
check_all_finished(egc, stream, rc);
@@ -762,13 +762,33 @@ static void check_all_finished(libxl__egc *egc,
{
STATE_AO_GC(stream->ao);
+ /*
+ * In the case of a failure, the _abort()'s below might cancel
+ * synchronously on top of us, or asynchronously at a later point.
+ *
+ * We must avoid the situation where all _abort() cancel
+ * synchronously and the completion_callback() gets called twice;
+ * once by the first error and once by the final stacked abort(),
+ * both of whom will find that all of the tasks have stopped.
+ *
+ * To avoid this problem, any stacked re-entry into this function is
+ * ineligible to fire the completion callback. The outermost
+ * instance will take care of completing, once the stack has
+ * unwound.
+ */
+ if (stream->sync_teardown)
+ return;
+
if (!stream->rc && rc) {
/* First reported failure. Tear everything down. */
stream->rc = rc;
+ stream->sync_teardown = true;
libxl__stream_read_abort(egc, stream, rc);
libxl__save_helper_abort(egc, &stream->shs);
libxl__conversion_helper_abort(egc, &stream->chs, rc);
+
+ stream->sync_teardown = false;
}
/* Don't fire the callback until all our parallel tasks have stopped. */
diff --git a/tools/libxl/libxl_stream_write.c b/tools/libxl/libxl_stream_write.c
index 5bff52b..5135bc3 100644
--- a/tools/libxl/libxl_stream_write.c
+++ b/tools/libxl/libxl_stream_write.c
@@ -55,7 +55,7 @@ static void stream_success(libxl__egc *egc,
static void stream_complete(libxl__egc *egc,
libxl__stream_write_state *stream, int rc);
static void stream_done(libxl__egc *egc,
- libxl__stream_write_state *stream);
+ libxl__stream_write_state *stream, int rc);
static void checkpoint_done(libxl__egc *egc,
libxl__stream_write_state *stream,
int rc);
@@ -158,6 +158,7 @@ void libxl__stream_write_init(libxl__stream_write_state *stream)
stream->rc = 0;
stream->running = false;
stream->in_checkpoint = false;
+ stream->sync_teardown = false;
FILLZERO(stream->dc);
stream->record_done_callback = NULL;
FILLZERO(stream->emu_dc);
@@ -275,6 +276,7 @@ void libxl__xc_domain_save_done(libxl__egc *egc, void *dss_void,
}
write_toolstack_record(egc, stream);
+ return;
err:
check_all_finished(egc, stream, rc);
@@ -492,13 +494,11 @@ static void stream_complete(libxl__egc *egc,
return;
}
- if (!stream->rc)
- stream->rc = rc;
- stream_done(egc, stream);
+ stream_done(egc, stream, rc);
}
static void stream_done(libxl__egc *egc,
- libxl__stream_write_state *stream)
+ libxl__stream_write_state *stream, int rc)
{
assert(stream->running);
stream->running = false;
@@ -507,7 +507,7 @@ static void stream_done(libxl__egc *egc,
libxl__carefd_close(stream->emu_carefd);
free(stream->emu_body);
- check_all_finished(egc, stream, stream->rc);
+ check_all_finished(egc, stream, rc);
}
static void checkpoint_done(libxl__egc *egc,
@@ -526,12 +526,32 @@ static void check_all_finished(libxl__egc *egc,
{
STATE_AO_GC(stream->ao);
+ /*
+ * In the case of a failure, the _abort()'s below might cancel
+ * synchronously on top of us, or asynchronously at a later point.
+ *
+ * We must avoid the situation where all _abort() cancel
+ * synchronously and the completion_callback() gets called twice;
+ * once by the first error and once by the final stacked abort(),
+ * both of whom will find that all of the tasks have stopped.
+ *
+ * To avoid this problem, any stacked re-entry into this function is
+ * ineligible to fire the completion callback. The outermost
+ * instance will take care of completing, once the stack has
+ * unwound.
+ */
+ if (stream->sync_teardown)
+ return;
+
if (!stream->rc && rc) {
/* First reported failure. Tear everything down. */
stream->rc = rc;
+ stream->sync_teardown = true;
libxl__stream_write_abort(egc, stream, rc);
libxl__save_helper_abort(egc, &stream->shs);
+
+ stream->sync_teardown = false;
}
/* Don't fire the callback until all our parallel tasks have stopped. */
--
1.7.10.4
next reply other threads:[~2015-07-23 11:09 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-07-23 11:09 Andrew Cooper [this message]
2015-07-23 11:38 ` [PATCH] tools/libxl: Fixes to stream v2 task joining logic Wei Liu
2015-07-23 11:42 ` Andrew Cooper
2015-07-23 12:03 ` Wei Liu
2015-07-23 13:07 ` Andrew Cooper
2015-07-23 14:02 ` Ian Jackson
2015-07-24 11:41 ` Ian Jackson
2015-07-27 14:30 ` Andrew Cooper
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1437649778-16728-1-git-send-email-andrew.cooper3@citrix.com \
--to=andrew.cooper3@citrix.com \
--cc=Ian.Campbell@citrix.com \
--cc=Ian.Jackson@eu.citrix.com \
--cc=wei.liu2@citrix.com \
--cc=xen-devel@lists.xen.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).