From mboxrd@z Thu Jan 1 00:00:00 1970 From: Andrew Cooper Subject: [PATCH] tools/libxl: Fixes to stream v2 task joining logic Date: Thu, 23 Jul 2015 12:09:38 +0100 Message-ID: <1437649778-16728-1-git-send-email-andrew.cooper3@citrix.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Xen-devel Cc: Andrew Cooper , Ian Jackson , Ian Campbell , Wei Liu List-Id: xen-devel@lists.xenproject.org During review of the libxl migration v2 series, I changes the task joining logic, but clearly didn't think the result through properly. This patch fixes several errors. 1) Do not call check_all_finished() in the success cases of libxl__xc_domain_{save,restore}_done(). It serves no specific purpose as the save helper state will report itself as inactive by this point, and avoids triggering a second stream->completion_callback() in the case that write_toolstack_record()/stream_continue() record errors synchronously themselves. 2) Only ever set stream->rc in stream_complete(). The first version of the migration v2 series had separate rc and joined_rc parameters, where this logic worked. However when combining the two, the teardown path fails to trigger if stream_done() records stream->rc itself. A side effect of this is that stream_done() needs to take an rc parameter. 3) Avoid stacking of check_all_finished() via synchronous teardown of tasks. If the _abort() functions call back synchronously, stream->completion_callback() ends up getting called twice, as first and last check_all_finished() frames observe each task being finished. Signed-off-by: Andrew Cooper CC: Ian Campbell CC: Ian Jackson CC: Wei Liu --- I found this while working to fix the toolstack record issue, but am posting this bugfix ahead of the other work as OSSTest has tripped over the issue. --- tools/libxl/libxl_internal.h | 2 ++ tools/libxl/libxl_stream_read.c | 32 ++++++++++++++++++++++++++------ tools/libxl/libxl_stream_write.c | 32 ++++++++++++++++++++++++++------ 3 files changed, 54 insertions(+), 12 deletions(-) diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index 3c09668..73b4d62 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -2996,6 +2996,7 @@ struct libxl__stream_write_state { int rc; bool running; bool in_checkpoint; + bool sync_teardown; libxl__save_helper_state shs; /* Main stream-writing data. */ @@ -3337,6 +3338,7 @@ struct libxl__stream_read_state { int rc; bool running; bool in_checkpoint; + bool sync_teardown; libxl__save_helper_state shs; libxl__conversion_helper_state chs; diff --git a/tools/libxl/libxl_stream_read.c b/tools/libxl/libxl_stream_read.c index 32a3551..70d3e5f 100644 --- a/tools/libxl/libxl_stream_read.c +++ b/tools/libxl/libxl_stream_read.c @@ -112,7 +112,7 @@ static void stream_complete(libxl__egc *egc, static void checkpoint_done(libxl__egc *egc, libxl__stream_read_state *stream, int rc); static void stream_done(libxl__egc *egc, - libxl__stream_read_state *stream); + libxl__stream_read_state *stream, int rc); static void conversion_done(libxl__egc *egc, libxl__conversion_helper_state *chs, int rc); static void check_all_finished(libxl__egc *egc, @@ -176,6 +176,7 @@ void libxl__stream_read_init(libxl__stream_read_state *stream) stream->rc = 0; stream->running = false; stream->in_checkpoint = false; + stream->sync_teardown = false; libxl__save_helper_init(&stream->shs); libxl__conversion_helper_init(&stream->chs); FILLZERO(stream->dc); @@ -669,9 +670,7 @@ static void stream_complete(libxl__egc *egc, return; } - if (!stream->rc) - stream->rc = rc; - stream_done(egc, stream); + stream_done(egc, stream, rc); } static void checkpoint_done(libxl__egc *egc, @@ -695,7 +694,7 @@ static void checkpoint_done(libxl__egc *egc, } static void stream_done(libxl__egc *egc, - libxl__stream_read_state *stream) + libxl__stream_read_state *stream, int rc) { libxl__sr_record_buf *rec, *trec; @@ -720,7 +719,7 @@ static void stream_done(libxl__egc *egc, LIBXL_STAILQ_FOREACH_SAFE(rec, &stream->record_queue, entry, trec) free_record(rec); - check_all_finished(egc, stream, stream->rc); + check_all_finished(egc, stream, rc); } void libxl__xc_domain_restore_done(libxl__egc *egc, void *dcs_void, @@ -744,6 +743,7 @@ void libxl__xc_domain_restore_done(libxl__egc *egc, void *dcs_void, * libxl records from it. */ stream_continue(egc, stream); + return; err: check_all_finished(egc, stream, rc); @@ -762,13 +762,33 @@ static void check_all_finished(libxl__egc *egc, { STATE_AO_GC(stream->ao); + /* + * In the case of a failure, the _abort()'s below might cancel + * synchronously on top of us, or asynchronously at a later point. + * + * We must avoid the situation where all _abort() cancel + * synchronously and the completion_callback() gets called twice; + * once by the first error and once by the final stacked abort(), + * both of whom will find that all of the tasks have stopped. + * + * To avoid this problem, any stacked re-entry into this function is + * ineligible to fire the completion callback. The outermost + * instance will take care of completing, once the stack has + * unwound. + */ + if (stream->sync_teardown) + return; + if (!stream->rc && rc) { /* First reported failure. Tear everything down. */ stream->rc = rc; + stream->sync_teardown = true; libxl__stream_read_abort(egc, stream, rc); libxl__save_helper_abort(egc, &stream->shs); libxl__conversion_helper_abort(egc, &stream->chs, rc); + + stream->sync_teardown = false; } /* Don't fire the callback until all our parallel tasks have stopped. */ diff --git a/tools/libxl/libxl_stream_write.c b/tools/libxl/libxl_stream_write.c index 5bff52b..5135bc3 100644 --- a/tools/libxl/libxl_stream_write.c +++ b/tools/libxl/libxl_stream_write.c @@ -55,7 +55,7 @@ static void stream_success(libxl__egc *egc, static void stream_complete(libxl__egc *egc, libxl__stream_write_state *stream, int rc); static void stream_done(libxl__egc *egc, - libxl__stream_write_state *stream); + libxl__stream_write_state *stream, int rc); static void checkpoint_done(libxl__egc *egc, libxl__stream_write_state *stream, int rc); @@ -158,6 +158,7 @@ void libxl__stream_write_init(libxl__stream_write_state *stream) stream->rc = 0; stream->running = false; stream->in_checkpoint = false; + stream->sync_teardown = false; FILLZERO(stream->dc); stream->record_done_callback = NULL; FILLZERO(stream->emu_dc); @@ -275,6 +276,7 @@ void libxl__xc_domain_save_done(libxl__egc *egc, void *dss_void, } write_toolstack_record(egc, stream); + return; err: check_all_finished(egc, stream, rc); @@ -492,13 +494,11 @@ static void stream_complete(libxl__egc *egc, return; } - if (!stream->rc) - stream->rc = rc; - stream_done(egc, stream); + stream_done(egc, stream, rc); } static void stream_done(libxl__egc *egc, - libxl__stream_write_state *stream) + libxl__stream_write_state *stream, int rc) { assert(stream->running); stream->running = false; @@ -507,7 +507,7 @@ static void stream_done(libxl__egc *egc, libxl__carefd_close(stream->emu_carefd); free(stream->emu_body); - check_all_finished(egc, stream, stream->rc); + check_all_finished(egc, stream, rc); } static void checkpoint_done(libxl__egc *egc, @@ -526,12 +526,32 @@ static void check_all_finished(libxl__egc *egc, { STATE_AO_GC(stream->ao); + /* + * In the case of a failure, the _abort()'s below might cancel + * synchronously on top of us, or asynchronously at a later point. + * + * We must avoid the situation where all _abort() cancel + * synchronously and the completion_callback() gets called twice; + * once by the first error and once by the final stacked abort(), + * both of whom will find that all of the tasks have stopped. + * + * To avoid this problem, any stacked re-entry into this function is + * ineligible to fire the completion callback. The outermost + * instance will take care of completing, once the stack has + * unwound. + */ + if (stream->sync_teardown) + return; + if (!stream->rc && rc) { /* First reported failure. Tear everything down. */ stream->rc = rc; + stream->sync_teardown = true; libxl__stream_write_abort(egc, stream, rc); libxl__save_helper_abort(egc, &stream->shs); + + stream->sync_teardown = false; } /* Don't fire the callback until all our parallel tasks have stopped. */ -- 1.7.10.4