xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 for-4.6 0/3] Fixes to stream v2 task joining logic
@ 2015-07-27 16:47 Andrew Cooper
  2015-07-27 16:47 ` [PATCH v2 for-4.6 1/3] tools/libxl: Do not set stream->rc in stream_complete() Andrew Cooper
                   ` (3 more replies)
  0 siblings, 4 replies; 16+ messages in thread
From: Andrew Cooper @ 2015-07-27 16:47 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper, Ian Jackson, Ian Campbell, Wei Liu

Split the original patch into 3 distinct fixes.

Andrew Cooper (3):
  tools/libxl: Do not set stream->rc in stream_complete()
  tools/libxl: Do not fire the stream callback multiple times
  tools/libxl: Only continue stream operations if the stream is still
    in progress

 tools/libxl/libxl_internal.h     |    2 ++
 tools/libxl/libxl_stream_read.c  |   45 ++++++++++++++++++++++++++++----------
 tools/libxl/libxl_stream_write.c |   36 +++++++++++++++++++++++-------
 3 files changed, 63 insertions(+), 20 deletions(-)

-- 
1.7.10.4

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v2 for-4.6 1/3] tools/libxl: Do not set stream->rc in stream_complete()
  2015-07-27 16:47 [PATCH v2 for-4.6 0/3] Fixes to stream v2 task joining logic Andrew Cooper
@ 2015-07-27 16:47 ` Andrew Cooper
  2015-07-28 10:26   ` Ian Jackson
  2015-07-27 16:47 ` [PATCH v2 for-4.6 2/3] tools/libxl: Do not fire the stream callback multiple times Andrew Cooper
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 16+ messages in thread
From: Andrew Cooper @ 2015-07-27 16:47 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper, Ian Jackson, Ian Campbell, Wei Liu

Only ever set stream->rc in check_all_finished().  The first version of
the migration v2 series had separate rc and joined_rc parameters, where
this logic worked.  However when combining the two, the teardown path
fails to trigger if stream_complete() records stream->rc itself.  A side
effect of this is that stream_done() needs to take an rc parameter.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Ian Campbell <Ian.Campbell@citrix.com>
CC: Ian Jackson <Ian.Jackson@eu.citrix.com>
CC: Wei Liu <wei.liu2@citrix.com>
---
 tools/libxl/libxl_stream_read.c  |   10 ++++------
 tools/libxl/libxl_stream_write.c |   10 ++++------
 2 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/tools/libxl/libxl_stream_read.c b/tools/libxl/libxl_stream_read.c
index 32a3551..4458aec 100644
--- a/tools/libxl/libxl_stream_read.c
+++ b/tools/libxl/libxl_stream_read.c
@@ -112,7 +112,7 @@ static void stream_complete(libxl__egc *egc,
 static void checkpoint_done(libxl__egc *egc,
                             libxl__stream_read_state *stream, int rc);
 static void stream_done(libxl__egc *egc,
-                        libxl__stream_read_state *stream);
+                        libxl__stream_read_state *stream, int rc);
 static void conversion_done(libxl__egc *egc,
                             libxl__conversion_helper_state *chs, int rc);
 static void check_all_finished(libxl__egc *egc,
@@ -669,9 +669,7 @@ static void stream_complete(libxl__egc *egc,
         return;
     }
 
-    if (!stream->rc)
-        stream->rc = rc;
-    stream_done(egc, stream);
+    stream_done(egc, stream, rc);
 }
 
 static void checkpoint_done(libxl__egc *egc,
@@ -695,7 +693,7 @@ static void checkpoint_done(libxl__egc *egc,
 }
 
 static void stream_done(libxl__egc *egc,
-                        libxl__stream_read_state *stream)
+                        libxl__stream_read_state *stream, int rc)
 {
     libxl__sr_record_buf *rec, *trec;
 
@@ -720,7 +718,7 @@ static void stream_done(libxl__egc *egc,
     LIBXL_STAILQ_FOREACH_SAFE(rec, &stream->record_queue, entry, trec)
         free_record(rec);
 
-    check_all_finished(egc, stream, stream->rc);
+    check_all_finished(egc, stream, rc);
 }
 
 void libxl__xc_domain_restore_done(libxl__egc *egc, void *dcs_void,
diff --git a/tools/libxl/libxl_stream_write.c b/tools/libxl/libxl_stream_write.c
index 5bff52b..ec46105 100644
--- a/tools/libxl/libxl_stream_write.c
+++ b/tools/libxl/libxl_stream_write.c
@@ -55,7 +55,7 @@ static void stream_success(libxl__egc *egc,
 static void stream_complete(libxl__egc *egc,
                             libxl__stream_write_state *stream, int rc);
 static void stream_done(libxl__egc *egc,
-                        libxl__stream_write_state *stream);
+                        libxl__stream_write_state *stream, int rc);
 static void checkpoint_done(libxl__egc *egc,
                             libxl__stream_write_state *stream,
                             int rc);
@@ -492,13 +492,11 @@ static void stream_complete(libxl__egc *egc,
         return;
     }
 
-    if (!stream->rc)
-        stream->rc = rc;
-    stream_done(egc, stream);
+    stream_done(egc, stream, rc);
 }
 
 static void stream_done(libxl__egc *egc,
-                        libxl__stream_write_state *stream)
+                        libxl__stream_write_state *stream, int rc)
 {
     assert(stream->running);
     stream->running = false;
@@ -507,7 +505,7 @@ static void stream_done(libxl__egc *egc,
         libxl__carefd_close(stream->emu_carefd);
     free(stream->emu_body);
 
-    check_all_finished(egc, stream, stream->rc);
+    check_all_finished(egc, stream, rc);
 }
 
 static void checkpoint_done(libxl__egc *egc,
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v2 for-4.6 2/3] tools/libxl: Do not fire the stream callback multiple times
  2015-07-27 16:47 [PATCH v2 for-4.6 0/3] Fixes to stream v2 task joining logic Andrew Cooper
  2015-07-27 16:47 ` [PATCH v2 for-4.6 1/3] tools/libxl: Do not set stream->rc in stream_complete() Andrew Cooper
@ 2015-07-27 16:47 ` Andrew Cooper
  2015-07-28 10:27   ` Ian Jackson
  2015-07-27 16:47 ` [PATCH v2 for-4.6 3/3] tools/libxl: Only continue stream operations if the stream is still in progress Andrew Cooper
  2015-07-28 13:27 ` [PATCH v2 for-4.6 0/3] Fixes to stream v2 task joining logic Ian Campbell
  3 siblings, 1 reply; 16+ messages in thread
From: Andrew Cooper @ 2015-07-27 16:47 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper, Ian Jackson, Ian Campbell, Wei Liu

Avoid stacking of check_all_finished() via synchronous teardown of
tasks.  If the _abort() functions call back synchronously,
stream->completion_callback() ends up getting called twice, as first
and last check_all_finished() frames observe each task being finished.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Ian Campbell <Ian.Campbell@citrix.com>
CC: Ian Jackson <Ian.Jackson@eu.citrix.com>
CC: Wei Liu <wei.liu2@citrix.com>
---
 tools/libxl/libxl_internal.h     |    2 ++
 tools/libxl/libxl_stream_read.c  |   21 +++++++++++++++++++++
 tools/libxl/libxl_stream_write.c |   21 +++++++++++++++++++++
 3 files changed, 44 insertions(+)

diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index f2466dc..e2bf327 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -3010,6 +3010,7 @@ struct libxl__stream_write_state {
     int rc;
     bool running;
     bool in_checkpoint;
+    bool sync_teardown;  /* Only used to coordinate shutdown on error path. */
     libxl__save_helper_state shs;
 
     /* Main stream-writing data. */
@@ -3351,6 +3352,7 @@ struct libxl__stream_read_state {
     int rc;
     bool running;
     bool in_checkpoint;
+    bool sync_teardown; /* Only used to coordinate shutdown on error path. */
     libxl__save_helper_state shs;
     libxl__conversion_helper_state chs;
 
diff --git a/tools/libxl/libxl_stream_read.c b/tools/libxl/libxl_stream_read.c
index 4458aec..65100f4 100644
--- a/tools/libxl/libxl_stream_read.c
+++ b/tools/libxl/libxl_stream_read.c
@@ -176,6 +176,7 @@ void libxl__stream_read_init(libxl__stream_read_state *stream)
     stream->rc = 0;
     stream->running = false;
     stream->in_checkpoint = false;
+    stream->sync_teardown = false;
     libxl__save_helper_init(&stream->shs);
     libxl__conversion_helper_init(&stream->chs);
     FILLZERO(stream->dc);
@@ -760,13 +761,33 @@ static void check_all_finished(libxl__egc *egc,
 {
     STATE_AO_GC(stream->ao);
 
+    /*
+     * In the case of a failure, the _abort()'s below might cancel
+     * synchronously on top of us, or asynchronously at a later point.
+     *
+     * We must avoid the situation where all _abort() cancel
+     * synchronously and the completion_callback() gets called twice;
+     * once by the first error and once by the final stacked abort(),
+     * both of whom will find that all of the tasks have stopped.
+     *
+     * To avoid this problem, any stacked re-entry into this function is
+     * ineligible to fire the completion callback.  The outermost
+     * instance will take care of completing, once the stack has
+     * unwound.
+     */
+    if (stream->sync_teardown)
+        return;
+
     if (!stream->rc && rc) {
         /* First reported failure. Tear everything down. */
         stream->rc = rc;
+        stream->sync_teardown = true;
 
         libxl__stream_read_abort(egc, stream, rc);
         libxl__save_helper_abort(egc, &stream->shs);
         libxl__conversion_helper_abort(egc, &stream->chs, rc);
+
+        stream->sync_teardown = false;
     }
 
     /* Don't fire the callback until all our parallel tasks have stopped. */
diff --git a/tools/libxl/libxl_stream_write.c b/tools/libxl/libxl_stream_write.c
index ec46105..676ad0a 100644
--- a/tools/libxl/libxl_stream_write.c
+++ b/tools/libxl/libxl_stream_write.c
@@ -158,6 +158,7 @@ void libxl__stream_write_init(libxl__stream_write_state *stream)
     stream->rc = 0;
     stream->running = false;
     stream->in_checkpoint = false;
+    stream->sync_teardown = false;
     FILLZERO(stream->dc);
     stream->record_done_callback = NULL;
     FILLZERO(stream->emu_dc);
@@ -524,12 +525,32 @@ static void check_all_finished(libxl__egc *egc,
 {
     STATE_AO_GC(stream->ao);
 
+    /*
+     * In the case of a failure, the _abort()'s below might cancel
+     * synchronously on top of us, or asynchronously at a later point.
+     *
+     * We must avoid the situation where all _abort() cancel
+     * synchronously and the completion_callback() gets called twice;
+     * once by the first error and once by the final stacked abort(),
+     * both of whom will find that all of the tasks have stopped.
+     *
+     * To avoid this problem, any stacked re-entry into this function is
+     * ineligible to fire the completion callback.  The outermost
+     * instance will take care of completing, once the stack has
+     * unwound.
+     */
+    if (stream->sync_teardown)
+        return;
+
     if (!stream->rc && rc) {
         /* First reported failure. Tear everything down. */
         stream->rc = rc;
+        stream->sync_teardown = true;
 
         libxl__stream_write_abort(egc, stream, rc);
         libxl__save_helper_abort(egc, &stream->shs);
+
+        stream->sync_teardown = false;
     }
 
     /* Don't fire the callback until all our parallel tasks have stopped. */
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v2 for-4.6 3/3] tools/libxl: Only continue stream operations if the stream is still in progress
  2015-07-27 16:47 [PATCH v2 for-4.6 0/3] Fixes to stream v2 task joining logic Andrew Cooper
  2015-07-27 16:47 ` [PATCH v2 for-4.6 1/3] tools/libxl: Do not set stream->rc in stream_complete() Andrew Cooper
  2015-07-27 16:47 ` [PATCH v2 for-4.6 2/3] tools/libxl: Do not fire the stream callback multiple times Andrew Cooper
@ 2015-07-27 16:47 ` Andrew Cooper
  2015-07-28 13:41   ` Ian Jackson
  2015-07-28 13:27 ` [PATCH v2 for-4.6 0/3] Fixes to stream v2 task joining logic Ian Campbell
  3 siblings, 1 reply; 16+ messages in thread
From: Andrew Cooper @ 2015-07-27 16:47 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper, Ian Jackson, Ian Campbell, Wei Liu

Part of the callback contract with check_all_finished() is that each
running parallel task shall call it exactly once.

Previously, it was possible for stream_continue() or
write_toolstack_record() to fail and call into check_all_finished().  As
the save helpers callback has fired, it no longer counts as in use,
which causes check_all_finished() to fire the stream callback.  Then,
unwinding the stack back and calling check_all_finished() a second time
results in the same conditions being observed, and the stream callback
being fired a second time.

To avoid this, check_all_finished() is called before any other actions
which continue the stream functionality, and the stream is only
continued if it has not been torn down.  This guarantees not to continue
stream operations if the stream does not owe a callback to
check_all_finished().

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Ian Campbell <Ian.Campbell@citrix.com>
CC: Ian Jackson <Ian.Jackson@eu.citrix.com>
CC: Wei Liu <wei.liu2@citrix.com>
---
 tools/libxl/libxl_stream_read.c  |   14 ++++++++------
 tools/libxl/libxl_stream_write.c |    5 +++--
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/tools/libxl/libxl_stream_read.c b/tools/libxl/libxl_stream_read.c
index 65100f4..3432933 100644
--- a/tools/libxl/libxl_stream_read.c
+++ b/tools/libxl/libxl_stream_read.c
@@ -738,14 +738,16 @@ void libxl__xc_domain_restore_done(libxl__egc *egc, void *dcs_void,
         goto err;
     }
 
-    /*
-     * Libxc has indicated that it is done with the stream.  Resume reading
-     * libxl records from it.
-     */
-    stream_continue(egc, stream);
-
  err:
     check_all_finished(egc, stream, rc);
+
+    if (libxl__stream_read_inuse(stream)) {
+        /*
+         * Libxc has indicated that it is done with the stream.  Resume reading
+         * libxl records from it.
+         */
+        stream_continue(egc, stream);
+    }
 }
 
 static void conversion_done(libxl__egc *egc,
diff --git a/tools/libxl/libxl_stream_write.c b/tools/libxl/libxl_stream_write.c
index 676ad0a..44f8b2f 100644
--- a/tools/libxl/libxl_stream_write.c
+++ b/tools/libxl/libxl_stream_write.c
@@ -275,10 +275,11 @@ void libxl__xc_domain_save_done(libxl__egc *egc, void *dss_void,
         goto err;
     }
 
-    write_toolstack_record(egc, stream);
-
  err:
     check_all_finished(egc, stream, rc);
+
+    if (libxl__stream_write_inuse(stream))
+        write_toolstack_record(egc, stream);
 }
 
 static void write_toolstack_record(libxl__egc *egc,
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 for-4.6 1/3] tools/libxl: Do not set stream->rc in stream_complete()
  2015-07-27 16:47 ` [PATCH v2 for-4.6 1/3] tools/libxl: Do not set stream->rc in stream_complete() Andrew Cooper
@ 2015-07-28 10:26   ` Ian Jackson
  0 siblings, 0 replies; 16+ messages in thread
From: Ian Jackson @ 2015-07-28 10:26 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Wei Liu, Ian Campbell, Xen-devel

Andrew Cooper writes ("[PATCH v2 for-4.6 1/3] tools/libxl: Do not set stream->rc in stream_complete()"):
> Only ever set stream->rc in check_all_finished().  The first version of
> the migration v2 series had separate rc and joined_rc parameters, where
> this logic worked.  However when combining the two, the teardown path
> fails to trigger if stream_complete() records stream->rc itself.  A side
> effect of this is that stream_done() needs to take an rc parameter.

Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 for-4.6 2/3] tools/libxl: Do not fire the stream callback multiple times
  2015-07-27 16:47 ` [PATCH v2 for-4.6 2/3] tools/libxl: Do not fire the stream callback multiple times Andrew Cooper
@ 2015-07-28 10:27   ` Ian Jackson
  0 siblings, 0 replies; 16+ messages in thread
From: Ian Jackson @ 2015-07-28 10:27 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Wei Liu, Ian Campbell, Xen-devel

Andrew Cooper writes ("[PATCH v2 for-4.6 2/3] tools/libxl: Do not fire the stream callback multiple times"):
> Avoid stacking of check_all_finished() via synchronous teardown of
> tasks.  If the _abort() functions call back synchronously,
> stream->completion_callback() ends up getting called twice, as first
> and last check_all_finished() frames observe each task being finished.

Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 for-4.6 0/3] Fixes to stream v2 task joining logic
  2015-07-27 16:47 [PATCH v2 for-4.6 0/3] Fixes to stream v2 task joining logic Andrew Cooper
                   ` (2 preceding siblings ...)
  2015-07-27 16:47 ` [PATCH v2 for-4.6 3/3] tools/libxl: Only continue stream operations if the stream is still in progress Andrew Cooper
@ 2015-07-28 13:27 ` Ian Campbell
  3 siblings, 0 replies; 16+ messages in thread
From: Ian Campbell @ 2015-07-28 13:27 UTC (permalink / raw)
  To: Andrew Cooper, Xen-devel; +Cc: Wei Liu, Ian Jackson

On Mon, 2015-07-27 at 17:47 +0100, Andrew Cooper wrote:
> Split the original patch into 3 distinct fixes.
> 
> Andrew Cooper (3):
>   tools/libxl: Do not set stream->rc in stream_complete()
>   tools/libxl: Do not fire the stream callback multiple times

Applied with Ian's ack.

>   tools/libxl: Only continue stream operations if the stream is still
>     in progress

Ian hasn't acked this one yet, so I left it alone

Ian.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 for-4.6 3/3] tools/libxl: Only continue stream operations if the stream is still in progress
  2015-07-27 16:47 ` [PATCH v2 for-4.6 3/3] tools/libxl: Only continue stream operations if the stream is still in progress Andrew Cooper
@ 2015-07-28 13:41   ` Ian Jackson
  2015-07-28 13:56     ` Andrew Cooper
  0 siblings, 1 reply; 16+ messages in thread
From: Ian Jackson @ 2015-07-28 13:41 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Wei Liu, Ian Campbell, Xen-devel

Andrew Cooper writes ("[PATCH v2 for-4.6 3/3] tools/libxl: Only continue stream operations if the stream is still in progress"):
> Part of the callback contract with check_all_finished() is that each
> running parallel task shall call it exactly once.
...
> @@ -738,14 +738,16 @@ void libxl__xc_domain_restore_done(libxl__egc *egc, void *dcs_void,
>          goto err;
>      }
>  
> -    /*
> -     * Libxc has indicated that it is done with the stream.  Resume reading
> -     * libxl records from it.
> -     */
> -    stream_continue(egc, stream);
> -
>   err:
>      check_all_finished(egc, stream, rc);
> +
> +    if (libxl__stream_read_inuse(stream)) {
> +        /*
> +         * Libxc has indicated that it is done with the stream.  Resume re\
ading
> +         * libxl records from it.
> +         */
> +        stream_continue(egc, stream);
> +    }

This doesn't look convincing to me.  (Also, wrap damage in the
comment.)  It may be possible to prove that it's not buggy, but it's
certainly confusing.  In general the the which sets up the next
callback should be at the end of functions, because that way
reentrancy hazards are more easily seen to be avoided.

Why not just do this:

       /*
        * Libxc has indicated that it is done with the stream.  Resume reading
        * libxl records from it.
        */
       stream_continue(egc, stream);
  +    return;

    err:
       check_all_finished(egc, stream, rc);

?

And the same in the next one.

Ian.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 for-4.6 3/3] tools/libxl: Only continue stream operations if the stream is still in progress
  2015-07-28 13:41   ` Ian Jackson
@ 2015-07-28 13:56     ` Andrew Cooper
  2015-07-28 15:12       ` Ian Jackson
  0 siblings, 1 reply; 16+ messages in thread
From: Andrew Cooper @ 2015-07-28 13:56 UTC (permalink / raw)
  To: Ian Jackson; +Cc: Wei Liu, Ian Campbell, Xen-devel

On 28/07/15 14:41, Ian Jackson wrote:
> Andrew Cooper writes ("[PATCH v2 for-4.6 3/3] tools/libxl: Only continue stream operations if the stream is still in progress"):
>> Part of the callback contract with check_all_finished() is that each
>> running parallel task shall call it exactly once.
> ...
>> @@ -738,14 +738,16 @@ void libxl__xc_domain_restore_done(libxl__egc *egc, void *dcs_void,
>>          goto err;
>>      }
>>  
>> -    /*
>> -     * Libxc has indicated that it is done with the stream.  Resume reading
>> -     * libxl records from it.
>> -     */
>> -    stream_continue(egc, stream);
>> -
>>   err:
>>      check_all_finished(egc, stream, rc);
>> +
>> +    if (libxl__stream_read_inuse(stream)) {
>> +        /*
>> +         * Libxc has indicated that it is done with the stream.  Resume re\
> ading
>> +         * libxl records from it.
>> +         */
>> +        stream_continue(egc, stream);
>> +    }
> This doesn't look convincing to me.  (Also, wrap damage in the
> comment.)  It may be possible to prove that it's not buggy, but it's
> certainly confusing.  In general the the which sets up the next
> callback should be at the end of functions, because that way
> reentrancy hazards are more easily seen to be avoided.
>
> Why not just do this:
>
>        /*
>         * Libxc has indicated that it is done with the stream.  Resume reading
>         * libxl records from it.
>         */
>        stream_continue(egc, stream);
>   +    return;
>
>     err:
>        check_all_finished(egc, stream, rc);
>
> ?
>
> And the same in the next one.

That was my first attempt to fix this issue (as observed in v1), but it
is buggy.

libxl__xc_domain_restore_done() is required to call check_all_finished()
exactly once, as part of its callback contract.

Imagine a scenario whereby some error has occured and
check_all_finished() has _abort()'ed the tasks, but the save helper was
already on the way out, signalling success.

In such a case, we would both omit the check_all_finished() call, thus
omitting the eventual stream callback, and start a further action on a
now-defunct stream.

It is only save to stream_continue() if the stream is currently in use,
which is not a guaranteed situation in this function even if rc is 0.

~Andrew

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 for-4.6 3/3] tools/libxl: Only continue stream operations if the stream is still in progress
  2015-07-28 13:56     ` Andrew Cooper
@ 2015-07-28 15:12       ` Ian Jackson
  2015-07-28 15:21         ` Andrew Cooper
  0 siblings, 1 reply; 16+ messages in thread
From: Ian Jackson @ 2015-07-28 15:12 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Wei Liu, Ian Campbell, Xen-devel

Andrew Cooper writes ("Re: [PATCH v2 for-4.6 3/3] tools/libxl: Only continue stream operations if the stream is still in progress"):
> Imagine a scenario whereby some error has occured and
> check_all_finished() has _abort()'ed the tasks, but the save helper was
> already on the way out, signalling success.
...
> It is only save to stream_continue() if the stream is currently in use,
> which is not a guaranteed situation in this function even if rc is 0.

Hrm.  Yes.

What do you think about putting the inuse check in stream_continue ?

Ian.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 for-4.6 3/3] tools/libxl: Only continue stream operations if the stream is still in progress
  2015-07-28 15:12       ` Ian Jackson
@ 2015-07-28 15:21         ` Andrew Cooper
  2015-07-28 15:59           ` Ian Jackson
  0 siblings, 1 reply; 16+ messages in thread
From: Andrew Cooper @ 2015-07-28 15:21 UTC (permalink / raw)
  To: Ian Jackson; +Cc: Wei Liu, Ian Campbell, Xen-devel

On 28/07/15 16:12, Ian Jackson wrote:
> Andrew Cooper writes ("Re: [PATCH v2 for-4.6 3/3] tools/libxl: Only continue stream operations if the stream is still in progress"):
>> Imagine a scenario whereby some error has occured and
>> check_all_finished() has _abort()'ed the tasks, but the save helper was
>> already on the way out, signalling success.
> ...
>> It is only save to stream_continue() if the stream is currently in use,
>> which is not a guaranteed situation in this function even if rc is 0.

erm s/save/safe/

> Hrm.  Yes.
>
> What do you think about putting the inuse check in stream_continue ?

That would work on the stream_read side but not the stream_write side,
but is not really correct IMO.

The _inuse() check is needed because the save helper callback is not
sure whether the stream is in use or not.  This is a property of the
save helper callback, rather than the stream.

Pushing the _inuse() check into the next layer would function, but it
adds extra _inuse() checks to other codepaths which should be fatal if
they failed in other contexts.

Would resubmitting with extra comments explaining this suffice?

~Andrew

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 for-4.6 3/3] tools/libxl: Only continue stream operations if the stream is still in progress
  2015-07-28 15:21         ` Andrew Cooper
@ 2015-07-28 15:59           ` Ian Jackson
  2015-07-28 16:52             ` Andrew Cooper
  0 siblings, 1 reply; 16+ messages in thread
From: Ian Jackson @ 2015-07-28 15:59 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Wei Liu, Ian Campbell, Xen-devel

Andrew Cooper writes ("Re: [PATCH v2 for-4.6 3/3] tools/libxl: Only continue stream operations if the stream is still in progress"):
> On 28/07/15 16:12, Ian Jackson wrote:
> > What do you think about putting the inuse check in stream_continue ?
> 
> That would work on the stream_read side but not the stream_write side,
> but is not really correct IMO.

I have reread the code on the write side and I think I agree with you
there.  The next function to be called is write_toolstack_record so
things are a lot less abstract.

On the read side I looked at a lot of functions which contained:

      stream_continue(egc, stream);
      return;

   err:
      assert(rc);
      stream_complete(egc, stream, rc);

and ISTM that if the analysis leading to these call sites for
stream_continue is wrong, a similar hazard would exist.


> The _inuse() check is needed because the save helper callback is not
> sure whether the stream is in use or not.  This is a property of the
> save helper callback, rather than the stream.

In this event-driven code, I prefer to have a situation where the next
thing to do is obvious from the recorded state, and is calculated
explicitly (see also the way that check_all_finished is now).

This is much less confusing and error prone than code where the next
thing to do depends on _both_ the recorded state, _and_ the call path
(representing the most recent thing that happened).

> Pushing the _inuse() check into the next layer would function, but it
> adds extra _inuse() checks to other codepaths which should be fatal if
> they failed in other contexts.

I don't understand how an _inuse check in stream_continue could ever
do the wrong thing.  If the code is reorganised later, it seems more
likely to me that there will introduced new situations where we need
to do "see if we are still running, and if so do stream_continue,
otherwise go to check_all_finished".

Ian.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 for-4.6 3/3] tools/libxl: Only continue stream operations if the stream is still in progress
  2015-07-28 15:59           ` Ian Jackson
@ 2015-07-28 16:52             ` Andrew Cooper
  2015-07-28 17:07               ` Ian Jackson
  0 siblings, 1 reply; 16+ messages in thread
From: Andrew Cooper @ 2015-07-28 16:52 UTC (permalink / raw)
  To: Ian Jackson; +Cc: Wei Liu, Ian Campbell, Xen-devel

On 28/07/15 16:59, Ian Jackson wrote:
> Andrew Cooper writes ("Re: [PATCH v2 for-4.6 3/3] tools/libxl: Only continue stream operations if the stream is still in progress"):
>> On 28/07/15 16:12, Ian Jackson wrote:
>>> What do you think about putting the inuse check in stream_continue ?
>> That would work on the stream_read side but not the stream_write side,
>> but is not really correct IMO.
> I have reread the code on the write side and I think I agree with you
> there.  The next function to be called is write_toolstack_record so
> things are a lot less abstract.
>
> On the read side I looked at a lot of functions which contained:
>
>       stream_continue(egc, stream);
>       return;
>
>    err:
>       assert(rc);
>       stream_complete(egc, stream, rc);
>
> and ISTM that if the analysis leading to these call sites for
> stream_continue is wrong, a similar hazard would exist.

The vast majority of these cases are from internal callbacks.  i.e. they
are within the logical context of the current running stream.

The cases which come in from some external context are
libxl__stream_read_start_checkpoint() and
libxl__xc_domain_restore_done().  The latter is the problem case, while
the former asserts that the stream is in use.

>
>> The _inuse() check is needed because the save helper callback is not
>> sure whether the stream is in use or not.  This is a property of the
>> save helper callback, rather than the stream.
> In this event-driven code, I prefer to have a situation where the next
> thing to do is obvious from the recorded state, and is calculated
> explicitly (see also the way that check_all_finished is now).
>
> This is much less confusing and error prone than code where the next
> thing to do depends on _both_ the recorded state, _and_ the call path
> (representing the most recent thing that happened).

This point is specifically a join point of the end of one asynchronous
task, to another which is expected to be active and running.

It is logically different to the other cases where stream_continue() is
used.

>
>> Pushing the _inuse() check into the next layer would function, but it
>> adds extra _inuse() checks to other codepaths which should be fatal if
>> they failed in other contexts.
> I don't understand how an _inuse check in stream_continue could ever
> do the wrong thing.  If the code is reorganised later, it seems more
> likely to me that there will introduced new situations where we need
> to do "see if we are still running, and if so do stream_continue,
> otherwise go to check_all_finished".

check_all_finished() must only ever be called once for each started
task.  Having

if ( inuse )
    continue()
else
    check_all_finished()

will make it far more likely to accidentally fire the overall stream
callback twice.

~Andrew

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 for-4.6 3/3] tools/libxl: Only continue stream operations if the stream is still in progress
  2015-07-28 16:52             ` Andrew Cooper
@ 2015-07-28 17:07               ` Ian Jackson
  2015-07-29 14:17                 ` Ian Campbell
  0 siblings, 1 reply; 16+ messages in thread
From: Ian Jackson @ 2015-07-28 17:07 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Wei Liu, Ian Campbell, Xen-devel

Andrew Cooper writes ("Re: [PATCH v2 for-4.6 3/3] tools/libxl: Only continue stream operations if the stream is still in progress"):
> check_all_finished() must only ever be called once for each started
> task.  Having
> 
> if ( inuse )
>     continue()
> else
>     check_all_finished()
> 
> will make it far more likely to accidentally fire the overall stream
> callback twice.

I think extra calls to check_all_finished are harmless.

But anyway, I can see that this conversation isn't really converging.
I think there is no functional difference between the two approaches
we are debating.  So this comes down to a matter of taste.

I don't want to put my foot down as maintainer, especially since you
wrote most of this code.  So, I think the bikeshed might as well be
your favoured shade of orange:

Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>

Ian.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 for-4.6 3/3] tools/libxl: Only continue stream operations if the stream is still in progress
  2015-07-28 17:07               ` Ian Jackson
@ 2015-07-29 14:17                 ` Ian Campbell
  2015-07-29 14:18                   ` Ian Campbell
  0 siblings, 1 reply; 16+ messages in thread
From: Ian Campbell @ 2015-07-29 14:17 UTC (permalink / raw)
  To: Ian Jackson, Andrew Cooper; +Cc: Wei Liu, Xen-devel

On Tue, 2015-07-28 at 18:07 +0100, Ian Jackson wrote:
> Andrew Cooper writes ("Re: [PATCH v2 for-4.6 3/3] tools/libxl: Only 
> continue stream operations if the stream is still in progress"):
> > check_all_finished() must only ever be called once for each started
> > task.  Having
> > 
> > if ( inuse )
> >     continue()
> > else
> >     check_all_finished()
> > 
> > will make it far more likely to accidentally fire the overall stream
> > callback twice.
> 
> I think extra calls to check_all_finished are harmless.
> 
> But anyway, I can see that this conversation isn't really converging.
> I think there is no functional difference between the two approaches
> we are debating.  So this comes down to a matter of taste.
> 
> I don't want to put my foot down as maintainer, especially since you
> wrote most of this code.  So, I think the bikeshed might as well be
> your favoured shade of orange:
> 
> Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>

This is an ack to the patch in <
1438015647-25377-4-git-send-email-andrew.cooper3@citrix.com> i.e. the
original 3/3 of this posting, right?

(and not, for example, an ack to some other proposal made during this
thread)

Ian.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 for-4.6 3/3] tools/libxl: Only continue stream operations if the stream is still in progress
  2015-07-29 14:17                 ` Ian Campbell
@ 2015-07-29 14:18                   ` Ian Campbell
  0 siblings, 0 replies; 16+ messages in thread
From: Ian Campbell @ 2015-07-29 14:18 UTC (permalink / raw)
  To: Ian Jackson, Andrew Cooper; +Cc: Wei Liu, Xen-devel

On Wed, 2015-07-29 at 15:17 +0100, Ian Campbell wrote:
> On Tue, 2015-07-28 at 18:07 +0100, Ian Jackson wrote:
> > Andrew Cooper writes ("Re: [PATCH v2 for-4.6 3/3] tools/libxl: Only 
> > continue stream operations if the stream is still in progress"):
> > > check_all_finished() must only ever be called once for each started
> > > task.  Having
> > > 
> > > if ( inuse )
> > >     continue()
> > > else
> > >     check_all_finished()
> > > 
> > > will make it far more likely to accidentally fire the overall stream
> > > callback twice.
> > 
> > I think extra calls to check_all_finished are harmless.
> > 
> > But anyway, I can see that this conversation isn't really converging.
> > I think there is no functional difference between the two approaches
> > we are debating.  So this comes down to a matter of taste.
> > 
> > I don't want to put my foot down as maintainer, especially since you
> > wrote most of this code.  So, I think the bikeshed might as well be
> > your favoured shade of orange:
> > 
> > Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
> 
> This is an ack to the patch in <
> 1438015647-25377-4-git-send-email-andrew.cooper3@citrix.com> i.e. the
> original 3/3 of this posting, right?
> 
> (and not, for example, an ack to some other proposal made during this
> thread)

Nevermind, I see now that this came back in "Fix libxl TOOLSTACK records
for migration v2" with the ack in place.

Ian.

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2015-07-29 14:18 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-27 16:47 [PATCH v2 for-4.6 0/3] Fixes to stream v2 task joining logic Andrew Cooper
2015-07-27 16:47 ` [PATCH v2 for-4.6 1/3] tools/libxl: Do not set stream->rc in stream_complete() Andrew Cooper
2015-07-28 10:26   ` Ian Jackson
2015-07-27 16:47 ` [PATCH v2 for-4.6 2/3] tools/libxl: Do not fire the stream callback multiple times Andrew Cooper
2015-07-28 10:27   ` Ian Jackson
2015-07-27 16:47 ` [PATCH v2 for-4.6 3/3] tools/libxl: Only continue stream operations if the stream is still in progress Andrew Cooper
2015-07-28 13:41   ` Ian Jackson
2015-07-28 13:56     ` Andrew Cooper
2015-07-28 15:12       ` Ian Jackson
2015-07-28 15:21         ` Andrew Cooper
2015-07-28 15:59           ` Ian Jackson
2015-07-28 16:52             ` Andrew Cooper
2015-07-28 17:07               ` Ian Jackson
2015-07-29 14:17                 ` Ian Campbell
2015-07-29 14:18                   ` Ian Campbell
2015-07-28 13:27 ` [PATCH v2 for-4.6 0/3] Fixes to stream v2 task joining logic Ian Campbell

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).