From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:51001) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1YVDPS-0003Ag-4G for qemu-devel@nongnu.org; Tue, 10 Mar 2015 02:12:55 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1YVDPQ-0001pI-8j for qemu-devel@nongnu.org; Tue, 10 Mar 2015 02:12:54 -0400 Received: from ozlabs.org ([2401:3900:2:1::2]:40953) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1YVDPP-0001od-Jr for qemu-devel@nongnu.org; Tue, 10 Mar 2015 02:12:52 -0400 Date: Tue, 10 Mar 2015 17:08:24 +1100 From: David Gibson Message-ID: <20150310060824.GD11973@voom.redhat.com> References: <1424883128-9841-1-git-send-email-dgilbert@redhat.com> <1424883128-9841-13-git-send-email-dgilbert@redhat.com> MIME-Version: 1.0 Content-Type: multipart/signed; micalg=pgp-sha1; protocol="application/pgp-signature"; boundary="a2FkP9tdjPU2nyhF" Content-Disposition: inline In-Reply-To: <1424883128-9841-13-git-send-email-dgilbert@redhat.com> Subject: Re: [Qemu-devel] [PATCH v5 12/45] Return path: Source handling of return path List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: "Dr. David Alan Gilbert (git)" Cc: aarcange@redhat.com, yamahata@private.email.ne.jp, quintela@redhat.com, qemu-devel@nongnu.org, amit.shah@redhat.com, pbonzini@redhat.com, yanghy@cn.fujitsu.com --a2FkP9tdjPU2nyhF Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Content-Transfer-Encoding: quoted-printable On Wed, Feb 25, 2015 at 04:51:35PM +0000, Dr. David Alan Gilbert (git) wrot= e: > From: "Dr. David Alan Gilbert" >=20 > Open a return path, and handle messages that are received upon it. >=20 > Signed-off-by: Dr. David Alan Gilbert > --- > include/migration/migration.h | 8 ++ > migration/migration.c | 178 ++++++++++++++++++++++++++++++++++++= +++++- > trace-events | 13 +++ > 3 files changed, 198 insertions(+), 1 deletion(-) >=20 > diff --git a/include/migration/migration.h b/include/migration/migration.h > index 6775747..5242ead 100644 > --- a/include/migration/migration.h > +++ b/include/migration/migration.h > @@ -73,6 +73,14 @@ struct MigrationState > =20 > int state; > MigrationParams params; > + > + /* State related to return path */ > + struct { > + QEMUFile *file; > + QemuThread rp_thread; > + bool error; > + } rp_state; > + > double mbps; > int64_t total_time; > int64_t downtime; > diff --git a/migration/migration.c b/migration/migration.c > index 80d234c..34cd4fe 100644 > --- a/migration/migration.c > +++ b/migration/migration.c > @@ -237,6 +237,23 @@ MigrationCapabilityStatusList *qmp_query_migrate_cap= abilities(Error **errp) > return head; > } > =20 > +/* > + * Return true if we're already in the middle of a migration > + * (i.e. any of the active or setup states) > + */ > +static bool migration_already_active(MigrationState *ms) > +{ > + switch (ms->state) { > + case MIG_STATE_ACTIVE: > + case MIG_STATE_SETUP: > + return true; > + > + default: > + return false; > + > + } > +} > + > static void get_xbzrle_cache_stats(MigrationInfo *info) > { > if (migrate_use_xbzrle()) { > @@ -362,6 +379,21 @@ static void migrate_set_state(MigrationState *s, int= old_state, int new_state) > } > } > =20 > +static void migrate_fd_cleanup_src_rp(MigrationState *ms) > +{ > + QEMUFile *rp =3D ms->rp_state.file; > + > + /* > + * When stuff goes wrong (e.g. failing destination) on the rp, it ca= n get > + * cleaned up from a few threads; make sure not to do it twice in pa= rallel > + */ > + rp =3D atomic_cmpxchg(&ms->rp_state.file, rp, NULL); A cmpxchg seems dangerously subtle for such a basic and infrequent operation, but ok. > + if (rp) { > + trace_migrate_fd_cleanup_src_rp(); > + qemu_fclose(rp); > + } > +} > + > static void migrate_fd_cleanup(void *opaque) > { > MigrationState *s =3D opaque; > @@ -369,6 +401,8 @@ static void migrate_fd_cleanup(void *opaque) > qemu_bh_delete(s->cleanup_bh); > s->cleanup_bh =3D NULL; > =20 > + migrate_fd_cleanup_src_rp(s); > + > if (s->file) { > trace_migrate_fd_cleanup(); > qemu_mutex_unlock_iothread(); > @@ -406,6 +440,11 @@ static void migrate_fd_cancel(MigrationState *s) > QEMUFile *f =3D migrate_get_current()->file; > trace_migrate_fd_cancel(); > =20 > + if (s->rp_state.file) { > + /* shutdown the rp socket, so causing the rp thread to shutdown = */ > + qemu_file_shutdown(s->rp_state.file); I missed where qemu_file_shutdown() was implemented. Does this introduce a leftover socket dependency? > + } > + > do { > old_state =3D s->state; > if (old_state !=3D MIG_STATE_SETUP && old_state !=3D MIG_STATE_A= CTIVE) { > @@ -658,8 +697,145 @@ int64_t migrate_xbzrle_cache_size(void) > return s->xbzrle_cache_size; > } > =20 > -/* migration thread support */ > +/* > + * Something bad happened to the RP stream, mark an error > + * The caller shall print something to indicate why > + */ > +static void source_return_path_bad(MigrationState *s) > +{ > + s->rp_state.error =3D true; > + migrate_fd_cleanup_src_rp(s); > +} > + > +/* > + * Handles messages sent on the return path towards the source VM > + * > + */ > +static void *source_return_path_thread(void *opaque) > +{ > + MigrationState *ms =3D opaque; > + QEMUFile *rp =3D ms->rp_state.file; > + uint16_t expected_len, header_len, header_com; > + const int max_len =3D 512; > + uint8_t buf[max_len]; > + uint32_t tmp32; > + int res; > + > + trace_source_return_path_thread_entry(); > + while (rp && !qemu_file_get_error(rp) && > + migration_already_active(ms)) { > + trace_source_return_path_thread_loop_top(); > + header_com =3D qemu_get_be16(rp); > + header_len =3D qemu_get_be16(rp); > + > + switch (header_com) { > + case MIG_RP_CMD_SHUT: > + case MIG_RP_CMD_PONG: > + expected_len =3D 4; Could the knowledge of expected lengths be folded into the switch below? Switching twice on the same thing is a bit icky. > + break; > + > + default: > + error_report("RP: Received invalid cmd 0x%04x length 0x%04x", > + header_com, header_len); > + source_return_path_bad(ms); > + goto out; > + } > =20 > + if (header_len > expected_len) { > + error_report("RP: Received command 0x%04x with" > + "incorrect length %d expecting %d", > + header_com, header_len, > + expected_len); > + source_return_path_bad(ms); > + goto out; > + } > + > + /* We know we've got a valid header by this point */ > + res =3D qemu_get_buffer(rp, buf, header_len); > + if (res !=3D header_len) { > + trace_source_return_path_thread_failed_read_cmd_data(); > + source_return_path_bad(ms); > + goto out; > + } > + > + /* OK, we have the command and the data */ > + switch (header_com) { > + case MIG_RP_CMD_SHUT: > + tmp32 =3D be32_to_cpup((uint32_t *)buf); > + trace_source_return_path_thread_shut(tmp32); > + if (tmp32) { > + error_report("RP: Sibling indicated error %d", tmp32); > + source_return_path_bad(ms); > + } > + /* > + * We'll let the main thread deal with closing the RP > + * we could do a shutdown(2) on it, but we're the only user > + * anyway, so there's nothing gained. > + */ > + goto out; > + > + case MIG_RP_CMD_PONG: > + tmp32 =3D be32_to_cpup((uint32_t *)buf); > + trace_source_return_path_thread_pong(tmp32); > + break; > + > + default: > + /* This shouldn't happen because we should catch this above = */ > + trace_source_return_path_bad_header_com(); > + } > + /* Latest command processed, now leave a gap for the next one */ > + header_com =3D MIG_RP_CMD_INVALID; This assignment will always get overwritten. > + } > + if (rp && qemu_file_get_error(rp)) { > + trace_source_return_path_thread_bad_end(); > + source_return_path_bad(ms); > + } > + > + trace_source_return_path_thread_end(); > +out: > + return NULL; > +} > + > +__attribute__ (( unused )) /* Until later in patch series */ > +static int open_outgoing_return_path(MigrationState *ms) Uh.. surely this should be open_incoming_return_path(); it's designed to be used on the source side, AFAICT. > +{ > + > + ms->rp_state.file =3D qemu_file_get_return_path(ms->file); > + if (!ms->rp_state.file) { > + return -1; > + } > + > + trace_open_outgoing_return_path(); > + qemu_thread_create(&ms->rp_state.rp_thread, "return path", > + source_return_path_thread, ms, QEMU_THREAD_JOINAB= LE); > + > + trace_open_outgoing_return_path_continue(); > + > + return 0; > +} > + > +__attribute__ (( unused )) /* Until later in patch series */ > +static void await_outgoing_return_path_close(MigrationState *ms) Likewise "incoming" here, surely. > +{ > + /* > + * If this is a normal exit then the destination will send a SHUT an= d the > + * rp_thread will exit, however if there's an error we need to cause > + * it to exit, which we can do by a shutdown. > + * (canceling must also shutdown to stop us getting stuck here if > + * the destination died at just the wrong place) > + */ > + if (qemu_file_get_error(ms->file) && ms->rp_state.file) { > + qemu_file_shutdown(ms->rp_state.file); > + } > + trace_await_outgoing_return_path_joining(); > + qemu_thread_join(&ms->rp_state.rp_thread); > + trace_await_outgoing_return_path_close(); > +} > + > +/* > + * Master migration thread on the source VM. > + * It drives the migration and pumps the data down the outgoing channel. > + */ > static void *migration_thread(void *opaque) > { > MigrationState *s =3D opaque; > diff --git a/trace-events b/trace-events > index 4f3eff8..1951b25 100644 > --- a/trace-events > +++ b/trace-events > @@ -1374,12 +1374,25 @@ flic_no_device_api(int err) "flic: no Device Cont= ral API support %d" > flic_reset_failed(int err) "flic: reset failed %d" > =20 > # migration.c > +await_outgoing_return_path_close(void) "" > +await_outgoing_return_path_joining(void) "" > migrate_set_state(int new_state) "new state %d" > migrate_fd_cleanup(void) "" > +migrate_fd_cleanup_src_rp(void) "" > migrate_fd_error(void) "" > migrate_fd_cancel(void) "" > migrate_pending(uint64_t size, uint64_t max) "pending size %" PRIu64 " m= ax %" PRIu64 > migrate_send_rp_message(int cmd, uint16_t len) "cmd=3D%d, len=3D%d" > +open_outgoing_return_path(void) "" > +open_outgoing_return_path_continue(void) "" > +source_return_path_thread_bad_end(void) "" > +source_return_path_bad_header_com(void) "" > +source_return_path_thread_end(void) "" > +source_return_path_thread_entry(void) "" > +source_return_path_thread_failed_read_cmd_data(void) "" > +source_return_path_thread_loop_top(void) "" > +source_return_path_thread_pong(uint32_t val) "%x" > +source_return_path_thread_shut(uint32_t val) "%x" > migrate_transferred(uint64_t tranferred, uint64_t time_spent, double ban= dwidth, uint64_t size) "transferred %" PRIu64 " time_spent %" PRIu64 " band= width %g max_size %" PRId64 > =20 > # migration/rdma.c --=20 David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson --a2FkP9tdjPU2nyhF Content-Type: application/pgp-signature -----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQIcBAEBAgAGBQJU/opXAAoJEGw4ysog2bOSwpUP/A6FY55Lu3H8mQL7ZSBhycMl 03ZjynRn5gS45AgMDKYT3KRoUrFvdrugnrvSfRmykFyzFw4zRQOCvHvc28He3pIl V12mPDTma8VuJF0+bEiuxjSFe9/JJfWIGnS5osJ9GiTjGIHiWF6idGcP0bVmobAd 6Jt/w2tfjXYZhUnmi+IV2qI2KT7onFrpoxiwkti2wRBD47pFoF/uv0yiQEw1dslQ yNeYHc/XU+X2/DGjcUM7Z7RnCMQ9BMozDAWSwdwV3Dgg/Wzchd0/WfVF6bx2vyzM oNuf9yc5RB3sSFk4arV3l31awwhMj/o0QvF14qcgZVt6CZMkeEze/EOmsA5pX9Ay JZB6ENiD195i+GT/HIvfNiqC4ijPOqw5GBTYk1zG32MLfu1qvaXGqtdNJACWLzsp tyVl7XdcEV7qcaBkBVJmk5Ow5Nf2e1Bj+gJNnz1PGtlNiSJziR3fi9oOG/4XimZv uTVdJQnZMa1GTXHpP0Kzyu+LtbmjUHgxH77+fPheLVerQxntbsY4gsAH4vk533w7 XfJogzTNIw0umcpdE2Rw/880S0QOao0Sd0vZGiXhq4vE4LmrvgJdPUQYqFkpVkFG SBNxnujFKCv6rMqhO6IlCtujj9ryUz9sDzhy/UE+TcA4VomngUqr/AL6Y14M4OVg 4qXlHEXOtVWxnGlrG0+4 =YPmn -----END PGP SIGNATURE----- --a2FkP9tdjPU2nyhF--