All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v2 0/8] fdc: Clean up and fix command processing
@ 2015-05-21 13:19 Kevin Wolf
  2015-05-21 13:19 ` [Qemu-devel] [PATCH v2 1/8] fdc: Rename fdctrl_reset_fifo() to fdctrl_to_command_phase() Kevin Wolf
                   ` (8 more replies)
  0 siblings, 9 replies; 25+ messages in thread
From: Kevin Wolf @ 2015-05-21 13:19 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, peter.maydell, jsnow, qemu-devel

The hotfix for CVE-2015-3456 fixed the security problem, but didn't
fully correct the behaviour of the emulated floppy controller.  This
series fixes the bug that was the root cause for the problem, and does
some cleanup in the FIFO access functions to make the command processing
more obvious.

v2:
- Patch 3: Include fdctrl->phase in the migration state. [Peter]
- Patch 4: Added a comment to clarify an assertion [Peter]
- Patch 5: Check pos == 0 instead of fdctrl->data_pos == 1 [John]
- Patch 7: Improved commit message [John]

FWIW, when testing this, I found that migration with active I/O on a
floppy drive doesn't work very reliably. These problems were there
before the series and they stay after the series. I verified as good
as I could that the subsection magic does its job, and I'll leave
fixing the other floppy migration bugs for someone else.


Kevin Wolf (8):
  fdc: Rename fdctrl_reset_fifo() to fdctrl_to_command_phase()
  fdc: Rename fdctrl_set_fifo() to fdctrl_to_result_phase()
  fdc: Introduce fdctrl->phase
  fdc: Use phase in fdctrl_write_data()
  fdc: Code cleanup in fdctrl_write_data()
  fdc: Disentangle phases in fdctrl_read_data()
  fdc: Fix MSR.RQM flag
  fdc-test: Test state for existing cases more thoroughly

 hw/block/fdc.c   | 296 ++++++++++++++++++++++++++++++++++++++++---------------
 tests/fdc-test.c |  34 +++++++
 2 files changed, 253 insertions(+), 77 deletions(-)

-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [Qemu-devel] [PATCH v2 1/8] fdc: Rename fdctrl_reset_fifo() to fdctrl_to_command_phase()
  2015-05-21 13:19 [Qemu-devel] [PATCH v2 0/8] fdc: Clean up and fix command processing Kevin Wolf
@ 2015-05-21 13:19 ` Kevin Wolf
  2015-05-21 13:19 ` [Qemu-devel] [PATCH v2 2/8] fdc: Rename fdctrl_set_fifo() to fdctrl_to_result_phase() Kevin Wolf
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 25+ messages in thread
From: Kevin Wolf @ 2015-05-21 13:19 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, peter.maydell, jsnow, qemu-devel

What all callers of fdctrl_reset_fifo() really want to do is to start
the command phase, where writes to the data port initiate a new command.

The function doesn't only clear the FIFO, but also sets up the state so
that a new command can be received. Rename it to reflect this.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
---
 hw/block/fdc.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index d8a8edd..9f95ace 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -324,7 +324,7 @@ static void fd_revalidate(FDrive *drv)
 /* Intel 82078 floppy disk controller emulation          */
 
 static void fdctrl_reset(FDCtrl *fdctrl, int do_irq);
-static void fdctrl_reset_fifo(FDCtrl *fdctrl);
+static void fdctrl_to_command_phase(FDCtrl *fdctrl);
 static int fdctrl_transfer_handler (void *opaque, int nchan,
                                     int dma_pos, int dma_len);
 static void fdctrl_raise_irq(FDCtrl *fdctrl);
@@ -918,7 +918,7 @@ static void fdctrl_reset(FDCtrl *fdctrl, int do_irq)
     fdctrl->data_dir = FD_DIR_WRITE;
     for (i = 0; i < MAX_FD; i++)
         fd_recalibrate(&fdctrl->drives[i]);
-    fdctrl_reset_fifo(fdctrl);
+    fdctrl_to_command_phase(fdctrl);
     if (do_irq) {
         fdctrl->status0 |= FD_SR0_RDYCHG;
         fdctrl_raise_irq(fdctrl);
@@ -1134,8 +1134,8 @@ static uint32_t fdctrl_read_dir(FDCtrl *fdctrl)
     return retval;
 }
 
-/* FIFO state control */
-static void fdctrl_reset_fifo(FDCtrl *fdctrl)
+/* Clear the FIFO and update the state for receiving the next command */
+static void fdctrl_to_command_phase(FDCtrl *fdctrl)
 {
     fdctrl->data_dir = FD_DIR_WRITE;
     fdctrl->data_pos = 0;
@@ -1533,7 +1533,7 @@ static uint32_t fdctrl_read_data(FDCtrl *fdctrl)
         if (fdctrl->msr & FD_MSR_NONDMA) {
             fdctrl_stop_transfer(fdctrl, 0x00, 0x00, 0x00);
         } else {
-            fdctrl_reset_fifo(fdctrl);
+            fdctrl_to_command_phase(fdctrl);
             fdctrl_reset_irq(fdctrl);
         }
     }
@@ -1667,7 +1667,7 @@ static void fdctrl_handle_restore(FDCtrl *fdctrl, int direction)
     fdctrl->config = fdctrl->fifo[11];
     fdctrl->precomp_trk = fdctrl->fifo[12];
     fdctrl->pwrd = fdctrl->fifo[13];
-    fdctrl_reset_fifo(fdctrl);
+    fdctrl_to_command_phase(fdctrl);
 }
 
 static void fdctrl_handle_save(FDCtrl *fdctrl, int direction)
@@ -1746,7 +1746,7 @@ static void fdctrl_handle_specify(FDCtrl *fdctrl, int direction)
     else
         fdctrl->dor |= FD_DOR_DMAEN;
     /* No result back */
-    fdctrl_reset_fifo(fdctrl);
+    fdctrl_to_command_phase(fdctrl);
 }
 
 static void fdctrl_handle_sense_drive_status(FDCtrl *fdctrl, int direction)
@@ -1772,7 +1772,7 @@ static void fdctrl_handle_recalibrate(FDCtrl *fdctrl, int direction)
     SET_CUR_DRV(fdctrl, fdctrl->fifo[1] & FD_DOR_SELMASK);
     cur_drv = get_cur_drv(fdctrl);
     fd_recalibrate(cur_drv);
-    fdctrl_reset_fifo(fdctrl);
+    fdctrl_to_command_phase(fdctrl);
     /* Raise Interrupt */
     fdctrl->status0 |= FD_SR0_SEEK;
     fdctrl_raise_irq(fdctrl);
@@ -1808,7 +1808,7 @@ static void fdctrl_handle_seek(FDCtrl *fdctrl, int direction)
 
     SET_CUR_DRV(fdctrl, fdctrl->fifo[1] & FD_DOR_SELMASK);
     cur_drv = get_cur_drv(fdctrl);
-    fdctrl_reset_fifo(fdctrl);
+    fdctrl_to_command_phase(fdctrl);
     /* The seek command just sends step pulses to the drive and doesn't care if
      * there is a medium inserted of if it's banging the head against the drive.
      */
@@ -1825,7 +1825,7 @@ static void fdctrl_handle_perpendicular_mode(FDCtrl *fdctrl, int direction)
     if (fdctrl->fifo[1] & 0x80)
         cur_drv->perpendicular = fdctrl->fifo[1] & 0x7;
     /* No result back */
-    fdctrl_reset_fifo(fdctrl);
+    fdctrl_to_command_phase(fdctrl);
 }
 
 static void fdctrl_handle_configure(FDCtrl *fdctrl, int direction)
@@ -1833,7 +1833,7 @@ static void fdctrl_handle_configure(FDCtrl *fdctrl, int direction)
     fdctrl->config = fdctrl->fifo[2];
     fdctrl->precomp_trk =  fdctrl->fifo[3];
     /* No result back */
-    fdctrl_reset_fifo(fdctrl);
+    fdctrl_to_command_phase(fdctrl);
 }
 
 static void fdctrl_handle_powerdown_mode(FDCtrl *fdctrl, int direction)
@@ -1846,7 +1846,7 @@ static void fdctrl_handle_powerdown_mode(FDCtrl *fdctrl, int direction)
 static void fdctrl_handle_option(FDCtrl *fdctrl, int direction)
 {
     /* No result back */
-    fdctrl_reset_fifo(fdctrl);
+    fdctrl_to_command_phase(fdctrl);
 }
 
 static void fdctrl_handle_drive_specification_command(FDCtrl *fdctrl, int direction)
@@ -1864,7 +1864,7 @@ static void fdctrl_handle_drive_specification_command(FDCtrl *fdctrl, int direct
             fdctrl->fifo[3] = 0;
             fdctrl_set_fifo(fdctrl, 4);
         } else {
-            fdctrl_reset_fifo(fdctrl);
+            fdctrl_to_command_phase(fdctrl);
         }
     } else if (fdctrl->data_len > 7) {
         /* ERROR */
@@ -1887,7 +1887,7 @@ static void fdctrl_handle_relative_seek_in(FDCtrl *fdctrl, int direction)
         fd_seek(cur_drv, cur_drv->head,
                 cur_drv->track + fdctrl->fifo[2], cur_drv->sect, 1);
     }
-    fdctrl_reset_fifo(fdctrl);
+    fdctrl_to_command_phase(fdctrl);
     /* Raise Interrupt */
     fdctrl->status0 |= FD_SR0_SEEK;
     fdctrl_raise_irq(fdctrl);
@@ -1905,7 +1905,7 @@ static void fdctrl_handle_relative_seek_out(FDCtrl *fdctrl, int direction)
         fd_seek(cur_drv, cur_drv->head,
                 cur_drv->track - fdctrl->fifo[2], cur_drv->sect, 1);
     }
-    fdctrl_reset_fifo(fdctrl);
+    fdctrl_to_command_phase(fdctrl);
     /* Raise Interrupt */
     fdctrl->status0 |= FD_SR0_SEEK;
     fdctrl_raise_irq(fdctrl);
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [Qemu-devel] [PATCH v2 2/8] fdc: Rename fdctrl_set_fifo() to fdctrl_to_result_phase()
  2015-05-21 13:19 [Qemu-devel] [PATCH v2 0/8] fdc: Clean up and fix command processing Kevin Wolf
  2015-05-21 13:19 ` [Qemu-devel] [PATCH v2 1/8] fdc: Rename fdctrl_reset_fifo() to fdctrl_to_command_phase() Kevin Wolf
@ 2015-05-21 13:19 ` Kevin Wolf
  2015-05-21 13:19 ` [Qemu-devel] [PATCH v2 3/8] fdc: Introduce fdctrl->phase Kevin Wolf
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 25+ messages in thread
From: Kevin Wolf @ 2015-05-21 13:19 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, peter.maydell, jsnow, qemu-devel

What callers really do with this function is to switch from execution
phase (including data transfers) to result phase where the guest can
read out one or more status bytes from the FIFO (the number depends on
the command).

Rename the function accordingly.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
---
 hw/block/fdc.c | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 9f95ace..8c41434 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -1142,8 +1142,9 @@ static void fdctrl_to_command_phase(FDCtrl *fdctrl)
     fdctrl->msr &= ~(FD_MSR_CMDBUSY | FD_MSR_DIO);
 }
 
-/* Set FIFO status for the host to read */
-static void fdctrl_set_fifo(FDCtrl *fdctrl, int fifo_len)
+/* Update the state to allow the guest to read out the command status.
+ * @fifo_len is the number of result bytes to be read out. */
+static void fdctrl_to_result_phase(FDCtrl *fdctrl, int fifo_len)
 {
     fdctrl->data_dir = FD_DIR_READ;
     fdctrl->data_len = fifo_len;
@@ -1157,7 +1158,7 @@ static void fdctrl_unimplemented(FDCtrl *fdctrl, int direction)
     qemu_log_mask(LOG_UNIMP, "fdc: unimplemented command 0x%02x\n",
                   fdctrl->fifo[0]);
     fdctrl->fifo[0] = FD_SR0_INVCMD;
-    fdctrl_set_fifo(fdctrl, 1);
+    fdctrl_to_result_phase(fdctrl, 1);
 }
 
 /* Seek to next sector
@@ -1238,7 +1239,7 @@ static void fdctrl_stop_transfer(FDCtrl *fdctrl, uint8_t status0,
     fdctrl->msr |= FD_MSR_RQM | FD_MSR_DIO;
     fdctrl->msr &= ~FD_MSR_NONDMA;
 
-    fdctrl_set_fifo(fdctrl, 7);
+    fdctrl_to_result_phase(fdctrl, 7);
     fdctrl_raise_irq(fdctrl);
 }
 
@@ -1606,7 +1607,7 @@ static void fdctrl_handle_lock(FDCtrl *fdctrl, int direction)
 {
     fdctrl->lock = (fdctrl->fifo[0] & 0x80) ? 1 : 0;
     fdctrl->fifo[0] = fdctrl->lock << 4;
-    fdctrl_set_fifo(fdctrl, 1);
+    fdctrl_to_result_phase(fdctrl, 1);
 }
 
 static void fdctrl_handle_dumpreg(FDCtrl *fdctrl, int direction)
@@ -1631,20 +1632,20 @@ static void fdctrl_handle_dumpreg(FDCtrl *fdctrl, int direction)
         (cur_drv->perpendicular << 2);
     fdctrl->fifo[8] = fdctrl->config;
     fdctrl->fifo[9] = fdctrl->precomp_trk;
-    fdctrl_set_fifo(fdctrl, 10);
+    fdctrl_to_result_phase(fdctrl, 10);
 }
 
 static void fdctrl_handle_version(FDCtrl *fdctrl, int direction)
 {
     /* Controller's version */
     fdctrl->fifo[0] = fdctrl->version;
-    fdctrl_set_fifo(fdctrl, 1);
+    fdctrl_to_result_phase(fdctrl, 1);
 }
 
 static void fdctrl_handle_partid(FDCtrl *fdctrl, int direction)
 {
     fdctrl->fifo[0] = 0x41; /* Stepping 1 */
-    fdctrl_set_fifo(fdctrl, 1);
+    fdctrl_to_result_phase(fdctrl, 1);
 }
 
 static void fdctrl_handle_restore(FDCtrl *fdctrl, int direction)
@@ -1697,7 +1698,7 @@ static void fdctrl_handle_save(FDCtrl *fdctrl, int direction)
     fdctrl->fifo[12] = fdctrl->pwrd;
     fdctrl->fifo[13] = 0;
     fdctrl->fifo[14] = 0;
-    fdctrl_set_fifo(fdctrl, 15);
+    fdctrl_to_result_phase(fdctrl, 15);
 }
 
 static void fdctrl_handle_readid(FDCtrl *fdctrl, int direction)
@@ -1762,7 +1763,7 @@ static void fdctrl_handle_sense_drive_status(FDCtrl *fdctrl, int direction)
         (cur_drv->head << 2) |
         GET_CUR_DRV(fdctrl) |
         0x28;
-    fdctrl_set_fifo(fdctrl, 1);
+    fdctrl_to_result_phase(fdctrl, 1);
 }
 
 static void fdctrl_handle_recalibrate(FDCtrl *fdctrl, int direction)
@@ -1788,7 +1789,7 @@ static void fdctrl_handle_sense_interrupt_status(FDCtrl *fdctrl, int direction)
         fdctrl->reset_sensei--;
     } else if (!(fdctrl->sra & FD_SRA_INTPEND)) {
         fdctrl->fifo[0] = FD_SR0_INVCMD;
-        fdctrl_set_fifo(fdctrl, 1);
+        fdctrl_to_result_phase(fdctrl, 1);
         return;
     } else {
         fdctrl->fifo[0] =
@@ -1797,7 +1798,7 @@ static void fdctrl_handle_sense_interrupt_status(FDCtrl *fdctrl, int direction)
     }
 
     fdctrl->fifo[1] = cur_drv->track;
-    fdctrl_set_fifo(fdctrl, 2);
+    fdctrl_to_result_phase(fdctrl, 2);
     fdctrl_reset_irq(fdctrl);
     fdctrl->status0 = FD_SR0_RDYCHG;
 }
@@ -1840,7 +1841,7 @@ static void fdctrl_handle_powerdown_mode(FDCtrl *fdctrl, int direction)
 {
     fdctrl->pwrd = fdctrl->fifo[1];
     fdctrl->fifo[0] = fdctrl->fifo[1];
-    fdctrl_set_fifo(fdctrl, 1);
+    fdctrl_to_result_phase(fdctrl, 1);
 }
 
 static void fdctrl_handle_option(FDCtrl *fdctrl, int direction)
@@ -1862,7 +1863,7 @@ static void fdctrl_handle_drive_specification_command(FDCtrl *fdctrl, int direct
             fdctrl->fifo[0] = fdctrl->fifo[1];
             fdctrl->fifo[2] = 0;
             fdctrl->fifo[3] = 0;
-            fdctrl_set_fifo(fdctrl, 4);
+            fdctrl_to_result_phase(fdctrl, 4);
         } else {
             fdctrl_to_command_phase(fdctrl);
         }
@@ -1870,7 +1871,7 @@ static void fdctrl_handle_drive_specification_command(FDCtrl *fdctrl, int direct
         /* ERROR */
         fdctrl->fifo[0] = 0x80 |
             (cur_drv->head << 2) | GET_CUR_DRV(fdctrl);
-        fdctrl_set_fifo(fdctrl, 1);
+        fdctrl_to_result_phase(fdctrl, 1);
     }
 }
 
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [Qemu-devel] [PATCH v2 3/8] fdc: Introduce fdctrl->phase
  2015-05-21 13:19 [Qemu-devel] [PATCH v2 0/8] fdc: Clean up and fix command processing Kevin Wolf
  2015-05-21 13:19 ` [Qemu-devel] [PATCH v2 1/8] fdc: Rename fdctrl_reset_fifo() to fdctrl_to_command_phase() Kevin Wolf
  2015-05-21 13:19 ` [Qemu-devel] [PATCH v2 2/8] fdc: Rename fdctrl_set_fifo() to fdctrl_to_result_phase() Kevin Wolf
@ 2015-05-21 13:19 ` Kevin Wolf
  2015-05-21 21:55   ` John Snow
  2015-05-21 13:19 ` [Qemu-devel] [PATCH v2 4/8] fdc: Use phase in fdctrl_write_data() Kevin Wolf
                   ` (5 subsequent siblings)
  8 siblings, 1 reply; 25+ messages in thread
From: Kevin Wolf @ 2015-05-21 13:19 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, peter.maydell, jsnow, qemu-devel

The floppy controller spec describes three different controller phases,
which are currently not explicitly modelled in our emulation. Instead,
each phase is represented by a combination of flags in registers.

This patch makes explicit in which phase the controller currently is.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/block/fdc.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 89 insertions(+)

diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 8c41434..f5bcf0b 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -495,6 +495,33 @@ enum {
     FD_DIR_DSKCHG   = 0x80,
 };
 
+/*
+ * See chapter 5.0 "Controller phases" of the spec:
+ *
+ * Command phase:
+ * The host writes a command and its parameters into the FIFO. The command
+ * phase is completed when all parameters for the command have been supplied,
+ * and execution phase is entered.
+ *
+ * Execution phase:
+ * Data transfers, either DMA or non-DMA. For non-DMA transfers, the FIFO
+ * contains the payload now, otherwise it's unused. When all bytes of the
+ * required data have been transferred, the state is switched to either result
+ * phase (if the command produces status bytes) or directly back into the
+ * command phase for the next command.
+ *
+ * Result phase:
+ * The host reads out the FIFO, which contains one or more result bytes now.
+ */
+enum {
+    /* Only for migration: reconstruct phase from registers like qemu 2.3 */
+    FD_PHASE_RECONSTRUCT    = 0,
+
+    FD_PHASE_COMMAND        = 1,
+    FD_PHASE_EXECUTION      = 2,
+    FD_PHASE_RESULT         = 3,
+};
+
 #define FD_MULTI_TRACK(state) ((state) & FD_STATE_MULTI)
 #define FD_FORMAT_CMD(state) ((state) & FD_STATE_FORMAT)
 
@@ -504,6 +531,7 @@ struct FDCtrl {
     /* Controller state */
     QEMUTimer *result_timer;
     int dma_chann;
+    uint8_t phase;
     /* Controller's identification */
     uint8_t version;
     /* HW */
@@ -744,6 +772,28 @@ static const VMStateDescription vmstate_fdrive = {
     }
 };
 
+/*
+ * Reconstructs the phase from register values according to the logic that was
+ * implemented in qemu 2.3. This is the default value that is used if the phase
+ * subsection is not present on migration.
+ *
+ * Don't change this function to reflect newer qemu versions, it is part of
+ * the migration ABI.
+ */
+static int reconstruct_phase(FDCtrl *fdctrl)
+{
+    if (fdctrl->msr & FD_MSR_NONDMA) {
+        return FD_PHASE_EXECUTION;
+    } else if ((fdctrl->msr & FD_MSR_RQM) == 0) {
+        /* qemu 2.3 disabled RQM only during DMA transfers */
+        return FD_PHASE_EXECUTION;
+    } else if (fdctrl->msr & FD_MSR_DIO) {
+        return FD_PHASE_RESULT;
+    } else {
+        return FD_PHASE_COMMAND;
+    }
+}
+
 static void fdc_pre_save(void *opaque)
 {
     FDCtrl *s = opaque;
@@ -751,12 +801,24 @@ static void fdc_pre_save(void *opaque)
     s->dor_vmstate = s->dor | GET_CUR_DRV(s);
 }
 
+static int fdc_pre_load(void *opaque)
+{
+    FDCtrl *s = opaque;
+    s->phase = FD_PHASE_RECONSTRUCT;
+    return 0;
+}
+
 static int fdc_post_load(void *opaque, int version_id)
 {
     FDCtrl *s = opaque;
 
     SET_CUR_DRV(s, s->dor_vmstate & FD_DOR_SELMASK);
     s->dor = s->dor_vmstate & ~FD_DOR_SELMASK;
+
+    if (s->phase == FD_PHASE_RECONSTRUCT) {
+        s->phase = reconstruct_phase(s);
+    }
+
     return 0;
 }
 
@@ -794,11 +856,29 @@ static const VMStateDescription vmstate_fdc_result_timer = {
     }
 };
 
+static bool fdc_phase_needed(void *opaque)
+{
+    FDCtrl *fdctrl = opaque;
+
+    return reconstruct_phase(fdctrl) != fdctrl->phase;
+}
+
+static const VMStateDescription vmstate_fdc_phase = {
+    .name = "fdc/phase",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT8(phase, FDCtrl),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static const VMStateDescription vmstate_fdc = {
     .name = "fdc",
     .version_id = 2,
     .minimum_version_id = 2,
     .pre_save = fdc_pre_save,
+    .pre_load = fdc_pre_load,
     .post_load = fdc_post_load,
     .fields = (VMStateField[]) {
         /* Controller State */
@@ -839,6 +919,9 @@ static const VMStateDescription vmstate_fdc = {
             .vmsd = &vmstate_fdc_result_timer,
             .needed = fdc_result_timer_needed,
         } , {
+            .vmsd = &vmstate_fdc_phase,
+            .needed = fdc_phase_needed,
+        } , {
             /* empty */
         }
     }
@@ -1137,6 +1220,7 @@ static uint32_t fdctrl_read_dir(FDCtrl *fdctrl)
 /* Clear the FIFO and update the state for receiving the next command */
 static void fdctrl_to_command_phase(FDCtrl *fdctrl)
 {
+    fdctrl->phase = FD_PHASE_COMMAND;
     fdctrl->data_dir = FD_DIR_WRITE;
     fdctrl->data_pos = 0;
     fdctrl->msr &= ~(FD_MSR_CMDBUSY | FD_MSR_DIO);
@@ -1146,6 +1230,7 @@ static void fdctrl_to_command_phase(FDCtrl *fdctrl)
  * @fifo_len is the number of result bytes to be read out. */
 static void fdctrl_to_result_phase(FDCtrl *fdctrl, int fifo_len)
 {
+    fdctrl->phase = FD_PHASE_RESULT;
     fdctrl->data_dir = FD_DIR_READ;
     fdctrl->data_len = fifo_len;
     fdctrl->data_pos = 0;
@@ -1912,6 +1997,9 @@ static void fdctrl_handle_relative_seek_out(FDCtrl *fdctrl, int direction)
     fdctrl_raise_irq(fdctrl);
 }
 
+/*
+ * Handlers for the execution phase of each command
+ */
 static const struct {
     uint8_t value;
     uint8_t mask;
@@ -2015,6 +2103,7 @@ static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value)
         /* We now have all parameters
          * and will be able to treat the command
          */
+        fdctrl->phase = FD_PHASE_EXECUTION;
         if (fdctrl->data_state & FD_STATE_FORMAT) {
             fdctrl_format_sector(fdctrl);
             return;
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [Qemu-devel] [PATCH v2 4/8] fdc: Use phase in fdctrl_write_data()
  2015-05-21 13:19 [Qemu-devel] [PATCH v2 0/8] fdc: Clean up and fix command processing Kevin Wolf
                   ` (2 preceding siblings ...)
  2015-05-21 13:19 ` [Qemu-devel] [PATCH v2 3/8] fdc: Introduce fdctrl->phase Kevin Wolf
@ 2015-05-21 13:19 ` Kevin Wolf
  2015-05-21 13:19 ` [Qemu-devel] [PATCH v2 5/8] fdc: Code cleanup " Kevin Wolf
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 25+ messages in thread
From: Kevin Wolf @ 2015-05-21 13:19 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, peter.maydell, jsnow, qemu-devel

Instead of relying on a flag in the MSR to distinguish controller phases,
use the explicit phase that we store now. Assertions of the right MSR
flags are added.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
---
 hw/block/fdc.c | 69 ++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 41 insertions(+), 28 deletions(-)

diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index f5bcf0b..e3515a1 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -2059,8 +2059,12 @@ static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value)
         return;
     }
     fdctrl->dsr &= ~FD_DSR_PWRDOWN;
-    /* Is it write command time ? */
-    if (fdctrl->msr & FD_MSR_NONDMA) {
+
+    switch (fdctrl->phase) {
+    case FD_PHASE_EXECUTION:
+        /* For DMA requests, RQM should be cleared during execution phase, so
+         * we would have errored out above. */
+        assert(fdctrl->msr & FD_MSR_NONDMA);
         /* FIFO data write */
         pos = fdctrl->data_pos++;
         pos %= FD_SECTOR_LEN;
@@ -2072,12 +2076,12 @@ static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value)
                 < 0) {
                 FLOPPY_DPRINTF("error writing sector %d\n",
                                fd_sector(cur_drv));
-                return;
+                break;
             }
             if (!fdctrl_seek_to_next_sect(fdctrl, cur_drv)) {
                 FLOPPY_DPRINTF("error seeking to next sector %d\n",
                                fd_sector(cur_drv));
-                return;
+                break;
             }
         }
         /* Switch from transfer mode to status mode
@@ -2085,33 +2089,42 @@ static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value)
          */
         if (fdctrl->data_pos == fdctrl->data_len)
             fdctrl_stop_transfer(fdctrl, 0x00, 0x00, 0x00);
-        return;
-    }
-    if (fdctrl->data_pos == 0) {
-        /* Command */
-        pos = command_to_handler[value & 0xff];
-        FLOPPY_DPRINTF("%s command\n", handlers[pos].name);
-        fdctrl->data_len = handlers[pos].parameters + 1;
-        fdctrl->msr |= FD_MSR_CMDBUSY;
-    }
+        break;
 
-    FLOPPY_DPRINTF("%s: %02x\n", __func__, value);
-    pos = fdctrl->data_pos++;
-    pos %= FD_SECTOR_LEN;
-    fdctrl->fifo[pos] = value;
-    if (fdctrl->data_pos == fdctrl->data_len) {
-        /* We now have all parameters
-         * and will be able to treat the command
-         */
-        fdctrl->phase = FD_PHASE_EXECUTION;
-        if (fdctrl->data_state & FD_STATE_FORMAT) {
-            fdctrl_format_sector(fdctrl);
-            return;
+    case FD_PHASE_COMMAND:
+        assert(!(fdctrl->msr & FD_MSR_NONDMA));
+
+        if (fdctrl->data_pos == 0) {
+            /* Command */
+            pos = command_to_handler[value & 0xff];
+            FLOPPY_DPRINTF("%s command\n", handlers[pos].name);
+            fdctrl->data_len = handlers[pos].parameters + 1;
+            fdctrl->msr |= FD_MSR_CMDBUSY;
+        }
+
+        FLOPPY_DPRINTF("%s: %02x\n", __func__, value);
+        pos = fdctrl->data_pos++;
+        pos %= FD_SECTOR_LEN;
+        fdctrl->fifo[pos] = value;
+        if (fdctrl->data_pos == fdctrl->data_len) {
+            /* We now have all parameters
+             * and will be able to treat the command
+             */
+            fdctrl->phase = FD_PHASE_EXECUTION;
+            if (fdctrl->data_state & FD_STATE_FORMAT) {
+                fdctrl_format_sector(fdctrl);
+                break;
+            }
+
+            pos = command_to_handler[fdctrl->fifo[0] & 0xff];
+            FLOPPY_DPRINTF("treat %s command\n", handlers[pos].name);
+            (*handlers[pos].handler)(fdctrl, handlers[pos].direction);
         }
+        break;
 
-        pos = command_to_handler[fdctrl->fifo[0] & 0xff];
-        FLOPPY_DPRINTF("treat %s command\n", handlers[pos].name);
-        (*handlers[pos].handler)(fdctrl, handlers[pos].direction);
+    case FD_PHASE_RESULT:
+    default:
+        abort();
     }
 }
 
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [Qemu-devel] [PATCH v2 5/8] fdc: Code cleanup in fdctrl_write_data()
  2015-05-21 13:19 [Qemu-devel] [PATCH v2 0/8] fdc: Clean up and fix command processing Kevin Wolf
                   ` (3 preceding siblings ...)
  2015-05-21 13:19 ` [Qemu-devel] [PATCH v2 4/8] fdc: Use phase in fdctrl_write_data() Kevin Wolf
@ 2015-05-21 13:19 ` Kevin Wolf
  2015-05-21 21:34   ` John Snow
  2015-05-21 13:19 ` [Qemu-devel] [PATCH v2 6/8] fdc: Disentangle phases in fdctrl_read_data() Kevin Wolf
                   ` (3 subsequent siblings)
  8 siblings, 1 reply; 25+ messages in thread
From: Kevin Wolf @ 2015-05-21 13:19 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, peter.maydell, jsnow, qemu-devel

Factor out a few common lines of code, reformat, improve comments.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/block/fdc.c | 63 ++++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 39 insertions(+), 24 deletions(-)

diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index e3515a1..1cc1e3a 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -2000,14 +2000,16 @@ static void fdctrl_handle_relative_seek_out(FDCtrl *fdctrl, int direction)
 /*
  * Handlers for the execution phase of each command
  */
-static const struct {
+typedef struct FDCtrlCommand {
     uint8_t value;
     uint8_t mask;
     const char* name;
     int parameters;
     void (*handler)(FDCtrl *fdctrl, int direction);
     int direction;
-} handlers[] = {
+} FDCtrlCommand;
+
+static const FDCtrlCommand handlers[] = {
     { FD_CMD_READ, 0x1f, "READ", 8, fdctrl_start_transfer, FD_DIR_READ },
     { FD_CMD_WRITE, 0x3f, "WRITE", 8, fdctrl_start_transfer, FD_DIR_WRITE },
     { FD_CMD_SEEK, 0xff, "SEEK", 2, fdctrl_handle_seek },
@@ -2044,9 +2046,19 @@ static const struct {
 /* Associate command to an index in the 'handlers' array */
 static uint8_t command_to_handler[256];
 
+static const FDCtrlCommand *get_command(uint8_t cmd)
+{
+    int idx;
+
+    idx = command_to_handler[cmd];
+    FLOPPY_DPRINTF("%s command\n", handlers[idx].name);
+    return &handlers[idx];
+}
+
 static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value)
 {
     FDrive *cur_drv;
+    const FDCtrlCommand *cmd;
     uint32_t pos;
 
     /* Reset mode */
@@ -2060,15 +2072,22 @@ static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value)
     }
     fdctrl->dsr &= ~FD_DSR_PWRDOWN;
 
+    FLOPPY_DPRINTF("%s: %02x\n", __func__, value);
+
+    /* If data_len spans multiple sectors, the current position in the FIFO
+     * wraps around while fdctrl->data_pos is the real position in the whole
+     * request. */
+    pos = fdctrl->data_pos++;
+    pos %= FD_SECTOR_LEN;
+    fdctrl->fifo[pos] = value;
+
     switch (fdctrl->phase) {
     case FD_PHASE_EXECUTION:
         /* For DMA requests, RQM should be cleared during execution phase, so
          * we would have errored out above. */
         assert(fdctrl->msr & FD_MSR_NONDMA);
+
         /* FIFO data write */
-        pos = fdctrl->data_pos++;
-        pos %= FD_SECTOR_LEN;
-        fdctrl->fifo[pos] = value;
         if (pos == FD_SECTOR_LEN - 1 ||
             fdctrl->data_pos == fdctrl->data_len) {
             cur_drv = get_cur_drv(fdctrl);
@@ -2084,41 +2103,37 @@ static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value)
                 break;
             }
         }
-        /* Switch from transfer mode to status mode
-         * then from status mode to command mode
-         */
-        if (fdctrl->data_pos == fdctrl->data_len)
+
+        /* Switch to result phase when done with the transfer */
+        if (fdctrl->data_pos == fdctrl->data_len) {
             fdctrl_stop_transfer(fdctrl, 0x00, 0x00, 0x00);
+        }
         break;
 
     case FD_PHASE_COMMAND:
         assert(!(fdctrl->msr & FD_MSR_NONDMA));
+        assert(fdctrl->data_pos < FD_SECTOR_LEN);
 
-        if (fdctrl->data_pos == 0) {
-            /* Command */
-            pos = command_to_handler[value & 0xff];
-            FLOPPY_DPRINTF("%s command\n", handlers[pos].name);
-            fdctrl->data_len = handlers[pos].parameters + 1;
+        if (pos == 0) {
+            /* The first byte specifies the command. Now we start reading
+             * as many parameters as this command requires. */
+            cmd = get_command(value);
+            fdctrl->data_len = cmd->parameters + 1;
             fdctrl->msr |= FD_MSR_CMDBUSY;
         }
 
-        FLOPPY_DPRINTF("%s: %02x\n", __func__, value);
-        pos = fdctrl->data_pos++;
-        pos %= FD_SECTOR_LEN;
-        fdctrl->fifo[pos] = value;
         if (fdctrl->data_pos == fdctrl->data_len) {
-            /* We now have all parameters
-             * and will be able to treat the command
-             */
+            /* We have all parameters now, execute the command */
             fdctrl->phase = FD_PHASE_EXECUTION;
+
             if (fdctrl->data_state & FD_STATE_FORMAT) {
                 fdctrl_format_sector(fdctrl);
                 break;
             }
 
-            pos = command_to_handler[fdctrl->fifo[0] & 0xff];
-            FLOPPY_DPRINTF("treat %s command\n", handlers[pos].name);
-            (*handlers[pos].handler)(fdctrl, handlers[pos].direction);
+            cmd = get_command(fdctrl->fifo[0]);
+            FLOPPY_DPRINTF("Calling handler for '%s'\n", cmd->name);
+            cmd->handler(fdctrl, cmd->direction);
         }
         break;
 
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [Qemu-devel] [PATCH v2 6/8] fdc: Disentangle phases in fdctrl_read_data()
  2015-05-21 13:19 [Qemu-devel] [PATCH v2 0/8] fdc: Clean up and fix command processing Kevin Wolf
                   ` (4 preceding siblings ...)
  2015-05-21 13:19 ` [Qemu-devel] [PATCH v2 5/8] fdc: Code cleanup " Kevin Wolf
@ 2015-05-21 13:19 ` Kevin Wolf
  2015-05-21 13:19 ` [Qemu-devel] [PATCH v2 7/8] fdc: Fix MSR.RQM flag Kevin Wolf
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 25+ messages in thread
From: Kevin Wolf @ 2015-05-21 13:19 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, peter.maydell, jsnow, qemu-devel

This commit makes similar improvements as have already been made to the
write function: Instead of relying on a flag in the MSR to distinguish
controller phases, use the explicit phase that we store now. Assertions
of the right MSR flags are added.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
---
 hw/block/fdc.c | 33 +++++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 1cc1e3a..3c64194 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -1591,9 +1591,16 @@ static uint32_t fdctrl_read_data(FDCtrl *fdctrl)
         FLOPPY_DPRINTF("error: controller not ready for reading\n");
         return 0;
     }
+
+    /* If data_len spans multiple sectors, the current position in the FIFO
+     * wraps around while fdctrl->data_pos is the real position in the whole
+     * request. */
     pos = fdctrl->data_pos;
     pos %= FD_SECTOR_LEN;
-    if (fdctrl->msr & FD_MSR_NONDMA) {
+
+    switch (fdctrl->phase) {
+    case FD_PHASE_EXECUTION:
+        assert(fdctrl->msr & FD_MSR_NONDMA);
         if (pos == 0) {
             if (fdctrl->data_pos != 0)
                 if (!fdctrl_seek_to_next_sect(fdctrl, cur_drv)) {
@@ -1609,20 +1616,26 @@ static uint32_t fdctrl_read_data(FDCtrl *fdctrl)
                 memset(fdctrl->fifo, 0, FD_SECTOR_LEN);
             }
         }
-    }
-    retval = fdctrl->fifo[pos];
-    if (++fdctrl->data_pos == fdctrl->data_len) {
-        fdctrl->data_pos = 0;
-        /* Switch from transfer mode to status mode
-         * then from status mode to command mode
-         */
-        if (fdctrl->msr & FD_MSR_NONDMA) {
+
+        if (++fdctrl->data_pos == fdctrl->data_len) {
             fdctrl_stop_transfer(fdctrl, 0x00, 0x00, 0x00);
-        } else {
+        }
+        break;
+
+    case FD_PHASE_RESULT:
+        assert(!(fdctrl->msr & FD_MSR_NONDMA));
+        if (++fdctrl->data_pos == fdctrl->data_len) {
             fdctrl_to_command_phase(fdctrl);
             fdctrl_reset_irq(fdctrl);
         }
+        break;
+
+    case FD_PHASE_COMMAND:
+    default:
+        abort();
     }
+
+    retval = fdctrl->fifo[pos];
     FLOPPY_DPRINTF("data register: 0x%02x\n", retval);
 
     return retval;
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [Qemu-devel] [PATCH v2 7/8] fdc: Fix MSR.RQM flag
  2015-05-21 13:19 [Qemu-devel] [PATCH v2 0/8] fdc: Clean up and fix command processing Kevin Wolf
                   ` (5 preceding siblings ...)
  2015-05-21 13:19 ` [Qemu-devel] [PATCH v2 6/8] fdc: Disentangle phases in fdctrl_read_data() Kevin Wolf
@ 2015-05-21 13:19 ` Kevin Wolf
  2015-05-21 21:27   ` John Snow
  2015-05-21 13:19 ` [Qemu-devel] [PATCH v2 8/8] fdc-test: Test state for existing cases more thoroughly Kevin Wolf
  2015-06-02 17:37 ` [Qemu-devel] [PATCH v2 0/8] fdc: Clean up and fix command processing John Snow
  8 siblings, 1 reply; 25+ messages in thread
From: Kevin Wolf @ 2015-05-21 13:19 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, peter.maydell, jsnow, qemu-devel

The RQM bit in MSR should be set whenever the guest is supposed to
access the FIFO, and it should be cleared in all other cases. This is
important so the guest can't continue writing/reading the FIFO beyond
the length that it's suppossed to access (see CVE-2015-3456).

Commit e9077462 fixed the CVE by adding code that avoids the buffer
overflow; however it doesn't correct the wrong behaviour of the floppy
controller which should already have cleared RQM.

Currently, RQM stays set all the time and during all phases while a
command is being processed. This is error-prone because the command has
to explicitly clear the flag if it doesn't need data (and indeed, the
two buggy commands that are the culprits for the CVE just forgot to do
that).

This patch clears RQM immediately as soon as all bytes that are expected
have been received. If the the FIFO is used in the next phase, the flag
has to be set explicitly there.

It also clear RQM after receiving all bytes even if the phase transition
immediately sets it again. While it's technically not necessary at the
moment because the state between clearing and setting RQM is not
observable by the guest, this is more explicit and matches how real
hardware works. It will actually become necessary in qemu once
asynchronous code paths are introduced.

This alone should have been enough to fix the CVE, but now we have two
lines of defense - even better.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/block/fdc.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 3c64194..6e79459 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -1223,7 +1223,9 @@ static void fdctrl_to_command_phase(FDCtrl *fdctrl)
     fdctrl->phase = FD_PHASE_COMMAND;
     fdctrl->data_dir = FD_DIR_WRITE;
     fdctrl->data_pos = 0;
+    fdctrl->data_len = 1; /* Accept command byte, adjust for params later */
     fdctrl->msr &= ~(FD_MSR_CMDBUSY | FD_MSR_DIO);
+    fdctrl->msr |= FD_MSR_RQM;
 }
 
 /* Update the state to allow the guest to read out the command status.
@@ -1438,7 +1440,7 @@ static void fdctrl_start_transfer(FDCtrl *fdctrl, int direction)
         }
     }
     FLOPPY_DPRINTF("start non-DMA transfer\n");
-    fdctrl->msr |= FD_MSR_NONDMA;
+    fdctrl->msr |= FD_MSR_NONDMA | FD_MSR_RQM;
     if (direction != FD_DIR_WRITE)
         fdctrl->msr |= FD_MSR_DIO;
     /* IO based transfer: calculate len */
@@ -1618,6 +1620,7 @@ static uint32_t fdctrl_read_data(FDCtrl *fdctrl)
         }
 
         if (++fdctrl->data_pos == fdctrl->data_len) {
+            fdctrl->msr &= ~FD_MSR_RQM;
             fdctrl_stop_transfer(fdctrl, 0x00, 0x00, 0x00);
         }
         break;
@@ -1625,6 +1628,7 @@ static uint32_t fdctrl_read_data(FDCtrl *fdctrl)
     case FD_PHASE_RESULT:
         assert(!(fdctrl->msr & FD_MSR_NONDMA));
         if (++fdctrl->data_pos == fdctrl->data_len) {
+            fdctrl->msr &= ~FD_MSR_RQM;
             fdctrl_to_command_phase(fdctrl);
             fdctrl_reset_irq(fdctrl);
         }
@@ -2094,6 +2098,10 @@ static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value)
     pos %= FD_SECTOR_LEN;
     fdctrl->fifo[pos] = value;
 
+    if (fdctrl->data_pos == fdctrl->data_len) {
+        fdctrl->msr &= ~FD_MSR_RQM;
+    }
+
     switch (fdctrl->phase) {
     case FD_PHASE_EXECUTION:
         /* For DMA requests, RQM should be cleared during execution phase, so
@@ -2132,6 +2140,9 @@ static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value)
              * as many parameters as this command requires. */
             cmd = get_command(value);
             fdctrl->data_len = cmd->parameters + 1;
+            if (cmd->parameters) {
+                fdctrl->msr |= FD_MSR_RQM;
+            }
             fdctrl->msr |= FD_MSR_CMDBUSY;
         }
 
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [Qemu-devel] [PATCH v2 8/8] fdc-test: Test state for existing cases more thoroughly
  2015-05-21 13:19 [Qemu-devel] [PATCH v2 0/8] fdc: Clean up and fix command processing Kevin Wolf
                   ` (6 preceding siblings ...)
  2015-05-21 13:19 ` [Qemu-devel] [PATCH v2 7/8] fdc: Fix MSR.RQM flag Kevin Wolf
@ 2015-05-21 13:19 ` Kevin Wolf
  2015-06-02 17:37 ` [Qemu-devel] [PATCH v2 0/8] fdc: Clean up and fix command processing John Snow
  8 siblings, 0 replies; 25+ messages in thread
From: Kevin Wolf @ 2015-05-21 13:19 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, peter.maydell, jsnow, qemu-devel

This just adds a few additional checks of the MSR and interrupt pin to
the already existing test cases.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
---
 tests/fdc-test.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/tests/fdc-test.c b/tests/fdc-test.c
index 3c6c83c..416394f 100644
--- a/tests/fdc-test.c
+++ b/tests/fdc-test.c
@@ -218,6 +218,10 @@ static uint8_t send_read_no_dma_command(int nb_sect, uint8_t expected_st0)
         inb(FLOPPY_BASE + reg_fifo);
     }
 
+    msr = inb(FLOPPY_BASE + reg_msr);
+    assert_bit_set(msr, BUSY | RQM | DIO);
+    g_assert(get_irq(FLOPPY_IRQ));
+
     st0 = floppy_recv();
     if (st0 != expected_st0) {
         ret = 1;
@@ -228,8 +232,15 @@ static uint8_t send_read_no_dma_command(int nb_sect, uint8_t expected_st0)
     floppy_recv();
     floppy_recv();
     floppy_recv();
+    g_assert(get_irq(FLOPPY_IRQ));
     floppy_recv();
 
+    /* Check that we're back in command phase */
+    msr = inb(FLOPPY_BASE + reg_msr);
+    assert_bit_clear(msr, BUSY | DIO);
+    assert_bit_set(msr, RQM);
+    g_assert(!get_irq(FLOPPY_IRQ));
+
     return ret;
 }
 
@@ -403,6 +414,7 @@ static void test_read_id(void)
     uint8_t head = 0;
     uint8_t cyl;
     uint8_t st0;
+    uint8_t msr;
 
     /* Seek to track 0 and check with READ ID */
     send_seek(0);
@@ -411,18 +423,29 @@ static void test_read_id(void)
     g_assert(!get_irq(FLOPPY_IRQ));
     floppy_send(head << 2 | drive);
 
+    msr = inb(FLOPPY_BASE + reg_msr);
+    if (!get_irq(FLOPPY_IRQ)) {
+        assert_bit_set(msr, BUSY);
+        assert_bit_clear(msr, RQM);
+    }
+
     while (!get_irq(FLOPPY_IRQ)) {
         /* qemu involves a timer with READ ID... */
         clock_step(1000000000LL / 50);
     }
 
+    msr = inb(FLOPPY_BASE + reg_msr);
+    assert_bit_set(msr, BUSY | RQM | DIO);
+
     st0 = floppy_recv();
     floppy_recv();
     floppy_recv();
     cyl = floppy_recv();
     head = floppy_recv();
     floppy_recv();
+    g_assert(get_irq(FLOPPY_IRQ));
     floppy_recv();
+    g_assert(!get_irq(FLOPPY_IRQ));
 
     g_assert_cmpint(cyl, ==, 0);
     g_assert_cmpint(head, ==, 0);
@@ -443,18 +466,29 @@ static void test_read_id(void)
     g_assert(!get_irq(FLOPPY_IRQ));
     floppy_send(head << 2 | drive);
 
+    msr = inb(FLOPPY_BASE + reg_msr);
+    if (!get_irq(FLOPPY_IRQ)) {
+        assert_bit_set(msr, BUSY);
+        assert_bit_clear(msr, RQM);
+    }
+
     while (!get_irq(FLOPPY_IRQ)) {
         /* qemu involves a timer with READ ID... */
         clock_step(1000000000LL / 50);
     }
 
+    msr = inb(FLOPPY_BASE + reg_msr);
+    assert_bit_set(msr, BUSY | RQM | DIO);
+
     st0 = floppy_recv();
     floppy_recv();
     floppy_recv();
     cyl = floppy_recv();
     head = floppy_recv();
     floppy_recv();
+    g_assert(get_irq(FLOPPY_IRQ));
     floppy_recv();
+    g_assert(!get_irq(FLOPPY_IRQ));
 
     g_assert_cmpint(cyl, ==, 8);
     g_assert_cmpint(head, ==, 1);
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCH v2 7/8] fdc: Fix MSR.RQM flag
  2015-05-21 13:19 ` [Qemu-devel] [PATCH v2 7/8] fdc: Fix MSR.RQM flag Kevin Wolf
@ 2015-05-21 21:27   ` John Snow
  0 siblings, 0 replies; 25+ messages in thread
From: John Snow @ 2015-05-21 21:27 UTC (permalink / raw)
  To: Kevin Wolf, qemu-block; +Cc: peter.maydell, qemu-devel



On 05/21/2015 09:19 AM, Kevin Wolf wrote:
> The RQM bit in MSR should be set whenever the guest is supposed to
> access the FIFO, and it should be cleared in all other cases. This is
> important so the guest can't continue writing/reading the FIFO beyond
> the length that it's suppossed to access (see CVE-2015-3456).
> 
> Commit e9077462 fixed the CVE by adding code that avoids the buffer
> overflow; however it doesn't correct the wrong behaviour of the floppy
> controller which should already have cleared RQM.
> 
> Currently, RQM stays set all the time and during all phases while a
> command is being processed. This is error-prone because the command has
> to explicitly clear the flag if it doesn't need data (and indeed, the
> two buggy commands that are the culprits for the CVE just forgot to do
> that).
> 
> This patch clears RQM immediately as soon as all bytes that are expected
> have been received. If the the FIFO is used in the next phase, the flag
> has to be set explicitly there.
> 
> It also clear RQM after receiving all bytes even if the phase transition
> immediately sets it again. While it's technically not necessary at the
> moment because the state between clearing and setting RQM is not
> observable by the guest, this is more explicit and matches how real
> hardware works. It will actually become necessary in qemu once
> asynchronous code paths are introduced.
> 
> This alone should have been enough to fix the CVE, but now we have two
> lines of defense - even better.
> 
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
>  hw/block/fdc.c | 13 ++++++++++++-
>  1 file changed, 12 insertions(+), 1 deletion(-)
> 

Reviewed-by: John Snow <jsnow@redhat.com>

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCH v2 5/8] fdc: Code cleanup in fdctrl_write_data()
  2015-05-21 13:19 ` [Qemu-devel] [PATCH v2 5/8] fdc: Code cleanup " Kevin Wolf
@ 2015-05-21 21:34   ` John Snow
  0 siblings, 0 replies; 25+ messages in thread
From: John Snow @ 2015-05-21 21:34 UTC (permalink / raw)
  To: Kevin Wolf, qemu-block; +Cc: peter.maydell, qemu-devel



On 05/21/2015 09:19 AM, Kevin Wolf wrote:
> Factor out a few common lines of code, reformat, improve comments.
> 
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
>  hw/block/fdc.c | 63 ++++++++++++++++++++++++++++++++++++----------------------
>  1 file changed, 39 insertions(+), 24 deletions(-)
> 
> diff --git a/hw/block/fdc.c b/hw/block/fdc.c
> index e3515a1..1cc1e3a 100644
> --- a/hw/block/fdc.c
> +++ b/hw/block/fdc.c
> @@ -2000,14 +2000,16 @@ static void fdctrl_handle_relative_seek_out(FDCtrl *fdctrl, int direction)
>  /*
>   * Handlers for the execution phase of each command
>   */
> -static const struct {
> +typedef struct FDCtrlCommand {
>      uint8_t value;
>      uint8_t mask;
>      const char* name;
>      int parameters;
>      void (*handler)(FDCtrl *fdctrl, int direction);
>      int direction;
> -} handlers[] = {
> +} FDCtrlCommand;
> +
> +static const FDCtrlCommand handlers[] = {
>      { FD_CMD_READ, 0x1f, "READ", 8, fdctrl_start_transfer, FD_DIR_READ },
>      { FD_CMD_WRITE, 0x3f, "WRITE", 8, fdctrl_start_transfer, FD_DIR_WRITE },
>      { FD_CMD_SEEK, 0xff, "SEEK", 2, fdctrl_handle_seek },
> @@ -2044,9 +2046,19 @@ static const struct {
>  /* Associate command to an index in the 'handlers' array */
>  static uint8_t command_to_handler[256];
>  
> +static const FDCtrlCommand *get_command(uint8_t cmd)
> +{
> +    int idx;
> +
> +    idx = command_to_handler[cmd];
> +    FLOPPY_DPRINTF("%s command\n", handlers[idx].name);
> +    return &handlers[idx];
> +}
> +
>  static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value)
>  {
>      FDrive *cur_drv;
> +    const FDCtrlCommand *cmd;
>      uint32_t pos;
>  
>      /* Reset mode */
> @@ -2060,15 +2072,22 @@ static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value)
>      }
>      fdctrl->dsr &= ~FD_DSR_PWRDOWN;
>  
> +    FLOPPY_DPRINTF("%s: %02x\n", __func__, value);
> +
> +    /* If data_len spans multiple sectors, the current position in the FIFO
> +     * wraps around while fdctrl->data_pos is the real position in the whole
> +     * request. */
> +    pos = fdctrl->data_pos++;
> +    pos %= FD_SECTOR_LEN;
> +    fdctrl->fifo[pos] = value;
> +
>      switch (fdctrl->phase) {
>      case FD_PHASE_EXECUTION:
>          /* For DMA requests, RQM should be cleared during execution phase, so
>           * we would have errored out above. */
>          assert(fdctrl->msr & FD_MSR_NONDMA);
> +
>          /* FIFO data write */
> -        pos = fdctrl->data_pos++;
> -        pos %= FD_SECTOR_LEN;
> -        fdctrl->fifo[pos] = value;
>          if (pos == FD_SECTOR_LEN - 1 ||
>              fdctrl->data_pos == fdctrl->data_len) {
>              cur_drv = get_cur_drv(fdctrl);
> @@ -2084,41 +2103,37 @@ static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value)
>                  break;
>              }
>          }
> -        /* Switch from transfer mode to status mode
> -         * then from status mode to command mode
> -         */
> -        if (fdctrl->data_pos == fdctrl->data_len)
> +
> +        /* Switch to result phase when done with the transfer */
> +        if (fdctrl->data_pos == fdctrl->data_len) {
>              fdctrl_stop_transfer(fdctrl, 0x00, 0x00, 0x00);
> +        }
>          break;
>  
>      case FD_PHASE_COMMAND:
>          assert(!(fdctrl->msr & FD_MSR_NONDMA));
> +        assert(fdctrl->data_pos < FD_SECTOR_LEN);
>  
> -        if (fdctrl->data_pos == 0) {
> -            /* Command */
> -            pos = command_to_handler[value & 0xff];
> -            FLOPPY_DPRINTF("%s command\n", handlers[pos].name);
> -            fdctrl->data_len = handlers[pos].parameters + 1;
> +        if (pos == 0) {
> +            /* The first byte specifies the command. Now we start reading
> +             * as many parameters as this command requires. */
> +            cmd = get_command(value);
> +            fdctrl->data_len = cmd->parameters + 1;
>              fdctrl->msr |= FD_MSR_CMDBUSY;
>          }
>  
> -        FLOPPY_DPRINTF("%s: %02x\n", __func__, value);
> -        pos = fdctrl->data_pos++;
> -        pos %= FD_SECTOR_LEN;
> -        fdctrl->fifo[pos] = value;
>          if (fdctrl->data_pos == fdctrl->data_len) {
> -            /* We now have all parameters
> -             * and will be able to treat the command
> -             */
> +            /* We have all parameters now, execute the command */
>              fdctrl->phase = FD_PHASE_EXECUTION;
> +
>              if (fdctrl->data_state & FD_STATE_FORMAT) {
>                  fdctrl_format_sector(fdctrl);
>                  break;
>              }
>  
> -            pos = command_to_handler[fdctrl->fifo[0] & 0xff];
> -            FLOPPY_DPRINTF("treat %s command\n", handlers[pos].name);
> -            (*handlers[pos].handler)(fdctrl, handlers[pos].direction);
> +            cmd = get_command(fdctrl->fifo[0]);
> +            FLOPPY_DPRINTF("Calling handler for '%s'\n", cmd->name);
> +            cmd->handler(fdctrl, cmd->direction);
>          }
>          break;
>  
> 

Reviewed-by: John Snow <jsnow@redhat.com>

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCH v2 3/8] fdc: Introduce fdctrl->phase
  2015-05-21 13:19 ` [Qemu-devel] [PATCH v2 3/8] fdc: Introduce fdctrl->phase Kevin Wolf
@ 2015-05-21 21:55   ` John Snow
  2015-05-28 17:29     ` Dr. David Alan Gilbert
  0 siblings, 1 reply; 25+ messages in thread
From: John Snow @ 2015-05-21 21:55 UTC (permalink / raw)
  To: Kevin Wolf, Dr. David Alan Gilbert; +Cc: peter.maydell, qemu-devel, qemu-block



On 05/21/2015 09:19 AM, Kevin Wolf wrote:
> The floppy controller spec describes three different controller phases,
> which are currently not explicitly modelled in our emulation. Instead,
> each phase is represented by a combination of flags in registers.
> 
> This patch makes explicit in which phase the controller currently is.
> 
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
>  hw/block/fdc.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 89 insertions(+)
> 
> diff --git a/hw/block/fdc.c b/hw/block/fdc.c
> index 8c41434..f5bcf0b 100644
> --- a/hw/block/fdc.c
> +++ b/hw/block/fdc.c
> @@ -495,6 +495,33 @@ enum {
>      FD_DIR_DSKCHG   = 0x80,
>  };
>  
> +/*
> + * See chapter 5.0 "Controller phases" of the spec:
> + *
> + * Command phase:
> + * The host writes a command and its parameters into the FIFO. The command
> + * phase is completed when all parameters for the command have been supplied,
> + * and execution phase is entered.
> + *
> + * Execution phase:
> + * Data transfers, either DMA or non-DMA. For non-DMA transfers, the FIFO
> + * contains the payload now, otherwise it's unused. When all bytes of the
> + * required data have been transferred, the state is switched to either result
> + * phase (if the command produces status bytes) or directly back into the
> + * command phase for the next command.
> + *
> + * Result phase:
> + * The host reads out the FIFO, which contains one or more result bytes now.
> + */
> +enum {
> +    /* Only for migration: reconstruct phase from registers like qemu 2.3 */
> +    FD_PHASE_RECONSTRUCT    = 0,
> +
> +    FD_PHASE_COMMAND        = 1,
> +    FD_PHASE_EXECUTION      = 2,
> +    FD_PHASE_RESULT         = 3,
> +};
> +
>  #define FD_MULTI_TRACK(state) ((state) & FD_STATE_MULTI)
>  #define FD_FORMAT_CMD(state) ((state) & FD_STATE_FORMAT)
>  
> @@ -504,6 +531,7 @@ struct FDCtrl {
>      /* Controller state */
>      QEMUTimer *result_timer;
>      int dma_chann;
> +    uint8_t phase;
>      /* Controller's identification */
>      uint8_t version;
>      /* HW */
> @@ -744,6 +772,28 @@ static const VMStateDescription vmstate_fdrive = {
>      }
>  };
>  
> +/*
> + * Reconstructs the phase from register values according to the logic that was
> + * implemented in qemu 2.3. This is the default value that is used if the phase
> + * subsection is not present on migration.
> + *
> + * Don't change this function to reflect newer qemu versions, it is part of
> + * the migration ABI.
> + */
> +static int reconstruct_phase(FDCtrl *fdctrl)
> +{
> +    if (fdctrl->msr & FD_MSR_NONDMA) {
> +        return FD_PHASE_EXECUTION;
> +    } else if ((fdctrl->msr & FD_MSR_RQM) == 0) {
> +        /* qemu 2.3 disabled RQM only during DMA transfers */
> +        return FD_PHASE_EXECUTION;
> +    } else if (fdctrl->msr & FD_MSR_DIO) {
> +        return FD_PHASE_RESULT;
> +    } else {
> +        return FD_PHASE_COMMAND;
> +    }
> +}
> +
>  static void fdc_pre_save(void *opaque)
>  {
>      FDCtrl *s = opaque;
> @@ -751,12 +801,24 @@ static void fdc_pre_save(void *opaque)
>      s->dor_vmstate = s->dor | GET_CUR_DRV(s);
>  }
>  
> +static int fdc_pre_load(void *opaque)
> +{
> +    FDCtrl *s = opaque;
> +    s->phase = FD_PHASE_RECONSTRUCT;
> +    return 0;
> +}
> +
>  static int fdc_post_load(void *opaque, int version_id)
>  {
>      FDCtrl *s = opaque;
>  
>      SET_CUR_DRV(s, s->dor_vmstate & FD_DOR_SELMASK);
>      s->dor = s->dor_vmstate & ~FD_DOR_SELMASK;
> +
> +    if (s->phase == FD_PHASE_RECONSTRUCT) {
> +        s->phase = reconstruct_phase(s);
> +    }
> +
>      return 0;
>  }
>  
> @@ -794,11 +856,29 @@ static const VMStateDescription vmstate_fdc_result_timer = {
>      }
>  };
>  
> +static bool fdc_phase_needed(void *opaque)
> +{
> +    FDCtrl *fdctrl = opaque;
> +
> +    return reconstruct_phase(fdctrl) != fdctrl->phase;
> +}
> +
> +static const VMStateDescription vmstate_fdc_phase = {
> +    .name = "fdc/phase",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_UINT8(phase, FDCtrl),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
>  static const VMStateDescription vmstate_fdc = {
>      .name = "fdc",
>      .version_id = 2,
>      .minimum_version_id = 2,
>      .pre_save = fdc_pre_save,
> +    .pre_load = fdc_pre_load,
>      .post_load = fdc_post_load,
>      .fields = (VMStateField[]) {
>          /* Controller State */
> @@ -839,6 +919,9 @@ static const VMStateDescription vmstate_fdc = {
>              .vmsd = &vmstate_fdc_result_timer,
>              .needed = fdc_result_timer_needed,
>          } , {
> +            .vmsd = &vmstate_fdc_phase,
> +            .needed = fdc_phase_needed,
> +        } , {
>              /* empty */
>          }
>      }
> @@ -1137,6 +1220,7 @@ static uint32_t fdctrl_read_dir(FDCtrl *fdctrl)
>  /* Clear the FIFO and update the state for receiving the next command */
>  static void fdctrl_to_command_phase(FDCtrl *fdctrl)
>  {
> +    fdctrl->phase = FD_PHASE_COMMAND;
>      fdctrl->data_dir = FD_DIR_WRITE;
>      fdctrl->data_pos = 0;
>      fdctrl->msr &= ~(FD_MSR_CMDBUSY | FD_MSR_DIO);
> @@ -1146,6 +1230,7 @@ static void fdctrl_to_command_phase(FDCtrl *fdctrl)
>   * @fifo_len is the number of result bytes to be read out. */
>  static void fdctrl_to_result_phase(FDCtrl *fdctrl, int fifo_len)
>  {
> +    fdctrl->phase = FD_PHASE_RESULT;
>      fdctrl->data_dir = FD_DIR_READ;
>      fdctrl->data_len = fifo_len;
>      fdctrl->data_pos = 0;
> @@ -1912,6 +1997,9 @@ static void fdctrl_handle_relative_seek_out(FDCtrl *fdctrl, int direction)
>      fdctrl_raise_irq(fdctrl);
>  }
>  
> +/*
> + * Handlers for the execution phase of each command
> + */
>  static const struct {
>      uint8_t value;
>      uint8_t mask;
> @@ -2015,6 +2103,7 @@ static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value)
>          /* We now have all parameters
>           * and will be able to treat the command
>           */
> +        fdctrl->phase = FD_PHASE_EXECUTION;
>          if (fdctrl->data_state & FD_STATE_FORMAT) {
>              fdctrl_format_sector(fdctrl);
>              return;
> 

Acked-by: John Snow <jsnow@redhat.com>

Looks ok to me, CC David Gilbert for a migration look-see.

--js

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCH v2 3/8] fdc: Introduce fdctrl->phase
  2015-05-21 21:55   ` John Snow
@ 2015-05-28 17:29     ` Dr. David Alan Gilbert
  2015-05-29  7:50       ` Markus Armbruster
  0 siblings, 1 reply; 25+ messages in thread
From: Dr. David Alan Gilbert @ 2015-05-28 17:29 UTC (permalink / raw)
  To: John Snow; +Cc: Kevin Wolf, peter.maydell, qemu-devel, qemu-block

* John Snow (jsnow@redhat.com) wrote:
> 
> 
> On 05/21/2015 09:19 AM, Kevin Wolf wrote:
> > The floppy controller spec describes three different controller phases,
> > which are currently not explicitly modelled in our emulation. Instead,
> > each phase is represented by a combination of flags in registers.
> > 
> > This patch makes explicit in which phase the controller currently is.
> > 
> > Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> > ---
> >  hw/block/fdc.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 89 insertions(+)
> > 
> > diff --git a/hw/block/fdc.c b/hw/block/fdc.c
> > index 8c41434..f5bcf0b 100644
> > --- a/hw/block/fdc.c
> > +++ b/hw/block/fdc.c
> > @@ -495,6 +495,33 @@ enum {
> >      FD_DIR_DSKCHG   = 0x80,
> >  };
> >  
> > +/*
> > + * See chapter 5.0 "Controller phases" of the spec:
> > + *
> > + * Command phase:
> > + * The host writes a command and its parameters into the FIFO. The command
> > + * phase is completed when all parameters for the command have been supplied,
> > + * and execution phase is entered.
> > + *
> > + * Execution phase:
> > + * Data transfers, either DMA or non-DMA. For non-DMA transfers, the FIFO
> > + * contains the payload now, otherwise it's unused. When all bytes of the
> > + * required data have been transferred, the state is switched to either result
> > + * phase (if the command produces status bytes) or directly back into the
> > + * command phase for the next command.
> > + *
> > + * Result phase:
> > + * The host reads out the FIFO, which contains one or more result bytes now.
> > + */
> > +enum {
> > +    /* Only for migration: reconstruct phase from registers like qemu 2.3 */
> > +    FD_PHASE_RECONSTRUCT    = 0,
> > +
> > +    FD_PHASE_COMMAND        = 1,
> > +    FD_PHASE_EXECUTION      = 2,
> > +    FD_PHASE_RESULT         = 3,
> > +};
> > +
> >  #define FD_MULTI_TRACK(state) ((state) & FD_STATE_MULTI)
> >  #define FD_FORMAT_CMD(state) ((state) & FD_STATE_FORMAT)
> >  
> > @@ -504,6 +531,7 @@ struct FDCtrl {
> >      /* Controller state */
> >      QEMUTimer *result_timer;
> >      int dma_chann;
> > +    uint8_t phase;
> >      /* Controller's identification */
> >      uint8_t version;
> >      /* HW */
> > @@ -744,6 +772,28 @@ static const VMStateDescription vmstate_fdrive = {
> >      }
> >  };
> >  
> > +/*
> > + * Reconstructs the phase from register values according to the logic that was
> > + * implemented in qemu 2.3. This is the default value that is used if the phase
> > + * subsection is not present on migration.
> > + *
> > + * Don't change this function to reflect newer qemu versions, it is part of
> > + * the migration ABI.
> > + */
> > +static int reconstruct_phase(FDCtrl *fdctrl)
> > +{
> > +    if (fdctrl->msr & FD_MSR_NONDMA) {
> > +        return FD_PHASE_EXECUTION;
> > +    } else if ((fdctrl->msr & FD_MSR_RQM) == 0) {
> > +        /* qemu 2.3 disabled RQM only during DMA transfers */
> > +        return FD_PHASE_EXECUTION;
> > +    } else if (fdctrl->msr & FD_MSR_DIO) {
> > +        return FD_PHASE_RESULT;
> > +    } else {
> > +        return FD_PHASE_COMMAND;
> > +    }
> > +}
> > +
> >  static void fdc_pre_save(void *opaque)
> >  {
> >      FDCtrl *s = opaque;
> > @@ -751,12 +801,24 @@ static void fdc_pre_save(void *opaque)
> >      s->dor_vmstate = s->dor | GET_CUR_DRV(s);
> >  }
> >  
> > +static int fdc_pre_load(void *opaque)
> > +{
> > +    FDCtrl *s = opaque;
> > +    s->phase = FD_PHASE_RECONSTRUCT;
> > +    return 0;
> > +}
> > +
> >  static int fdc_post_load(void *opaque, int version_id)
> >  {
> >      FDCtrl *s = opaque;
> >  
> >      SET_CUR_DRV(s, s->dor_vmstate & FD_DOR_SELMASK);
> >      s->dor = s->dor_vmstate & ~FD_DOR_SELMASK;
> > +
> > +    if (s->phase == FD_PHASE_RECONSTRUCT) {
> > +        s->phase = reconstruct_phase(s);
> > +    }
> > +
> >      return 0;
> >  }
> >  
> > @@ -794,11 +856,29 @@ static const VMStateDescription vmstate_fdc_result_timer = {
> >      }
> >  };
> >  
> > +static bool fdc_phase_needed(void *opaque)
> > +{
> > +    FDCtrl *fdctrl = opaque;
> > +
> > +    return reconstruct_phase(fdctrl) != fdctrl->phase;
> > +}

OK, that is one way of doing it which should work, as long
as most of the time that matches.

My preference for subsections is normally to make them just
conditional on machine type, that way backwards migration just
works.  However, if the result of the backwards migration would
be data corruption (which if I understand what you saying it could
in this case) then it's arguably better to fail migration in those
cases.
You might like to add a comment to that effect.

Would I be correct in thinking that all our common OSs
end up not sending this section while running and not
accessing the floppy?

Dave

> > +
> > +static const VMStateDescription vmstate_fdc_phase = {
> > +    .name = "fdc/phase",
> > +    .version_id = 1,
> > +    .minimum_version_id = 1,
> > +    .fields = (VMStateField[]) {
> > +        VMSTATE_UINT8(phase, FDCtrl),
> > +        VMSTATE_END_OF_LIST()
> > +    }
> > +};
> > +
> >  static const VMStateDescription vmstate_fdc = {
> >      .name = "fdc",
> >      .version_id = 2,
> >      .minimum_version_id = 2,
> >      .pre_save = fdc_pre_save,
> > +    .pre_load = fdc_pre_load,
> >      .post_load = fdc_post_load,
> >      .fields = (VMStateField[]) {
> >          /* Controller State */
> > @@ -839,6 +919,9 @@ static const VMStateDescription vmstate_fdc = {
> >              .vmsd = &vmstate_fdc_result_timer,
> >              .needed = fdc_result_timer_needed,
> >          } , {
> > +            .vmsd = &vmstate_fdc_phase,
> > +            .needed = fdc_phase_needed,
> > +        } , {
> >              /* empty */
> >          }
> >      }
> > @@ -1137,6 +1220,7 @@ static uint32_t fdctrl_read_dir(FDCtrl *fdctrl)
> >  /* Clear the FIFO and update the state for receiving the next command */
> >  static void fdctrl_to_command_phase(FDCtrl *fdctrl)
> >  {
> > +    fdctrl->phase = FD_PHASE_COMMAND;
> >      fdctrl->data_dir = FD_DIR_WRITE;
> >      fdctrl->data_pos = 0;
> >      fdctrl->msr &= ~(FD_MSR_CMDBUSY | FD_MSR_DIO);
> > @@ -1146,6 +1230,7 @@ static void fdctrl_to_command_phase(FDCtrl *fdctrl)
> >   * @fifo_len is the number of result bytes to be read out. */
> >  static void fdctrl_to_result_phase(FDCtrl *fdctrl, int fifo_len)
> >  {
> > +    fdctrl->phase = FD_PHASE_RESULT;
> >      fdctrl->data_dir = FD_DIR_READ;
> >      fdctrl->data_len = fifo_len;
> >      fdctrl->data_pos = 0;
> > @@ -1912,6 +1997,9 @@ static void fdctrl_handle_relative_seek_out(FDCtrl *fdctrl, int direction)
> >      fdctrl_raise_irq(fdctrl);
> >  }
> >  
> > +/*
> > + * Handlers for the execution phase of each command
> > + */
> >  static const struct {
> >      uint8_t value;
> >      uint8_t mask;
> > @@ -2015,6 +2103,7 @@ static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value)
> >          /* We now have all parameters
> >           * and will be able to treat the command
> >           */
> > +        fdctrl->phase = FD_PHASE_EXECUTION;
> >          if (fdctrl->data_state & FD_STATE_FORMAT) {
> >              fdctrl_format_sector(fdctrl);
> >              return;
> > 
> 
> Acked-by: John Snow <jsnow@redhat.com>
> 
> Looks ok to me, CC David Gilbert for a migration look-see.
> 
> --js
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCH v2 3/8] fdc: Introduce fdctrl->phase
  2015-05-28 17:29     ` Dr. David Alan Gilbert
@ 2015-05-29  7:50       ` Markus Armbruster
  2015-05-29  8:33         ` Dr. David Alan Gilbert
  0 siblings, 1 reply; 25+ messages in thread
From: Markus Armbruster @ 2015-05-29  7:50 UTC (permalink / raw)
  To: Dr. David Alan Gilbert
  Cc: Kevin Wolf, peter.maydell, John Snow, qemu-devel, qemu-block

"Dr. David Alan Gilbert" <dgilbert@redhat.com> writes:

> * John Snow (jsnow@redhat.com) wrote:
>> 
>> 
>> On 05/21/2015 09:19 AM, Kevin Wolf wrote:
>> > The floppy controller spec describes three different controller phases,
>> > which are currently not explicitly modelled in our emulation. Instead,
>> > each phase is represented by a combination of flags in registers.
>> > 
>> > This patch makes explicit in which phase the controller currently is.
>> > 
>> > Signed-off-by: Kevin Wolf <kwolf@redhat.com>
>> > ---
>> >  hw/block/fdc.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>> >  1 file changed, 89 insertions(+)
>> > 
>> > diff --git a/hw/block/fdc.c b/hw/block/fdc.c
>> > index 8c41434..f5bcf0b 100644
>> > --- a/hw/block/fdc.c
>> > +++ b/hw/block/fdc.c
>> > @@ -495,6 +495,33 @@ enum {
>> >      FD_DIR_DSKCHG   = 0x80,
>> >  };
>> >  
>> > +/*
>> > + * See chapter 5.0 "Controller phases" of the spec:
>> > + *
>> > + * Command phase:
>> > + * The host writes a command and its parameters into the FIFO. The command
>> > + * phase is completed when all parameters for the command have been supplied,
>> > + * and execution phase is entered.
>> > + *
>> > + * Execution phase:
>> > + * Data transfers, either DMA or non-DMA. For non-DMA transfers, the FIFO
>> > + * contains the payload now, otherwise it's unused. When all bytes of the
>> > + * required data have been transferred, the state is switched to either result
>> > + * phase (if the command produces status bytes) or directly back into the
>> > + * command phase for the next command.
>> > + *
>> > + * Result phase:
>> > + * The host reads out the FIFO, which contains one or more result bytes now.
>> > + */
>> > +enum {
>> > +    /* Only for migration: reconstruct phase from registers like qemu 2.3 */
>> > +    FD_PHASE_RECONSTRUCT    = 0,
>> > +
>> > +    FD_PHASE_COMMAND        = 1,
>> > +    FD_PHASE_EXECUTION      = 2,
>> > +    FD_PHASE_RESULT         = 3,
>> > +};
>> > +
>> >  #define FD_MULTI_TRACK(state) ((state) & FD_STATE_MULTI)
>> >  #define FD_FORMAT_CMD(state) ((state) & FD_STATE_FORMAT)
>> >  
>> > @@ -504,6 +531,7 @@ struct FDCtrl {
>> >      /* Controller state */
>> >      QEMUTimer *result_timer;
>> >      int dma_chann;
>> > +    uint8_t phase;
>> >      /* Controller's identification */
>> >      uint8_t version;
>> >      /* HW */
>> > @@ -744,6 +772,28 @@ static const VMStateDescription vmstate_fdrive = {
>> >      }
>> >  };
>> >  
>> > +/*
>> > + * Reconstructs the phase from register values according to the logic that was
>> > + * implemented in qemu 2.3. This is the default value that is used if the phase
>> > + * subsection is not present on migration.
>> > + *
>> > + * Don't change this function to reflect newer qemu versions, it is part of
>> > + * the migration ABI.
>> > + */
>> > +static int reconstruct_phase(FDCtrl *fdctrl)
>> > +{
>> > +    if (fdctrl->msr & FD_MSR_NONDMA) {
>> > +        return FD_PHASE_EXECUTION;
>> > +    } else if ((fdctrl->msr & FD_MSR_RQM) == 0) {
>> > +        /* qemu 2.3 disabled RQM only during DMA transfers */
>> > +        return FD_PHASE_EXECUTION;
>> > +    } else if (fdctrl->msr & FD_MSR_DIO) {
>> > +        return FD_PHASE_RESULT;
>> > +    } else {
>> > +        return FD_PHASE_COMMAND;
>> > +    }
>> > +}
>> > +
>> >  static void fdc_pre_save(void *opaque)
>> >  {
>> >      FDCtrl *s = opaque;
>> > @@ -751,12 +801,24 @@ static void fdc_pre_save(void *opaque)
>> >      s->dor_vmstate = s->dor | GET_CUR_DRV(s);
>> >  }
>> >  
>> > +static int fdc_pre_load(void *opaque)
>> > +{
>> > +    FDCtrl *s = opaque;
>> > +    s->phase = FD_PHASE_RECONSTRUCT;
>> > +    return 0;
>> > +}
>> > +
>> >  static int fdc_post_load(void *opaque, int version_id)
>> >  {
>> >      FDCtrl *s = opaque;
>> >  
>> >      SET_CUR_DRV(s, s->dor_vmstate & FD_DOR_SELMASK);
>> >      s->dor = s->dor_vmstate & ~FD_DOR_SELMASK;
>> > +
>> > +    if (s->phase == FD_PHASE_RECONSTRUCT) {
>> > +        s->phase = reconstruct_phase(s);
>> > +    }
>> > +
>> >      return 0;
>> >  }
>> >  
>> > @@ -794,11 +856,29 @@ static const VMStateDescription vmstate_fdc_result_timer = {
>> >      }
>> >  };
>> >  
>> > +static bool fdc_phase_needed(void *opaque)
>> > +{
>> > +    FDCtrl *fdctrl = opaque;
>> > +
>> > +    return reconstruct_phase(fdctrl) != fdctrl->phase;
>> > +}
>
> OK, that is one way of doing it which should work, as long
> as most of the time that matches.
>
> My preference for subsections is normally to make them just
> conditional on machine type, that way backwards migration just
> works.  However, if the result of the backwards migration would
> be data corruption (which if I understand what you saying it could
> in this case) then it's arguably better to fail migration in those
> cases.

This isn't a matter of preference.

Subsections were designed to solves a problem we only have because we're
not perfect: device model bugs, namely forgetting to migrate a bit of
state, or failing to model it in the first place.

Subsections tied to machine types solve an entirely different problem:
the old version of the device is just fine, including migration, but we
now want a new and improved variant, which needs some more state
migrated.  Putting that piece of state into a subsection tied to the new
machine type avoids code duplication.

But what do you do for device model bugs serious enough to need fixing
even for existing machine types, when the fix needs additional state
migrated?  Subsections tied to machine types are *not* a solution there.
That's why this isn't about preference!  It's about having a bug to fix
or not.

You seem rather reluctant to use subsections for their intended purpose.
I don't understand; their correctness argument is really simple.

Fundamentally, we're picking a pair of state serialization and
deserialization functions (in the mathematical sense).

The obvious encoding function for a state consisting of pieces is to
encode all the pieces one by one.  Call this e(), and its decoding
function d().

Here's an alternative pair e'() and d'():

* Pick a piece of the state.

* Pick one of its values.  Let's call it "the prevalent value" for
  reasons that will become apparent shortly.

* Encode just like e() does, except omit this piece of state if and only
  if it has its prevalent value.

* In the decoding function, default this piece of state to its prevalent
  value.

The encodings are equivalent: d(e(x)) = d'(e'(x)) for all x.

At this point, you might want to tell me not to waste your time with
trivial math.  If so, good!  The triviality is exactly my point.

Straightforwardly translated to code, e() and d() put everything in the
section.  Drawback: add/remove/modify of state kills migration to and
from older versions.

e'() and d'() put a piece of state in a subsection that is only present
when the piece doesn't have its prevalent value.

Because the encodings are equivalent, forward migration transfers
exactly the same state regardless of which of them we choose.

So why not choose one that makes backward migration work exactly when
it's safe?

* Put all pieces of state the old version doesn't understand in
  subsections.

* Identify prevalent state.

* Double-check the old version behaves safely in prevalent state.

* In prevalent state, no subsections are sent, and migration succeeds.

* In any other state, migration fails.

Naturally, this is useful only when the prevalent state is actually
prevalent.

> You might like to add a comment to that effect.
>
> Would I be correct in thinking that all our common OSs
> end up not sending this section while running and not
> accessing the floppy?

Yes, the prevalent state better be prevalent.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCH v2 3/8] fdc: Introduce fdctrl->phase
  2015-05-29  7:50       ` Markus Armbruster
@ 2015-05-29  8:33         ` Dr. David Alan Gilbert
  2015-05-29  9:11           ` Kevin Wolf
  2015-06-01 12:46           ` Markus Armbruster
  0 siblings, 2 replies; 25+ messages in thread
From: Dr. David Alan Gilbert @ 2015-05-29  8:33 UTC (permalink / raw)
  To: Markus Armbruster
  Cc: Kevin Wolf, peter.maydell, John Snow, qemu-devel, qemu-block

* Markus Armbruster (armbru@redhat.com) wrote:
> "Dr. David Alan Gilbert" <dgilbert@redhat.com> writes:
> 
> > * John Snow (jsnow@redhat.com) wrote:
> >> 
> >> 
> >> On 05/21/2015 09:19 AM, Kevin Wolf wrote:
> >> > The floppy controller spec describes three different controller phases,
> >> > which are currently not explicitly modelled in our emulation. Instead,
> >> > each phase is represented by a combination of flags in registers.
> >> > 
> >> > This patch makes explicit in which phase the controller currently is.
> >> > 
> >> > Signed-off-by: Kevin Wolf <kwolf@redhat.com>

<snip>

> >> > +static bool fdc_phase_needed(void *opaque)
> >> > +{
> >> > +    FDCtrl *fdctrl = opaque;
> >> > +
> >> > +    return reconstruct_phase(fdctrl) != fdctrl->phase;
> >> > +}
> >
> > OK, that is one way of doing it which should work, as long
> > as most of the time that matches.
> >
> > My preference for subsections is normally to make them just
> > conditional on machine type, that way backwards migration just
> > works.  However, if the result of the backwards migration would
> > be data corruption (which if I understand what you saying it could
> > in this case) then it's arguably better to fail migration in those
> > cases.
> 
> This isn't a matter of preference.
> 
> Subsections were designed to solves a problem we only have because we're
> not perfect: device model bugs, namely forgetting to migrate a bit of
> state, or failing to model it in the first place.
> 
> Subsections tied to machine types solve an entirely different problem:
> the old version of the device is just fine, including migration, but we
> now want a new and improved variant, which needs some more state
> migrated.  Putting that piece of state into a subsection tied to the new
> machine type avoids code duplication.
> 
> But what do you do for device model bugs serious enough to need fixing
> even for existing machine types, when the fix needs additional state
> migrated?  Subsections tied to machine types are *not* a solution there.
> That's why this isn't about preference!  It's about having a bug to fix
> or not.

My problem is that people keep adding subsections to fix minor device
bugs because they think breaking migration is irrelevant.  If the bug
being fixed causes data corruption then OK I agree it is probably better
to break migration, otherwise  you need to make a decision about whether
fixing the device bug during migration is better or worse than having
the migration fail.
Some people say 'Migration failure is ok - people just try it again';
sorry, no, that's useless in environments where the VM has 100s of GB
of RAM and takes hours to migrate only for it then to fail, and it
was urgent that it was migrated off that source host.

> You seem rather reluctant to use subsections for their intended purpose.
> I don't understand; their correctness argument is really simple.

Correct, I am; it encourages people to break migration compatibility
too strongly.

Now, since I have to worry about backwards-migration downstream
this influences my choice.   People keep telling me that upstream
doesn't care about backwards migration compatibility, but I've not
seen any evidence of that, indeed I've seen other people trying
to put backwards migration fixes in.

> Fundamentally, we're picking a pair of state serialization and
> deserialization functions (in the mathematical sense).
> 
> The obvious encoding function for a state consisting of pieces is to
> encode all the pieces one by one.  Call this e(), and its decoding
> function d().
> 
> Here's an alternative pair e'() and d'():
> 
> * Pick a piece of the state.
> 
> * Pick one of its values.  Let's call it "the prevalent value" for
>   reasons that will become apparent shortly.
> 
> * Encode just like e() does, except omit this piece of state if and only
>   if it has its prevalent value.
> 
> * In the decoding function, default this piece of state to its prevalent
>   value.
> 
> The encodings are equivalent: d(e(x)) = d'(e'(x)) for all x.
> 
> At this point, you might want to tell me not to waste your time with
> trivial math.  If so, good!  The triviality is exactly my point.

Well I was going to mention it, but since you already have I wont bother....
Actually, the problem here is that the symbols look trivial but they're
meaningless without the following textual description, and that's
where the problems are.

> Straightforwardly translated to code, e() and d() put everything in the
> section.  Drawback: add/remove/modify of state kills migration to and
> from older versions.
> 
> e'() and d'() put a piece of state in a subsection that is only present
> when the piece doesn't have its prevalent value.
> 
> Because the encodings are equivalent, forward migration transfers
> exactly the same state regardless of which of them we choose.
> 
> So why not choose one that makes backward migration work exactly when
> it's safe?
> 
> * Put all pieces of state the old version doesn't understand in
>   subsections.
> 
> * Identify prevalent state.
> 
> * Double-check the old version behaves safely in prevalent state.
> 
> * In prevalent state, no subsections are sent, and migration succeeds.
> 
> * In any other state, migration fails.
> 
> Naturally, this is useful only when the prevalent state is actually
> prevalent.

The 'prevalent state' idea is what I hate about the use of subsections.

It's very very difficult to reason about:
   1) subtle changes in either the guest OS or the implementation can
      change what the 'prevalent state' is.
   2) Are you sure that the prevalent state is the same on Windows n+1?
   3) What about during boot?
   4) What about during installation?

It's then roulette as to whether a migration will work.
It's also a nightmare to test, having bugs where migration fails 1/100
times on some random OS in a production environment is horrible, and
that's exactly what the 'prevalent state' design produces.

I work to the rule that:
   a) If the migration bug is such that the effect of the missing state
      is really bad; a hung guest, or data corruption - then sure
      use a subsection based on a 'prevalent state' decision.

   b) Else tie it to the machine type.

   c) A combination is even better; i.e. always send the state
      on new machine types, just because it makes the behaviour
      more consistent.

> > You might like to add a comment to that effect.
> >
> > Would I be correct in thinking that all our common OSs
> > end up not sending this section while running and not
> > accessing the floppy?
> 
> Yes, the prevalent state better be prevalent.

Dave

--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCH v2 3/8] fdc: Introduce fdctrl->phase
  2015-05-29  8:33         ` Dr. David Alan Gilbert
@ 2015-05-29  9:11           ` Kevin Wolf
  2015-05-29  9:38             ` Dr. David Alan Gilbert
  2015-06-01 12:46           ` Markus Armbruster
  1 sibling, 1 reply; 25+ messages in thread
From: Kevin Wolf @ 2015-05-29  9:11 UTC (permalink / raw)
  To: Dr. David Alan Gilbert
  Cc: peter.maydell, John Snow, Markus Armbruster, qemu-block, qemu-devel

Am 29.05.2015 um 10:33 hat Dr. David Alan Gilbert geschrieben:
> * Markus Armbruster (armbru@redhat.com) wrote:
> > "Dr. David Alan Gilbert" <dgilbert@redhat.com> writes:
> > 
> > > * John Snow (jsnow@redhat.com) wrote:
> > >> 
> > >> 
> > >> On 05/21/2015 09:19 AM, Kevin Wolf wrote:
> > >> > The floppy controller spec describes three different controller phases,
> > >> > which are currently not explicitly modelled in our emulation. Instead,
> > >> > each phase is represented by a combination of flags in registers.
> > >> > 
> > >> > This patch makes explicit in which phase the controller currently is.
> > >> > 
> > >> > Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> 
> <snip>
> 
> > >> > +static bool fdc_phase_needed(void *opaque)
> > >> > +{
> > >> > +    FDCtrl *fdctrl = opaque;
> > >> > +
> > >> > +    return reconstruct_phase(fdctrl) != fdctrl->phase;
> > >> > +}
> > >
> > > OK, that is one way of doing it which should work, as long
> > > as most of the time that matches.
> > >
> > > My preference for subsections is normally to make them just
> > > conditional on machine type, that way backwards migration just
> > > works.  However, if the result of the backwards migration would
> > > be data corruption (which if I understand what you saying it could
> > > in this case) then it's arguably better to fail migration in those
> > > cases.
> > 
> > This isn't a matter of preference.
> > 
> > Subsections were designed to solves a problem we only have because we're
> > not perfect: device model bugs, namely forgetting to migrate a bit of
> > state, or failing to model it in the first place.
> > 
> > Subsections tied to machine types solve an entirely different problem:
> > the old version of the device is just fine, including migration, but we
> > now want a new and improved variant, which needs some more state
> > migrated.  Putting that piece of state into a subsection tied to the new
> > machine type avoids code duplication.
> > 
> > But what do you do for device model bugs serious enough to need fixing
> > even for existing machine types, when the fix needs additional state
> > migrated?  Subsections tied to machine types are *not* a solution there.
> > That's why this isn't about preference!  It's about having a bug to fix
> > or not.
> 
> My problem is that people keep adding subsections to fix minor device
> bugs because they think breaking migration is irrelevant.  If the bug
> being fixed causes data corruption then OK I agree it is probably better
> to break migration, otherwise  you need to make a decision about whether
> fixing the device bug during migration is better or worse than having
> the migration fail.
> Some people say 'Migration failure is ok - people just try it again';
> sorry, no, that's useless in environments where the VM has 100s of GB
> of RAM and takes hours to migrate only for it then to fail, and it
> was urgent that it was migrated off that source host.

If this is a problem in practice, it sounds a lot like we need to
improve our handling of that situation in general. Why do we abort
the whole migration when serialising the state fails? Can't we abort
just the completion and switch back to the live state, and then retry a
bit later?

Most, if not all, of the subsections you mentioned that fix minor bugs
fix some states while the device is actively used and shorty after we
should be back in a state where the subsection isn't needed.

This is exactly the "fails 1 in 100 migrations" cases you talked about,
and they would be practically fixed if you made a few more attempts to
complete the migration before you let it fail (or perhaps even retry
indefinitely, as long as the user doesn't cancel migration).

Kevin

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCH v2 3/8] fdc: Introduce fdctrl->phase
  2015-05-29  9:11           ` Kevin Wolf
@ 2015-05-29  9:38             ` Dr. David Alan Gilbert
  2015-05-29 10:27               ` Kevin Wolf
  0 siblings, 1 reply; 25+ messages in thread
From: Dr. David Alan Gilbert @ 2015-05-29  9:38 UTC (permalink / raw)
  To: Kevin Wolf
  Cc: peter.maydell, John Snow, Markus Armbruster, qemu-block, qemu-devel

* Kevin Wolf (kwolf@redhat.com) wrote:
> Am 29.05.2015 um 10:33 hat Dr. David Alan Gilbert geschrieben:
> > * Markus Armbruster (armbru@redhat.com) wrote:
> > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> writes:
> > > 
> > > > * John Snow (jsnow@redhat.com) wrote:
> > > >> 
> > > >> 
> > > >> On 05/21/2015 09:19 AM, Kevin Wolf wrote:
> > > >> > The floppy controller spec describes three different controller phases,
> > > >> > which are currently not explicitly modelled in our emulation. Instead,
> > > >> > each phase is represented by a combination of flags in registers.
> > > >> > 
> > > >> > This patch makes explicit in which phase the controller currently is.
> > > >> > 
> > > >> > Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> > 
> > <snip>
> > 
> > > >> > +static bool fdc_phase_needed(void *opaque)
> > > >> > +{
> > > >> > +    FDCtrl *fdctrl = opaque;
> > > >> > +
> > > >> > +    return reconstruct_phase(fdctrl) != fdctrl->phase;
> > > >> > +}
> > > >
> > > > OK, that is one way of doing it which should work, as long
> > > > as most of the time that matches.
> > > >
> > > > My preference for subsections is normally to make them just
> > > > conditional on machine type, that way backwards migration just
> > > > works.  However, if the result of the backwards migration would
> > > > be data corruption (which if I understand what you saying it could
> > > > in this case) then it's arguably better to fail migration in those
> > > > cases.
> > > 
> > > This isn't a matter of preference.
> > > 
> > > Subsections were designed to solves a problem we only have because we're
> > > not perfect: device model bugs, namely forgetting to migrate a bit of
> > > state, or failing to model it in the first place.
> > > 
> > > Subsections tied to machine types solve an entirely different problem:
> > > the old version of the device is just fine, including migration, but we
> > > now want a new and improved variant, which needs some more state
> > > migrated.  Putting that piece of state into a subsection tied to the new
> > > machine type avoids code duplication.
> > > 
> > > But what do you do for device model bugs serious enough to need fixing
> > > even for existing machine types, when the fix needs additional state
> > > migrated?  Subsections tied to machine types are *not* a solution there.
> > > That's why this isn't about preference!  It's about having a bug to fix
> > > or not.
> > 
> > My problem is that people keep adding subsections to fix minor device
> > bugs because they think breaking migration is irrelevant.  If the bug
> > being fixed causes data corruption then OK I agree it is probably better
> > to break migration, otherwise  you need to make a decision about whether
> > fixing the device bug during migration is better or worse than having
> > the migration fail.
> > Some people say 'Migration failure is ok - people just try it again';
> > sorry, no, that's useless in environments where the VM has 100s of GB
> > of RAM and takes hours to migrate only for it then to fail, and it
> > was urgent that it was migrated off that source host.
> 
> If this is a problem in practice, it sounds a lot like we need to
> improve our handling of that situation in general. Why do we abort
> the whole migration when serialising the state fails? Can't we abort
> just the completion and switch back to the live state, and then retry a
> bit later?

It's not the serialisation that fails, it's the deserialisation on the destination,
and our migration stream is currently one way, so we have no way
of signalling back to the source to 'just try that again'.
So the migration fails, the source carries on running and we can only
try again from the beginning.

> Most, if not all, of the subsections you mentioned that fix minor bugs
> fix some states while the device is actively used and shorty after we
> should be back in a state where the subsection isn't needed.

I think once any (non-iterative) device state has been sent we can't
necessarily abandon and try again; I'm not confident that the devices
would work if we sent one set of state and then tried to reload it later.
I think that would need a further pass that went around all the devices and
said 'is it OK to migrate now' and that would happen before any of the
non-iterative devices were migrated.

> This is exactly the "fails 1 in 100 migrations" cases you talked about,
> and they would be practically fixed if you made a few more attempts to
> complete the migration before you let it fail (or perhaps even retry
> indefinitely, as long as the user doesn't cancel migration).

This causes a problem if your belief in the safe-state to migrate
doesn't happen on some new OS version, because then it will retry
indefinitely.  You'd know you didn't need to wait on the newer machine
type, but you could still use this trick on the older machine type.
(Of course if you knew you were going to a new QEMU that could
always accept the new section then you wouldn't worry about this -
but people dont like sending qemu versions).

Dave

> 
> Kevin
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCH v2 3/8] fdc: Introduce fdctrl->phase
  2015-05-29  9:38             ` Dr. David Alan Gilbert
@ 2015-05-29 10:27               ` Kevin Wolf
  2015-05-29 10:34                 ` Dr. David Alan Gilbert
  0 siblings, 1 reply; 25+ messages in thread
From: Kevin Wolf @ 2015-05-29 10:27 UTC (permalink / raw)
  To: Dr. David Alan Gilbert
  Cc: peter.maydell, John Snow, Markus Armbruster, qemu-block, qemu-devel

Am 29.05.2015 um 11:38 hat Dr. David Alan Gilbert geschrieben:
> * Kevin Wolf (kwolf@redhat.com) wrote:
> > Am 29.05.2015 um 10:33 hat Dr. David Alan Gilbert geschrieben:
> > > * Markus Armbruster (armbru@redhat.com) wrote:
> > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> writes:
> > > > 
> > > > > * John Snow (jsnow@redhat.com) wrote:
> > > > >> 
> > > > >> 
> > > > >> On 05/21/2015 09:19 AM, Kevin Wolf wrote:
> > > > >> > The floppy controller spec describes three different controller phases,
> > > > >> > which are currently not explicitly modelled in our emulation. Instead,
> > > > >> > each phase is represented by a combination of flags in registers.
> > > > >> > 
> > > > >> > This patch makes explicit in which phase the controller currently is.
> > > > >> > 
> > > > >> > Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> > > 
> > > <snip>
> > > 
> > > > >> > +static bool fdc_phase_needed(void *opaque)
> > > > >> > +{
> > > > >> > +    FDCtrl *fdctrl = opaque;
> > > > >> > +
> > > > >> > +    return reconstruct_phase(fdctrl) != fdctrl->phase;
> > > > >> > +}
> > > > >
> > > > > OK, that is one way of doing it which should work, as long
> > > > > as most of the time that matches.
> > > > >
> > > > > My preference for subsections is normally to make them just
> > > > > conditional on machine type, that way backwards migration just
> > > > > works.  However, if the result of the backwards migration would
> > > > > be data corruption (which if I understand what you saying it could
> > > > > in this case) then it's arguably better to fail migration in those
> > > > > cases.
> > > > 
> > > > This isn't a matter of preference.
> > > > 
> > > > Subsections were designed to solves a problem we only have because we're
> > > > not perfect: device model bugs, namely forgetting to migrate a bit of
> > > > state, or failing to model it in the first place.
> > > > 
> > > > Subsections tied to machine types solve an entirely different problem:
> > > > the old version of the device is just fine, including migration, but we
> > > > now want a new and improved variant, which needs some more state
> > > > migrated.  Putting that piece of state into a subsection tied to the new
> > > > machine type avoids code duplication.
> > > > 
> > > > But what do you do for device model bugs serious enough to need fixing
> > > > even for existing machine types, when the fix needs additional state
> > > > migrated?  Subsections tied to machine types are *not* a solution there.
> > > > That's why this isn't about preference!  It's about having a bug to fix
> > > > or not.
> > > 
> > > My problem is that people keep adding subsections to fix minor device
> > > bugs because they think breaking migration is irrelevant.  If the bug
> > > being fixed causes data corruption then OK I agree it is probably better
> > > to break migration, otherwise  you need to make a decision about whether
> > > fixing the device bug during migration is better or worse than having
> > > the migration fail.
> > > Some people say 'Migration failure is ok - people just try it again';
> > > sorry, no, that's useless in environments where the VM has 100s of GB
> > > of RAM and takes hours to migrate only for it then to fail, and it
> > > was urgent that it was migrated off that source host.
> > 
> > If this is a problem in practice, it sounds a lot like we need to
> > improve our handling of that situation in general. Why do we abort
> > the whole migration when serialising the state fails? Can't we abort
> > just the completion and switch back to the live state, and then retry a
> > bit later?
> 
> It's not the serialisation that fails, it's the deserialisation on the destination,
> and our migration stream is currently one way, so we have no way
> of signalling back to the source to 'just try that again'.
> So the migration fails, the source carries on running and we can only
> try again from the beginning.

True, forgot about that.

So what if the management tool queried the destination for supported
subsections and passed that as an (optional) argument to the migration
command on the source?

Then the source would have full control and could complete migration
only when no unsupported subsection are needed.

> > Most, if not all, of the subsections you mentioned that fix minor bugs
> > fix some states while the device is actively used and shorty after we
> > should be back in a state where the subsection isn't needed.
> 
> I think once any (non-iterative) device state has been sent we can't
> necessarily abandon and try again; I'm not confident that the devices
> would work if we sent one set of state and then tried to reload it later.

Why would serialising the device state change any device state? Don't
you already continue running the source after sending non-iterative
device state when migration fails on the destination?

> I think that would need a further pass that went around all the devices and
> said 'is it OK to migrate now' and that would happen before any of the
> non-iterative devices were migrated.

Yes, another pass is an option, too.

> > This is exactly the "fails 1 in 100 migrations" cases you talked about,
> > and they would be practically fixed if you made a few more attempts to
> > complete the migration before you let it fail (or perhaps even retry
> > indefinitely, as long as the user doesn't cancel migration).
> 
> This causes a problem if your belief in the safe-state to migrate
> doesn't happen on some new OS version, because then it will retry
> indefinitely.  You'd know you didn't need to wait on the newer machine
> type, but you could still use this trick on the older machine type.
> (Of course if you knew you were going to a new QEMU that could
> always accept the new section then you wouldn't worry about this -
> but people dont like sending qemu versions).

Is retrying indefinitely a worse problem than failing migration, though?
Anyway, this was just a thought. You could as well give up after a
number of attempts and just increase that number from 1 currently to
at least maybe 10 or so. (Potentially even configurable, with
indefinitely being one option.)

Kevin

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCH v2 3/8] fdc: Introduce fdctrl->phase
  2015-05-29 10:27               ` Kevin Wolf
@ 2015-05-29 10:34                 ` Dr. David Alan Gilbert
  2015-05-29 10:55                   ` Peter Maydell
  2015-05-29 10:59                   ` Kevin Wolf
  0 siblings, 2 replies; 25+ messages in thread
From: Dr. David Alan Gilbert @ 2015-05-29 10:34 UTC (permalink / raw)
  To: Kevin Wolf
  Cc: peter.maydell, John Snow, Markus Armbruster, qemu-block, qemu-devel

* Kevin Wolf (kwolf@redhat.com) wrote:
> Am 29.05.2015 um 11:38 hat Dr. David Alan Gilbert geschrieben:
> > * Kevin Wolf (kwolf@redhat.com) wrote:
> > > Am 29.05.2015 um 10:33 hat Dr. David Alan Gilbert geschrieben:
> > > > * Markus Armbruster (armbru@redhat.com) wrote:
> > > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> writes:
> > > > > 
> > > > > > * John Snow (jsnow@redhat.com) wrote:
> > > > > >> 
> > > > > >> 
> > > > > >> On 05/21/2015 09:19 AM, Kevin Wolf wrote:
> > > > > >> > The floppy controller spec describes three different controller phases,
> > > > > >> > which are currently not explicitly modelled in our emulation. Instead,
> > > > > >> > each phase is represented by a combination of flags in registers.
> > > > > >> > 
> > > > > >> > This patch makes explicit in which phase the controller currently is.
> > > > > >> > 
> > > > > >> > Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> > > > 
> > > > <snip>
> > > > 
> > > > > >> > +static bool fdc_phase_needed(void *opaque)
> > > > > >> > +{
> > > > > >> > +    FDCtrl *fdctrl = opaque;
> > > > > >> > +
> > > > > >> > +    return reconstruct_phase(fdctrl) != fdctrl->phase;
> > > > > >> > +}
> > > > > >
> > > > > > OK, that is one way of doing it which should work, as long
> > > > > > as most of the time that matches.
> > > > > >
> > > > > > My preference for subsections is normally to make them just
> > > > > > conditional on machine type, that way backwards migration just
> > > > > > works.  However, if the result of the backwards migration would
> > > > > > be data corruption (which if I understand what you saying it could
> > > > > > in this case) then it's arguably better to fail migration in those
> > > > > > cases.
> > > > > 
> > > > > This isn't a matter of preference.
> > > > > 
> > > > > Subsections were designed to solves a problem we only have because we're
> > > > > not perfect: device model bugs, namely forgetting to migrate a bit of
> > > > > state, or failing to model it in the first place.
> > > > > 
> > > > > Subsections tied to machine types solve an entirely different problem:
> > > > > the old version of the device is just fine, including migration, but we
> > > > > now want a new and improved variant, which needs some more state
> > > > > migrated.  Putting that piece of state into a subsection tied to the new
> > > > > machine type avoids code duplication.
> > > > > 
> > > > > But what do you do for device model bugs serious enough to need fixing
> > > > > even for existing machine types, when the fix needs additional state
> > > > > migrated?  Subsections tied to machine types are *not* a solution there.
> > > > > That's why this isn't about preference!  It's about having a bug to fix
> > > > > or not.
> > > > 
> > > > My problem is that people keep adding subsections to fix minor device
> > > > bugs because they think breaking migration is irrelevant.  If the bug
> > > > being fixed causes data corruption then OK I agree it is probably better
> > > > to break migration, otherwise  you need to make a decision about whether
> > > > fixing the device bug during migration is better or worse than having
> > > > the migration fail.
> > > > Some people say 'Migration failure is ok - people just try it again';
> > > > sorry, no, that's useless in environments where the VM has 100s of GB
> > > > of RAM and takes hours to migrate only for it then to fail, and it
> > > > was urgent that it was migrated off that source host.
> > > 
> > > If this is a problem in practice, it sounds a lot like we need to
> > > improve our handling of that situation in general. Why do we abort
> > > the whole migration when serialising the state fails? Can't we abort
> > > just the completion and switch back to the live state, and then retry a
> > > bit later?
> > 
> > It's not the serialisation that fails, it's the deserialisation on the destination,
> > and our migration stream is currently one way, so we have no way
> > of signalling back to the source to 'just try that again'.
> > So the migration fails, the source carries on running and we can only
> > try again from the beginning.
> 
> True, forgot about that.
> 
> So what if the management tool queried the destination for supported
> subsections and passed that as an (optional) argument to the migration
> command on the source?
> 
> Then the source would have full control and could complete migration
> only when no unsupported subsection are needed.

I tried doing something similar, where I had an optional parameter on
the source which was the version of the destination qemu; people really
hated that.

> > > Most, if not all, of the subsections you mentioned that fix minor bugs
> > > fix some states while the device is actively used and shorty after we
> > > should be back in a state where the subsection isn't needed.
> > 
> > I think once any (non-iterative) device state has been sent we can't
> > necessarily abandon and try again; I'm not confident that the devices
> > would work if we sent one set of state and then tried to reload it later.
> 
> Why would serialising the device state change any device state? Don't
> you already continue running the source after sending non-iterative
> device state when migration fails on the destination?

It's the destination I'm worried about here, not the source; lets say
you have two devices, a & b.  'a' gets serialised, but then 'b' finds
it has to wait, so we return to running the source and sending pages
across.   However the destination has already loaded the 'a' device state;
so that when we serialise again we send a new 'a' device state'; I'm
worrying here that the destination 'a' state loader would get upset
trying to load the same state twice without somehow resetting it.

> > I think that would need a further pass that went around all the devices and
> > said 'is it OK to migrate now' and that would happen before any of the
> > non-iterative devices were migrated.
> 
> Yes, another pass is an option, too.
> 
> > > This is exactly the "fails 1 in 100 migrations" cases you talked about,
> > > and they would be practically fixed if you made a few more attempts to
> > > complete the migration before you let it fail (or perhaps even retry
> > > indefinitely, as long as the user doesn't cancel migration).
> > 
> > This causes a problem if your belief in the safe-state to migrate
> > doesn't happen on some new OS version, because then it will retry
> > indefinitely.  You'd know you didn't need to wait on the newer machine
> > type, but you could still use this trick on the older machine type.
> > (Of course if you knew you were going to a new QEMU that could
> > always accept the new section then you wouldn't worry about this -
> > but people dont like sending qemu versions).
> 
> Is retrying indefinitely a worse problem than failing migration, though?
> Anyway, this was just a thought. You could as well give up after a
> number of attempts and just increase that number from 1 currently to
> at least maybe 10 or so. (Potentially even configurable, with
> indefinitely being one option.)

No, it's probably better than failing the migration, but what I'm worrying
about is, as in your code, where the destination has the new version
of qemu and could cope with the new subsections, so there is no need
to wait.

Dave

> 
> Kevin
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCH v2 3/8] fdc: Introduce fdctrl->phase
  2015-05-29 10:34                 ` Dr. David Alan Gilbert
@ 2015-05-29 10:55                   ` Peter Maydell
  2015-05-29 10:57                     ` Dr. David Alan Gilbert
  2015-05-29 10:59                   ` Kevin Wolf
  1 sibling, 1 reply; 25+ messages in thread
From: Peter Maydell @ 2015-05-29 10:55 UTC (permalink / raw)
  To: Dr. David Alan Gilbert
  Cc: Kevin Wolf, John Snow, Markus Armbruster, qemu-block, QEMU Developers

On 29 May 2015 at 11:34, Dr. David Alan Gilbert <dgilbert@redhat.com> wrote:
> It's the destination I'm worried about here, not the source; lets say
> you have two devices, a & b.  'a' gets serialised, but then 'b' finds
> it has to wait, so we return to running the source and sending pages
> across.   However the destination has already loaded the 'a' device state;
> so that when we serialise again we send a new 'a' device state'; I'm
> worrying here that the destination 'a' state loader would get upset
> trying to load the same state twice without somehow resetting it.

You would need to reset the system before resending device state,
I think. Device implementations rely on knowing that migration happens
into a freshly reset device, and we don't have any way of resetting
a single device, only the whole system at once.

-- PMM

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCH v2 3/8] fdc: Introduce fdctrl->phase
  2015-05-29 10:55                   ` Peter Maydell
@ 2015-05-29 10:57                     ` Dr. David Alan Gilbert
  2015-06-01 12:51                       ` Markus Armbruster
  0 siblings, 1 reply; 25+ messages in thread
From: Dr. David Alan Gilbert @ 2015-05-29 10:57 UTC (permalink / raw)
  To: Peter Maydell
  Cc: Kevin Wolf, John Snow, Markus Armbruster, qemu-block, QEMU Developers

* Peter Maydell (peter.maydell@linaro.org) wrote:
> On 29 May 2015 at 11:34, Dr. David Alan Gilbert <dgilbert@redhat.com> wrote:
> > It's the destination I'm worried about here, not the source; lets say
> > you have two devices, a & b.  'a' gets serialised, but then 'b' finds
> > it has to wait, so we return to running the source and sending pages
> > across.   However the destination has already loaded the 'a' device state;
> > so that when we serialise again we send a new 'a' device state'; I'm
> > worrying here that the destination 'a' state loader would get upset
> > trying to load the same state twice without somehow resetting it.
> 
> You would need to reset the system before resending device state,
> I think. Device implementations rely on knowing that migration happens
> into a freshly reset device, and we don't have any way of resetting
> a single device, only the whole system at once.

I don't think you can do a device (or certainly not a system) reset
without resending the contents of RAM which was the whole point of
not restarting the migration from the beginning.

Dave

> 
> -- PMM
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCH v2 3/8] fdc: Introduce fdctrl->phase
  2015-05-29 10:34                 ` Dr. David Alan Gilbert
  2015-05-29 10:55                   ` Peter Maydell
@ 2015-05-29 10:59                   ` Kevin Wolf
  1 sibling, 0 replies; 25+ messages in thread
From: Kevin Wolf @ 2015-05-29 10:59 UTC (permalink / raw)
  To: Dr. David Alan Gilbert
  Cc: peter.maydell, qemu-block, Markus Armbruster, qemu-devel,
	pbonzini, John Snow

Am 29.05.2015 um 12:34 hat Dr. David Alan Gilbert geschrieben:
> * Kevin Wolf (kwolf@redhat.com) wrote:
> > Am 29.05.2015 um 11:38 hat Dr. David Alan Gilbert geschrieben:
> > > * Kevin Wolf (kwolf@redhat.com) wrote:
> > > > Am 29.05.2015 um 10:33 hat Dr. David Alan Gilbert geschrieben:
> > > > > * Markus Armbruster (armbru@redhat.com) wrote:
> > > > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> writes:
> > > > > > 
> > > > > > > * John Snow (jsnow@redhat.com) wrote:
> > > > > > >> 
> > > > > > >> 
> > > > > > >> On 05/21/2015 09:19 AM, Kevin Wolf wrote:
> > > > > > >> > The floppy controller spec describes three different controller phases,
> > > > > > >> > which are currently not explicitly modelled in our emulation. Instead,
> > > > > > >> > each phase is represented by a combination of flags in registers.
> > > > > > >> > 
> > > > > > >> > This patch makes explicit in which phase the controller currently is.
> > > > > > >> > 
> > > > > > >> > Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> > > > > 
> > > > > <snip>
> > > > > 
> > > > > > >> > +static bool fdc_phase_needed(void *opaque)
> > > > > > >> > +{
> > > > > > >> > +    FDCtrl *fdctrl = opaque;
> > > > > > >> > +
> > > > > > >> > +    return reconstruct_phase(fdctrl) != fdctrl->phase;
> > > > > > >> > +}
> > > > > > >
> > > > > > > OK, that is one way of doing it which should work, as long
> > > > > > > as most of the time that matches.
> > > > > > >
> > > > > > > My preference for subsections is normally to make them just
> > > > > > > conditional on machine type, that way backwards migration just
> > > > > > > works.  However, if the result of the backwards migration would
> > > > > > > be data corruption (which if I understand what you saying it could
> > > > > > > in this case) then it's arguably better to fail migration in those
> > > > > > > cases.
> > > > > > 
> > > > > > This isn't a matter of preference.
> > > > > > 
> > > > > > Subsections were designed to solves a problem we only have because we're
> > > > > > not perfect: device model bugs, namely forgetting to migrate a bit of
> > > > > > state, or failing to model it in the first place.
> > > > > > 
> > > > > > Subsections tied to machine types solve an entirely different problem:
> > > > > > the old version of the device is just fine, including migration, but we
> > > > > > now want a new and improved variant, which needs some more state
> > > > > > migrated.  Putting that piece of state into a subsection tied to the new
> > > > > > machine type avoids code duplication.
> > > > > > 
> > > > > > But what do you do for device model bugs serious enough to need fixing
> > > > > > even for existing machine types, when the fix needs additional state
> > > > > > migrated?  Subsections tied to machine types are *not* a solution there.
> > > > > > That's why this isn't about preference!  It's about having a bug to fix
> > > > > > or not.
> > > > > 
> > > > > My problem is that people keep adding subsections to fix minor device
> > > > > bugs because they think breaking migration is irrelevant.  If the bug
> > > > > being fixed causes data corruption then OK I agree it is probably better
> > > > > to break migration, otherwise  you need to make a decision about whether
> > > > > fixing the device bug during migration is better or worse than having
> > > > > the migration fail.
> > > > > Some people say 'Migration failure is ok - people just try it again';
> > > > > sorry, no, that's useless in environments where the VM has 100s of GB
> > > > > of RAM and takes hours to migrate only for it then to fail, and it
> > > > > was urgent that it was migrated off that source host.
> > > > 
> > > > If this is a problem in practice, it sounds a lot like we need to
> > > > improve our handling of that situation in general. Why do we abort
> > > > the whole migration when serialising the state fails? Can't we abort
> > > > just the completion and switch back to the live state, and then retry a
> > > > bit later?
> > > 
> > > It's not the serialisation that fails, it's the deserialisation on the destination,
> > > and our migration stream is currently one way, so we have no way
> > > of signalling back to the source to 'just try that again'.
> > > So the migration fails, the source carries on running and we can only
> > > try again from the beginning.
> > 
> > True, forgot about that.
> > 
> > So what if the management tool queried the destination for supported
> > subsections and passed that as an (optional) argument to the migration
> > command on the source?
> > 
> > Then the source would have full control and could complete migration
> > only when no unsupported subsection are needed.
> 
> I tried doing something similar, where I had an optional parameter on
> the source which was the version of the destination qemu; people really
> hated that.

Did they hate it because version numbers are really badly suited for the
task (I would agree with that) or already because the source gets some
information about the destination?

In the mailing list thread that I could find quicky, only Paolo seemed
to object, and the two arguments that I see is that the version number
isn't a good criterion and that it might not be worth the effort for a
corner case that isn't practically relevant.

Apparently you think that it is in fact practically relevant, so the
only point that remains is replacing the version number by something
better. The name and version ID of all supported sections and
subsections should be the definite source for qemu to tell whether it
can migrate to the destination now, possibly can migrate later, or can't
migrate at all (if the version ID on the destination is too small).

> > > > Most, if not all, of the subsections you mentioned that fix minor bugs
> > > > fix some states while the device is actively used and shorty after we
> > > > should be back in a state where the subsection isn't needed.
> > > 
> > > I think once any (non-iterative) device state has been sent we can't
> > > necessarily abandon and try again; I'm not confident that the devices
> > > would work if we sent one set of state and then tried to reload it later.
> > 
> > Why would serialising the device state change any device state? Don't
> > you already continue running the source after sending non-iterative
> > device state when migration fails on the destination?
> 
> It's the destination I'm worried about here, not the source; lets say
> you have two devices, a & b.  'a' gets serialised, but then 'b' finds
> it has to wait, so we return to running the source and sending pages
> across.   However the destination has already loaded the 'a' device state;
> so that when we serialise again we send a new 'a' device state'; I'm
> worrying here that the destination 'a' state loader would get upset
> trying to load the same state twice without somehow resetting it.

If that were the case, it would be a bug that needs fixing.

For pure VMState fields, it's obviously not a problem. Anything with
pre/post_load handlers could in theory have bugs, though.

> > > I think that would need a further pass that went around all the devices and
> > > said 'is it OK to migrate now' and that would happen before any of the
> > > non-iterative devices were migrated.
> > 
> > Yes, another pass is an option, too.

So if you're concerned about it, and we don't want to audit the
pre/post_load handlers, just do this second pass. It makes sense anyway
to fail as soon as possible and not send lots of data before we fail.

> > > > This is exactly the "fails 1 in 100 migrations" cases you talked about,
> > > > and they would be practically fixed if you made a few more attempts to
> > > > complete the migration before you let it fail (or perhaps even retry
> > > > indefinitely, as long as the user doesn't cancel migration).
> > > 
> > > This causes a problem if your belief in the safe-state to migrate
> > > doesn't happen on some new OS version, because then it will retry
> > > indefinitely.  You'd know you didn't need to wait on the newer machine
> > > type, but you could still use this trick on the older machine type.
> > > (Of course if you knew you were going to a new QEMU that could
> > > always accept the new section then you wouldn't worry about this -
> > > but people dont like sending qemu versions).
> > 
> > Is retrying indefinitely a worse problem than failing migration, though?
> > Anyway, this was just a thought. You could as well give up after a
> > number of attempts and just increase that number from 1 currently to
> > at least maybe 10 or so. (Potentially even configurable, with
> > indefinitely being one option.)
> 
> No, it's probably better than failing the migration, but what I'm worrying
> about is, as in your code, where the destination has the new version
> of qemu and could cope with the new subsections, so there is no need
> to wait.

Waiting obviously only makes sense when you know what subsections the
destination supports.

Kevin

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCH v2 3/8] fdc: Introduce fdctrl->phase
  2015-05-29  8:33         ` Dr. David Alan Gilbert
  2015-05-29  9:11           ` Kevin Wolf
@ 2015-06-01 12:46           ` Markus Armbruster
  1 sibling, 0 replies; 25+ messages in thread
From: Markus Armbruster @ 2015-06-01 12:46 UTC (permalink / raw)
  To: Dr. David Alan Gilbert
  Cc: Kevin Wolf, peter.maydell, John Snow, qemu-devel, qemu-block

"Dr. David Alan Gilbert" <dgilbert@redhat.com> writes:

> * Markus Armbruster (armbru@redhat.com) wrote:
>> "Dr. David Alan Gilbert" <dgilbert@redhat.com> writes:
>> 
>> > * John Snow (jsnow@redhat.com) wrote:
>> >> 
>> >> 
>> >> On 05/21/2015 09:19 AM, Kevin Wolf wrote:
>> >> > The floppy controller spec describes three different controller phases,
>> >> > which are currently not explicitly modelled in our emulation. Instead,
>> >> > each phase is represented by a combination of flags in registers.
>> >> > 
>> >> > This patch makes explicit in which phase the controller currently is.
>> >> > 
>> >> > Signed-off-by: Kevin Wolf <kwolf@redhat.com>
>
> <snip>
>
>> >> > +static bool fdc_phase_needed(void *opaque)
>> >> > +{
>> >> > +    FDCtrl *fdctrl = opaque;
>> >> > +
>> >> > +    return reconstruct_phase(fdctrl) != fdctrl->phase;
>> >> > +}
>> >
>> > OK, that is one way of doing it which should work, as long
>> > as most of the time that matches.
>> >
>> > My preference for subsections is normally to make them just
>> > conditional on machine type, that way backwards migration just
>> > works.  However, if the result of the backwards migration would
>> > be data corruption (which if I understand what you saying it could
>> > in this case) then it's arguably better to fail migration in those
>> > cases.
>> 
>> This isn't a matter of preference.
>> 
>> Subsections were designed to solves a problem we only have because we're
>> not perfect: device model bugs, namely forgetting to migrate a bit of
>> state, or failing to model it in the first place.
>> 
>> Subsections tied to machine types solve an entirely different problem:
>> the old version of the device is just fine, including migration, but we
>> now want a new and improved variant, which needs some more state
>> migrated.  Putting that piece of state into a subsection tied to the new
>> machine type avoids code duplication.
>> 
>> But what do you do for device model bugs serious enough to need fixing
>> even for existing machine types, when the fix needs additional state
>> migrated?  Subsections tied to machine types are *not* a solution there.
>> That's why this isn't about preference!  It's about having a bug to fix
>> or not.
>
> My problem is that people keep adding subsections to fix minor device
> bugs because they think breaking migration is irrelevant.  If the bug
> being fixed causes data corruption then OK I agree it is probably better
> to break migration, otherwise  you need to make a decision about whether
> fixing the device bug during migration is better or worse than having
> the migration fail.
> Some people say 'Migration failure is ok - people just try it again';
> sorry, no, that's useless in environments where the VM has 100s of GB
> of RAM and takes hours to migrate only for it then to fail, and it
> was urgent that it was migrated off that source host.
>
>> You seem rather reluctant to use subsections for their intended purpose.
>> I don't understand; their correctness argument is really simple.
>
> Correct, I am; it encourages people to break migration compatibility
> too strongly.
>
> Now, since I have to worry about backwards-migration downstream
> this influences my choice.   People keep telling me that upstream
> doesn't care about backwards migration compatibility, but I've not
> seen any evidence of that,

If we cared about backwards migration compatibility, we'd test it.

In my opinion, we do care about infrastructure for making backwards
migration possible, but that's about it.

>                            indeed I've seen other people trying
> to put backwards migration fixes in.

Doesn't quite rise to a level I'd call "care".

>> Fundamentally, we're picking a pair of state serialization and
>> deserialization functions (in the mathematical sense).
>> 
>> The obvious encoding function for a state consisting of pieces is to
>> encode all the pieces one by one.  Call this e(), and its decoding
>> function d().
>> 
>> Here's an alternative pair e'() and d'():
>> 
>> * Pick a piece of the state.
>> 
>> * Pick one of its values.  Let's call it "the prevalent value" for
>>   reasons that will become apparent shortly.
>> 
>> * Encode just like e() does, except omit this piece of state if and only
>>   if it has its prevalent value.
>> 
>> * In the decoding function, default this piece of state to its prevalent
>>   value.
>> 
>> The encodings are equivalent: d(e(x)) = d'(e'(x)) for all x.
>> 
>> At this point, you might want to tell me not to waste your time with
>> trivial math.  If so, good!  The triviality is exactly my point.
>
> Well I was going to mention it, but since you already have I wont bother....
> Actually, the problem here is that the symbols look trivial but they're
> meaningless without the following textual description, and that's
> where the problems are.
>
>> Straightforwardly translated to code, e() and d() put everything in the
>> section.  Drawback: add/remove/modify of state kills migration to and
>> from older versions.
>> 
>> e'() and d'() put a piece of state in a subsection that is only present
>> when the piece doesn't have its prevalent value.
>> 
>> Because the encodings are equivalent, forward migration transfers
>> exactly the same state regardless of which of them we choose.
>> 
>> So why not choose one that makes backward migration work exactly when
>> it's safe?
>> 
>> * Put all pieces of state the old version doesn't understand in
>>   subsections.
>> 
>> * Identify prevalent state.
>> 
>> * Double-check the old version behaves safely in prevalent state.
>> 
>> * In prevalent state, no subsections are sent, and migration succeeds.
>> 
>> * In any other state, migration fails.
>> 
>> Naturally, this is useful only when the prevalent state is actually
>> prevalent.
>
> The 'prevalent state' idea is what I hate about the use of subsections.
>
> It's very very difficult to reason about:
>    1) subtle changes in either the guest OS or the implementation can
>       change what the 'prevalent state' is.
>    2) Are you sure that the prevalent state is the same on Windows n+1?
>    3) What about during boot?
>    4) What about during installation?
>
> It's then roulette as to whether a migration will work.
> It's also a nightmare to test, having bugs where migration fails 1/100
> times on some random OS in a production environment is horrible, and
> that's exactly what the 'prevalent state' design produces.

Thanks for explaining your thinking, I believe I understand it better
now.

The rationale behind the "prevalent state" design is that failing
migration outright is better than letting it silently corrupt state.  I
still think that makes some sense.  However:

> I work to the rule that:
>    a) If the migration bug is such that the effect of the missing state
>       is really bad; a hung guest, or data corruption - then sure
>       use a subsection based on a 'prevalent state' decision.
>
>    b) Else tie it to the machine type.

You're right in that state corruption isn't necessarily catastrophic.
When its impact is minor, availability (backward migration succeeds)
*can* be preferable to correctness (state is exactly preserved under
backward and forward migration).

In other words: I agree with you there's a tradeoff to be made.

>    c) A combination is even better; i.e. always send the state
>       on new machine types, just because it makes the behaviour
>       more consistent.

I don't like (c), because it adds a second "needed" predicate.  Granted,
the new one's as trivial as can be, but X + trivial is still more
complicated than just X.

[...]

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCH v2 3/8] fdc: Introduce fdctrl->phase
  2015-05-29 10:57                     ` Dr. David Alan Gilbert
@ 2015-06-01 12:51                       ` Markus Armbruster
  0 siblings, 0 replies; 25+ messages in thread
From: Markus Armbruster @ 2015-06-01 12:51 UTC (permalink / raw)
  To: Dr. David Alan Gilbert
  Cc: Kevin Wolf, Peter Maydell, John Snow, QEMU Developers, qemu-block

"Dr. David Alan Gilbert" <dgilbert@redhat.com> writes:

> * Peter Maydell (peter.maydell@linaro.org) wrote:
>> On 29 May 2015 at 11:34, Dr. David Alan Gilbert <dgilbert@redhat.com> wrote:
>> > It's the destination I'm worried about here, not the source; lets say
>> > you have two devices, a & b.  'a' gets serialised, but then 'b' finds
>> > it has to wait, so we return to running the source and sending pages
>> > across.   However the destination has already loaded the 'a' device state;
>> > so that when we serialise again we send a new 'a' device state'; I'm
>> > worrying here that the destination 'a' state loader would get upset
>> > trying to load the same state twice without somehow resetting it.
>> 
>> You would need to reset the system before resending device state,
>> I think. Device implementations rely on knowing that migration happens
>> into a freshly reset device, and we don't have any way of resetting
>> a single device, only the whole system at once.
>
> I don't think you can do a device (or certainly not a system) reset
> without resending the contents of RAM which was the whole point of
> not restarting the migration from the beginning.

What about resetting all the devices, but keeping contents of RAM, then
send only newly dirtied RAM plus complete device state?

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCH v2 0/8] fdc: Clean up and fix command processing
  2015-05-21 13:19 [Qemu-devel] [PATCH v2 0/8] fdc: Clean up and fix command processing Kevin Wolf
                   ` (7 preceding siblings ...)
  2015-05-21 13:19 ` [Qemu-devel] [PATCH v2 8/8] fdc-test: Test state for existing cases more thoroughly Kevin Wolf
@ 2015-06-02 17:37 ` John Snow
  8 siblings, 0 replies; 25+ messages in thread
From: John Snow @ 2015-06-02 17:37 UTC (permalink / raw)
  To: Kevin Wolf, qemu-block; +Cc: peter.maydell, qemu-devel



On 05/21/2015 09:19 AM, Kevin Wolf wrote:
> The hotfix for CVE-2015-3456 fixed the security problem, but didn't
> fully correct the behaviour of the emulated floppy controller.  This
> series fixes the bug that was the root cause for the problem, and does
> some cleanup in the FIFO access functions to make the command processing
> more obvious.
> 
> v2:
> - Patch 3: Include fdctrl->phase in the migration state. [Peter]
> - Patch 4: Added a comment to clarify an assertion [Peter]
> - Patch 5: Check pos == 0 instead of fdctrl->data_pos == 1 [John]
> - Patch 7: Improved commit message [John]
> 
> FWIW, when testing this, I found that migration with active I/O on a
> floppy drive doesn't work very reliably. These problems were there
> before the series and they stay after the series. I verified as good
> as I could that the subsection magic does its job, and I'll leave
> fixing the other floppy migration bugs for someone else.
> 
> 
> Kevin Wolf (8):
>   fdc: Rename fdctrl_reset_fifo() to fdctrl_to_command_phase()
>   fdc: Rename fdctrl_set_fifo() to fdctrl_to_result_phase()
>   fdc: Introduce fdctrl->phase
>   fdc: Use phase in fdctrl_write_data()
>   fdc: Code cleanup in fdctrl_write_data()
>   fdc: Disentangle phases in fdctrl_read_data()
>   fdc: Fix MSR.RQM flag
>   fdc-test: Test state for existing cases more thoroughly
> 
>  hw/block/fdc.c   | 296 ++++++++++++++++++++++++++++++++++++++++---------------
>  tests/fdc-test.c |  34 +++++++
>  2 files changed, 253 insertions(+), 77 deletions(-)
> 

>From what I can tell, it seems like Kevin's current migration approach
is appropriate for now, regardless of the migration policy debate that's
still ongoing.

It looks okay to me and David Gilbert gave it his ACK, so I have staged
this in my increasingly inaccurately named IDE branch, thanks.

https://github.com/jnsnow/qemu/commits/ide

--js

^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2015-06-02 17:37 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-05-21 13:19 [Qemu-devel] [PATCH v2 0/8] fdc: Clean up and fix command processing Kevin Wolf
2015-05-21 13:19 ` [Qemu-devel] [PATCH v2 1/8] fdc: Rename fdctrl_reset_fifo() to fdctrl_to_command_phase() Kevin Wolf
2015-05-21 13:19 ` [Qemu-devel] [PATCH v2 2/8] fdc: Rename fdctrl_set_fifo() to fdctrl_to_result_phase() Kevin Wolf
2015-05-21 13:19 ` [Qemu-devel] [PATCH v2 3/8] fdc: Introduce fdctrl->phase Kevin Wolf
2015-05-21 21:55   ` John Snow
2015-05-28 17:29     ` Dr. David Alan Gilbert
2015-05-29  7:50       ` Markus Armbruster
2015-05-29  8:33         ` Dr. David Alan Gilbert
2015-05-29  9:11           ` Kevin Wolf
2015-05-29  9:38             ` Dr. David Alan Gilbert
2015-05-29 10:27               ` Kevin Wolf
2015-05-29 10:34                 ` Dr. David Alan Gilbert
2015-05-29 10:55                   ` Peter Maydell
2015-05-29 10:57                     ` Dr. David Alan Gilbert
2015-06-01 12:51                       ` Markus Armbruster
2015-05-29 10:59                   ` Kevin Wolf
2015-06-01 12:46           ` Markus Armbruster
2015-05-21 13:19 ` [Qemu-devel] [PATCH v2 4/8] fdc: Use phase in fdctrl_write_data() Kevin Wolf
2015-05-21 13:19 ` [Qemu-devel] [PATCH v2 5/8] fdc: Code cleanup " Kevin Wolf
2015-05-21 21:34   ` John Snow
2015-05-21 13:19 ` [Qemu-devel] [PATCH v2 6/8] fdc: Disentangle phases in fdctrl_read_data() Kevin Wolf
2015-05-21 13:19 ` [Qemu-devel] [PATCH v2 7/8] fdc: Fix MSR.RQM flag Kevin Wolf
2015-05-21 21:27   ` John Snow
2015-05-21 13:19 ` [Qemu-devel] [PATCH v2 8/8] fdc-test: Test state for existing cases more thoroughly Kevin Wolf
2015-06-02 17:37 ` [Qemu-devel] [PATCH v2 0/8] fdc: Clean up and fix command processing John Snow

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.