All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v2] lsi: Reselection needed to remove pending commands from queue
@ 2018-10-22 21:28 George Kennedy
  2018-10-23 14:33 ` Paolo Bonzini
  0 siblings, 1 reply; 8+ messages in thread
From: George Kennedy @ 2018-10-22 21:28 UTC (permalink / raw)
  To: pbonzini, famz, qemu-devel, george.kennedy

Under heavy IO (e.g. fio) the queue is not checked frequently enough for
pending commands. As a result some pending commands are timed out by the
linux sym53c8xx driver, which sends SCSI Abort messages for the timed out
commands. The SCSI Abort messages result in linux errors, which show up
in /var/log/messages.

e.g.
sd 0:0:3:0: [sdd] tag#33 ABORT operation started
scsi target0:0:3: control msgout:
80 20 47 d
sd 0:0:3:0: ABORT operation complete.
scsi target0:0:4: message d sent on bad reselection

Add a deadline along with the command when it is added to the queue.
When the current command completes, check the queue for pending commands
that have exceeded the deadline and if so, simulate a Wait Reselect
to handle the pending commands on the queue.

When a Wait Reselect is needed, intercept and save the current DMA Scripts
Ptr (DSP) contents and load it instead with the pointer to the Reselection
Scripts.  When Reselection has completed, restore the original DSP contents.

Signed-off-by: George Kennedy <george.kennedy@oracle.com>
---
Thank you for reviewing, Paolo,

As you suggested I moved the loading of "s->resel_dsp" down to the "Wait Reselect"
case. The address of the Reselection Scripts, though, is contained in "s->dsp - 8"
and not in s->dnad.

The reason the timeout is needed is that under heavy IO some pending commands
stay on the pending queue longer than the 30 second command timeout set by the
linux upper layer scsi driver (sym53c8xx). When command timeouts occur, the
upper layer scsi driver sends SCSI Abort messages to remove the timed out
commands. The command timeouts are caused by the fact that under heavy IO,
lsi_reselect() in qemu "hw/scsi/lsi53c895a.c" is not being called before the
upper layer scsi driver 30 second command timeout goes off.

If lsi_reselect() were called more frequently, the command timeout problem would
probably not occur. There are a number of places where lsi_reselect() is supposed
to get called (e.g. at the end of lsi_update_irq()), but the only place that I
have observed lsi_reselect() being called is from lsi_execute_script() when
lsi_wait_reselect() is called because of a SCRIPT "Wait Select" IO Instruction.

The proposed patch adds a deadline timeout for each pending command added to the
pending queue. The timeout is an arbitrary value (less than the upper layer
command timeout) that gets checked after each command is completed when the
pending queue is checked. If the deadline is exceeded, a flag is set indicating
that a SCRIPT "Wait Select" IO Instruction is needed, which will result in
lsi_wait_reselect() and lsi_reselect() being called to remove a command from 
the pending queue, reselect the target, continue and complete the command.

 hw/scsi/lsi53c895a.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 53 insertions(+), 1 deletion(-)

diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
index 996b406..8474399 100644
--- a/hw/scsi/lsi53c895a.c
+++ b/hw/scsi/lsi53c895a.c
@@ -198,6 +198,7 @@ typedef struct lsi_request {
     uint32_t dma_len;
     uint8_t *dma_buf;
     uint32_t pending;
+    uint64_t deadline;
     int out;
     QTAILQ_ENTRY(lsi_request) next;
 } lsi_request;
@@ -232,6 +233,9 @@ typedef struct {
     int command_complete;
     QTAILQ_HEAD(, lsi_request) queue;
     lsi_request *current;
+    int want_resel;     /* need resel to handle queued completed cmds */
+    uint32_t resel_dsp; /* DMA Scripts Ptr (DSP) of reselection scsi scripts */
+    uint32_t next_dsp;  /* if want_resel, will be loaded with above */
 
     uint32_t dsa;
     uint32_t temp;
@@ -311,6 +315,20 @@ static inline int lsi_irq_on_rsl(LSIState *s)
     return (s->sien0 & LSI_SIST0_RSL) && (s->scid & LSI_SCID_RRE);
 }
 
+static int pending_past_deadline(LSIState *s)
+{
+    lsi_request *p;
+
+    QTAILQ_FOREACH(p, &s->queue, next) {
+        if (p->pending) {
+            if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > p->deadline) {
+                return 1;
+            }
+        }
+    }
+    return 0;
+}
+
 static void lsi_soft_reset(LSIState *s)
 {
     DPRINTF("Reset\n");
@@ -634,15 +652,22 @@ static void lsi_do_dma(LSIState *s, int out)
     }
 }
 
+/* Max time a completed command can be on the queue before Reselection needed */
+#define LSI_DEADLINE    1000
 
 /* Add a command to the queue.  */
 static void lsi_queue_command(LSIState *s)
 {
     lsi_request *p = s->current;
+    uint64_t timeout_ms = LSI_DEADLINE;
 
     DPRINTF("Queueing tag=0x%x\n", p->tag);
     assert(s->current != NULL);
     assert(s->current->dma_len == 0);
+
+    p->deadline = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) +
+        timeout_ms * 1000000ULL;
+
     QTAILQ_INSERT_TAIL(&s->queue, s->current, next);
     s->current = NULL;
 
@@ -775,6 +800,9 @@ static void lsi_command_complete(SCSIRequest *req, uint32_t status, size_t resid
         lsi_request_free(s, s->current);
         scsi_req_unref(req);
     }
+    if (pending_past_deadline(s)) {
+        s->want_resel = 1;
+    }
     lsi_resume_script(s);
 }
 
@@ -987,7 +1015,7 @@ static void lsi_do_msgout(LSIState *s)
             s->select_tag |= lsi_get_msgbyte(s) | LSI_TAG_VALID;
             break;
         case 0x22: /* ORDERED queue */
-            BADF("ORDERED queue not implemented\n");
+            DPRINTF("ORDERED queue not implemented\n");
             s->select_tag |= lsi_get_msgbyte(s) | LSI_TAG_VALID;
             break;
         case 0x0d:
@@ -1078,6 +1106,9 @@ static void lsi_wait_reselect(LSIState *s)
 
     DPRINTF("Wait Reselect\n");
 
+    if (s->current)
+        return;
+
     QTAILQ_FOREACH(p, &s->queue, next) {
         if (p->pending) {
             lsi_reselect(s, p);
@@ -1089,6 +1120,8 @@ static void lsi_wait_reselect(LSIState *s)
     }
 }
 
+#define SCRIPTS_LOAD_AND_STORE  0xe2340004
+
 static void lsi_execute_script(LSIState *s)
 {
     PCIDevice *pci_dev = PCI_DEVICE(s);
@@ -1096,10 +1129,16 @@ static void lsi_execute_script(LSIState *s)
     uint32_t addr, addr_high;
     int opcode;
     int insn_processed = 0;
+    uint32_t save_dsp = 0;
 
     s->istat1 |= LSI_ISTAT1_SRUN;
 again:
     insn_processed++;
+    if (s->next_dsp) {
+        save_dsp = s->dsp;
+        s->dsp = s->next_dsp;
+        DPRINTF("lsi_execute_script: setting up for wait_reselection...\n");
+    }
     insn = read_dword(s, s->dsp);
     if (!insn) {
         /* If we receive an empty opcode increment the DSP by 4 bytes
@@ -1107,6 +1146,12 @@ again:
         s->dsp += 4;
         goto again;
     }
+    if (s->want_resel && s->resel_dsp && (insn == SCRIPTS_LOAD_AND_STORE)) {
+        /* Reselection follows Load and Store */
+        DPRINTF("lsi_execute_script: detects want_resel...\n");
+        s->next_dsp = s->resel_dsp;
+        s->want_resel = 0;
+    }
     addr = read_dword(s, s->dsp + 4);
     addr_high = 0;
     DPRINTF("SCRIPTS dsp=%08x opcode %08x arg %08x\n", s->dsp, insn, addr);
@@ -1273,7 +1318,14 @@ again:
                 s->scntl1 &= ~LSI_SCNTL1_CON;
                 break;
             case 2: /* Wait Reselect */
+                if (!s->resel_dsp) {
+                    s->resel_dsp = s->dsp - 8;
+                }
                 if (!lsi_irq_on_rsl(s)) {
+                    if (save_dsp) {
+                        s->dsp = save_dsp;
+                        save_dsp = s->next_dsp = 0;
+                    }
                     lsi_wait_reselect(s);
                 }
                 break;
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [Qemu-devel] [PATCH v2] lsi: Reselection needed to remove pending commands from queue
  2018-10-22 21:28 [Qemu-devel] [PATCH v2] lsi: Reselection needed to remove pending commands from queue George Kennedy
@ 2018-10-23 14:33 ` Paolo Bonzini
  2018-10-23 21:36   ` George Kennedy
  0 siblings, 1 reply; 8+ messages in thread
From: Paolo Bonzini @ 2018-10-23 14:33 UTC (permalink / raw)
  To: George Kennedy, famz, qemu-devel

On 22/10/2018 23:28, George Kennedy wrote:
> As you suggested I moved the loading of "s->resel_dsp" down to the "Wait Reselect"
> case. The address of the Reselection Scripts, though, is contained in "s->dsp - 8"
> and not in s->dnad.

Are you sure?  s->dsp - 8 should be the address of the Wait Reselect
instruction itself.  But you're right that s->dnad is the address at
which to jump "if the LSI53C895A is selected before being reselected"
(as the spec puts it) so the reselection DSP should be just s->dsp.

> The reason the timeout is needed is that under heavy IO some pending commands
> stay on the pending queue longer than the 30 second command timeout set by the
> linux upper layer scsi driver (sym53c8xx). When command timeouts occur, the
> upper layer scsi driver sends SCSI Abort messages to remove the timed out
> commands. The command timeouts are caused by the fact that under heavy IO,
> lsi_reselect() in qemu "hw/scsi/lsi53c895a.c" is not being called before the
> upper layer scsi driver 30 second command timeout goes off.
> 
> If lsi_reselect() were called more frequently, the command timeout problem would
> probably not occur. There are a number of places where lsi_reselect() is supposed
> to get called (e.g. at the end of lsi_update_irq()), but the only place that I
> have observed lsi_reselect() being called is from lsi_execute_script() when
> lsi_wait_reselect() is called because of a SCRIPT "Wait Select" IO Instruction.

Reselection should only happen when the target needs access to the bus,
which is when I/O has finished.  There should be no need for such a
deadline; reselection should already be happening at the right time when
lsi_transfer_data calls lsi_queue_req, which in turn calls lsi_reselect.

Maybe many of the places that call lsi_irq_on_rsl(s) also need to check
s->want_resel?

Paolo

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Qemu-devel] [PATCH v2] lsi: Reselection needed to remove pending commands from queue
  2018-10-23 14:33 ` Paolo Bonzini
@ 2018-10-23 21:36   ` George Kennedy
  2018-10-23 21:50     ` Paolo Bonzini
  0 siblings, 1 reply; 8+ messages in thread
From: George Kennedy @ 2018-10-23 21:36 UTC (permalink / raw)
  To: Paolo Bonzini, Fam Zheng, qemu-devel



On 10/23/2018 10:33 AM, Paolo Bonzini wrote:
> On 22/10/2018 23:28, George Kennedy wrote:
>> As you suggested I moved the loading of "s->resel_dsp" down to the "Wait Reselect"
>> case. The address of the Reselection Scripts, though, is contained in "s->dsp - 8"
>> and not in s->dnad.
> Are you sure?  s->dsp - 8 should be the address of the Wait Reselect
> instruction itself.  But you're right that s->dnad is the address at
> which to jump "if the LSI53C895A is selected before being reselected"
> (as the spec puts it) so the reselection DSP should be just s->dsp.

See within the 1st 25 lines of lsi_execute_script() where dsp is bumped 
up by 8, "s->dsp += 8", so it needs to be adjusted back to what it was.

>
>> The reason the timeout is needed is that under heavy IO some pending commands
>> stay on the pending queue longer than the 30 second command timeout set by the
>> linux upper layer scsi driver (sym53c8xx). When command timeouts occur, the
>> upper layer scsi driver sends SCSI Abort messages to remove the timed out
>> commands. The command timeouts are caused by the fact that under heavy IO,
>> lsi_reselect() in qemu "hw/scsi/lsi53c895a.c" is not being called before the
>> upper layer scsi driver 30 second command timeout goes off.
>>
>> If lsi_reselect() were called more frequently, the command timeout problem would
>> probably not occur. There are a number of places where lsi_reselect() is supposed
>> to get called (e.g. at the end of lsi_update_irq()), but the only place that I
>> have observed lsi_reselect() being called is from lsi_execute_script() when
>> lsi_wait_reselect() is called because of a SCRIPT "Wait Select" IO Instruction.
> Reselection should only happen when the target needs access to the bus,
> which is when I/O has finished.  There should be no need for such a
> deadline; reselection should already be happening at the right time when
> lsi_transfer_data calls lsi_queue_req, which in turn calls lsi_reselect.
Agree that it should happen as you describe, but under heavy IO (fio), 
it does not.

When it works as expected the check for "s->waiting == 1" (Wait Reselect 
instruction has been issued) in lsi_transfer_data() is true. Under heavy 
IO, s->waiting is not "1" for an extended period of time and 
lsi_queue_req() does not get called, which leaves any pending commands 
"stuck" on the queue because lsi_reselect() does not get called.

The Scripts are the only place where lsi_wait_reselect() is called and 
the only place where "s->waiting = 1" is set. So, the delay in getting a 
Scripts Wait Reselect command is the root cause of the problem.

The check in lsi_transfer_data() where it decides whether to call 
lsi_queue_req() is probably the preferred place to add a fix, but I have 
not been able to come up with a fix here that does not run into problems 
because of Script state.
>
> Maybe many of the places that call lsi_irq_on_rsl(s) also need to check
> s->want_resel?

I've added debug to all the places where lsi_reselect() should be 
called, but under heavy IO lsi_reselect() does not get called for a 
period of time exceeding the upper layer's 30 second command timeout, 
hence the need for the patch which injects a Scripts Wait Reselect IO 
command.

My test setup consists of 5 remote iscsi disks. Here are the fio write 
arguments, which show the problem:

[global]
bs=256k
iodepth=2
direct=1
ioengine=libaio
randrepeat=0
group_reporting
time_based
runtime=60
numjobs=40
name=test
rw=write

[job1]
filename=/dev/sda
filename=/dev/sdb
filename=/dev/sdc
filename=/dev/sdd
filename=/dev/sde


I am not strongly attached to my proposed fix. If an alternative fix can 
be suggested, I'd be more than willing to try that.

Thank you,
George

>
> Paolo
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Qemu-devel] [PATCH v2] lsi: Reselection needed to remove pending commands from queue
  2018-10-23 21:36   ` George Kennedy
@ 2018-10-23 21:50     ` Paolo Bonzini
  2018-10-23 22:11       ` George Kennedy
  0 siblings, 1 reply; 8+ messages in thread
From: Paolo Bonzini @ 2018-10-23 21:50 UTC (permalink / raw)
  To: George Kennedy, Fam Zheng, qemu-devel

On 23/10/2018 23:36, George Kennedy wrote:
> 
> 
> On 10/23/2018 10:33 AM, Paolo Bonzini wrote:
>> On 22/10/2018 23:28, George Kennedy wrote:
>>> As you suggested I moved the loading of "s->resel_dsp" down to the
>>> "Wait Reselect"
>>> case. The address of the Reselection Scripts, though, is contained in
>>> "s->dsp - 8"
>>> and not in s->dnad.
>> Are you sure?  s->dsp - 8 should be the address of the Wait Reselect
>> instruction itself.  But you're right that s->dnad is the address at
>> which to jump "if the LSI53C895A is selected before being reselected"
>> (as the spec puts it) so the reselection DSP should be just s->dsp.
> 
> See within the 1st 25 lines of lsi_execute_script() where dsp is bumped
> up by 8, "s->dsp += 8", so it needs to be adjusted back to what it was.

The spec says "If the LSI53C895A is reselected, it fetches the next
instruction from the address pointed to by the DMA
SCRIPTS Pointer (DSP) register".  The first instruction of the
reselection scripts is the one after WAIT RESELECT, i.e. s->dsp.


>> Reselection should only happen when the target needs access to the bus,
>> which is when I/O has finished.  There should be no need for such a
>> deadline; reselection should already be happening at the right time when
>> lsi_transfer_data calls lsi_queue_req, which in turn calls lsi_reselect.
> Agree that it should happen as you describe, but under heavy IO (fio),
> it does not.
> 
> When it works as expected the check for "s->waiting == 1" (Wait Reselect
> instruction has been issued) in lsi_transfer_data() is true. Under heavy
> IO, s->waiting is not "1" for an extended period of time

What about "req->hba_private != s->current"?  That should cause a call
to lsi_queue_req, and then you can check s->want_resel in lsi_queue_req.

> I am not strongly attached to my proposed fix. If an alternative fix can
> be suggested, I'd be more than willing to try that.

The problem is that the timeout has no explanation under the SCSI
protocol, so I would like to understand where the logic is wrong in the
Parallel SCSI emulation.

Paolo

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Qemu-devel] [PATCH v2] lsi: Reselection needed to remove pending commands from queue
  2018-10-23 21:50     ` Paolo Bonzini
@ 2018-10-23 22:11       ` George Kennedy
  2018-10-23 22:31         ` Paolo Bonzini
  0 siblings, 1 reply; 8+ messages in thread
From: George Kennedy @ 2018-10-23 22:11 UTC (permalink / raw)
  To: Paolo Bonzini, Fam Zheng, qemu-devel



On 10/23/2018 5:50 PM, Paolo Bonzini wrote:
> On 23/10/2018 23:36, George Kennedy wrote:
>>
>> On 10/23/2018 10:33 AM, Paolo Bonzini wrote:
>>> On 22/10/2018 23:28, George Kennedy wrote:
>>>> As you suggested I moved the loading of "s->resel_dsp" down to the
>>>> "Wait Reselect"
>>>> case. The address of the Reselection Scripts, though, is contained in
>>>> "s->dsp - 8"
>>>> and not in s->dnad.
>>> Are you sure?  s->dsp - 8 should be the address of the Wait Reselect
>>> instruction itself.  But you're right that s->dnad is the address at
>>> which to jump "if the LSI53C895A is selected before being reselected"
>>> (as the spec puts it) so the reselection DSP should be just s->dsp.
>> See within the 1st 25 lines of lsi_execute_script() where dsp is bumped
>> up by 8, "s->dsp += 8", so it needs to be adjusted back to what it was.
> The spec says "If the LSI53C895A is reselected, it fetches the next
> instruction from the address pointed to by the DMA
> SCRIPTS Pointer (DSP) register".  The first instruction of the
> reselection scripts is the one after WAIT RESELECT, i.e. s->dsp.
>
>
>>> Reselection should only happen when the target needs access to the bus,
>>> which is when I/O has finished.  There should be no need for such a
>>> deadline; reselection should already be happening at the right time when
>>> lsi_transfer_data calls lsi_queue_req, which in turn calls lsi_reselect.
>> Agree that it should happen as you describe, but under heavy IO (fio),
>> it does not.
>>
>> When it works as expected the check for "s->waiting == 1" (Wait Reselect
>> instruction has been issued) in lsi_transfer_data() is true. Under heavy
>> IO, s->waiting is not "1" for an extended period of time
> What about "req->hba_private != s->current"?  That should cause a call
> to lsi_queue_req, and then you can check s->want_resel in lsi_queue_req.

For the extended period of time where lsi_queue_req() is not being 
called from lsi_transfer_data(), my debug shows "s->waiting" is not "1" 
and req->hba_private is equal to s->current.

req->hba_private is set to NULL in lsi_command_complete() and that's 
where I tried to add a call to lsi_reselect(), but the Scripts are not 
in the correct state to allow the call.

lsi_transfer_data() or lsi_command_complete() are probably the 2 
potential places where a fix could be added if the Script state would 
allow it.
>
>> I am not strongly attached to my proposed fix. If an alternative fix can
>> be suggested, I'd be more than willing to try that.
> The problem is that the timeout has no explanation under the SCSI
> protocol, so I would like to understand where the logic is wrong in the
> Parallel SCSI emulation.

Agreed. That's why I'm hoping for an alternate fix.

Thank you,
George
>
> Paolo

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Qemu-devel] [PATCH v2] lsi: Reselection needed to remove pending commands from queue
  2018-10-23 22:11       ` George Kennedy
@ 2018-10-23 22:31         ` Paolo Bonzini
  2018-10-24 13:06           ` George Kennedy
  0 siblings, 1 reply; 8+ messages in thread
From: Paolo Bonzini @ 2018-10-23 22:31 UTC (permalink / raw)
  To: George Kennedy, Fam Zheng, qemu-devel

On 24/10/2018 00:11, George Kennedy wrote:
>>>
>> What about "req->hba_private != s->current"?  That should cause a call
>> to lsi_queue_req, and then you can check s->want_resel in lsi_queue_req.
> 
> For the extended period of time where lsi_queue_req() is not being
> called from lsi_transfer_data(), my debug shows "s->waiting" is not "1"
> and req->hba_private is equal to s->current.

That would mean indeed that no reselection is needed---but that's wrong.

Why didn't lsi_do_command invoke lsi_queue_command?  That would set
s->current to NULL (on the SCSI level, that means the bus is freed; on
the QEMU level, the idea is that lsi_transfer_data would then start a
reselection).

Thanks,

Paolo

> req->hba_private is set to NULL in lsi_command_complete() and that's
> where I tried to add a call to lsi_reselect(), but the Scripts are not
> in the correct state to allow the call.
> 
> lsi_transfer_data() or lsi_command_complete() are probably the 2
> potential places where a fix could be added if the Script state would
> allow it.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Qemu-devel] [PATCH v2] lsi: Reselection needed to remove pending commands from queue
  2018-10-23 22:31         ` Paolo Bonzini
@ 2018-10-24 13:06           ` George Kennedy
  2018-10-24 22:27             ` Paolo Bonzini
  0 siblings, 1 reply; 8+ messages in thread
From: George Kennedy @ 2018-10-24 13:06 UTC (permalink / raw)
  To: Paolo Bonzini, Fam Zheng, qemu-devel



On 10/23/2018 6:31 PM, Paolo Bonzini wrote:
> On 24/10/2018 00:11, George Kennedy wrote:
>>> What about "req->hba_private != s->current"?  That should cause a call
>>> to lsi_queue_req, and then you can check s->want_resel in lsi_queue_req.
>> For the extended period of time where lsi_queue_req() is not being
>> called from lsi_transfer_data(), my debug shows "s->waiting" is not "1"
>> and req->hba_private is equal to s->current.
> That would mean indeed that no reselection is needed---but that's wrong.
>
> Why didn't lsi_do_command invoke lsi_queue_command?  That would set
> s->current to NULL (on the SCSI level, that means the bus is freed; on
> the QEMU level, the idea is that lsi_transfer_data would then start a
> reselection).

Through the extended period of time with no call to lsi_reselect(), the 
check of "s->command_complete" in lsi_do_command() is always "1" and 
therefore no call to lsi_queue_command() occurs.

"s->command_complete" is set to "1" in lsi_transfer_data().

>
> Thanks,
>
> Paolo
>
>> req->hba_private is set to NULL in lsi_command_complete() and that's
>> where I tried to add a call to lsi_reselect(), but the Scripts are not
>> in the correct state to allow the call.
>>
>> lsi_transfer_data() or lsi_command_complete() are probably the 2
>> potential places where a fix could be added if the Script state would
>> allow it.
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Qemu-devel] [PATCH v2] lsi: Reselection needed to remove pending commands from queue
  2018-10-24 13:06           ` George Kennedy
@ 2018-10-24 22:27             ` Paolo Bonzini
  0 siblings, 0 replies; 8+ messages in thread
From: Paolo Bonzini @ 2018-10-24 22:27 UTC (permalink / raw)
  To: George Kennedy, Fam Zheng, qemu-devel

On 24/10/2018 15:06, George Kennedy wrote:
>>
>>
>> Why didn't lsi_do_command invoke lsi_queue_command?  That would set
>> s->current to NULL (on the SCSI level, that means the bus is freed; on
>> the QEMU level, the idea is that lsi_transfer_data would then start a
>> reselection).
> 
> Through the extended period of time with no call to lsi_reselect(), the
> check of "s->command_complete" in lsi_do_command() is always "1" and
> therefore no call to lsi_queue_command() occurs.
> 
> "s->command_complete" is set to "1" in lsi_transfer_data().

Ok, I think we're getting closer.  If the data transfer is from the
device (read), then you can disconnect until the read is complete.

If the data transfer however is to the device (write), there will be
immediately a call to scsi_transfer_data, in order to get the written
data, and s->command_complete is set to 1.

However, this is wrong: the DMA transfer doesn't per se prevent a
disconnect and a later reselect, and this might be the bug.  Basically
we want to get rid of the "s->command_complete = 1" case completely.
The patch to do this change in the SCSI bus handling would look
something like this (untested):

diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
index d1e6534311..c32ef40e78 100644
--- a/hw/scsi/lsi53c895a.c
+++ b/hw/scsi/lsi53c895a.c
@@ -565,6 +565,21 @@ static void lsi_bad_selection(LSIState *s, uint32_t id)
     lsi_disconnect(s);
 }

+/* Disconnect when DMA has finished but the command did not
+ * complete immediately.
+ */
+static void lsi_maybe_disconnect(LSIState *s)
+{
+    if (!s->current->dma_len && !s->command_complete) {
+        lsi_add_msg_byte(s, 2); /* SAVE DATA POINTER */
+        lsi_add_msg_byte(s, 4); /* DISCONNECT */
+        /* wait data */
+        lsi_set_phase(s, PHASE_MI);
+        s->msg_action = 1;
+        lsi_queue_command(s);
+    }
+}
+
 /* Initiate a SCSI layer data transfer.  */
 static void lsi_do_dma(LSIState *s, int out)
 {
@@ -612,6 +627,7 @@ static void lsi_do_dma(LSIState *s, int out)
     if (s->current->dma_len == 0) {
         s->current->dma_buf = NULL;
         scsi_req_continue(s->current->req);
+        lsi_maybe_disconnect(s);
     } else {
         s->current->dma_buf += count;
         lsi_resume_script(s);
@@ -631,7 +647,6 @@ static void lsi_queue_command(LSIState *s)
     s->current = NULL;

     p->pending = 0;
-    p->out = (s->sstat1 & PHASE_MASK) == PHASE_DO;
 }

 /* Queue a byte for a MSG IN phase.  */
@@ -746,7 +761,7 @@ static void lsi_command_complete(SCSIRequest *req,
     out = (s->sstat1 & PHASE_MASK) == PHASE_DO;
     trace_lsi_command_complete(status);
     s->status = status;
-    s->command_complete = 2;
+    s->command_complete = 1;
     if (s->waiting && s->dbc != 0) {
         /* Raise phase mismatch for short transfers.  */
         lsi_bad_phase(s, out, PHASE_ST);
@@ -781,7 +796,6 @@ static void lsi_transfer_data(SCSIRequest *req,
     /* host adapter (re)connected */
     trace_lsi_transfer_data(req->tag, len);
     s->current->dma_len = len;
-    s->command_complete = 1;
     if (s->waiting) {
         if (s->waiting == 1 || s->dbc == 0) {
             lsi_resume_script(s);
@@ -819,28 +833,12 @@ static void lsi_do_command(LSIState *s)
                                    s->current);

     n = scsi_req_enqueue(s->current->req);
+    s->current->out = (n < 0);
+    lsi_set_phase(s, s->current->out ? PHASE_DO : PHASE_DI);
     if (n) {
-        if (n > 0) {
-            lsi_set_phase(s, PHASE_DI);
-        } else if (n < 0) {
-            lsi_set_phase(s, PHASE_DO);
-        }
         scsi_req_continue(s->current->req);
     }
-    if (!s->command_complete) {
-        if (n) {
-            /* Command did not complete immediately so disconnect.  */
-            lsi_add_msg_byte(s, 2); /* SAVE DATA POINTER */
-            lsi_add_msg_byte(s, 4); /* DISCONNECT */
-            /* wait data */
-            lsi_set_phase(s, PHASE_MI);
-            s->msg_action = 1;
-            lsi_queue_command(s);
-        } else {
-            /* wait command complete */
-            lsi_set_phase(s, PHASE_DI);
-        }
-    }
+    lsi_maybe_disconnect(s);
 }

 static void lsi_do_status(LSIState *s)


Of course, this doesn't include the change to WAIT RESELECT handling.
It only ensures (or attempts to ensure) that the bus is disconnected and
later reselected on long-running I/O, no matter the data direction.

Thanks,

Paolo

^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2018-10-24 22:27 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-10-22 21:28 [Qemu-devel] [PATCH v2] lsi: Reselection needed to remove pending commands from queue George Kennedy
2018-10-23 14:33 ` Paolo Bonzini
2018-10-23 21:36   ` George Kennedy
2018-10-23 21:50     ` Paolo Bonzini
2018-10-23 22:11       ` George Kennedy
2018-10-23 22:31         ` Paolo Bonzini
2018-10-24 13:06           ` George Kennedy
2018-10-24 22:27             ` Paolo Bonzini

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.