All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] e1000e: using bottom half to send packets
@ 2020-07-16 16:14 Li Qiang
  2020-07-17  3:10 ` Jason Wang
  0 siblings, 1 reply; 9+ messages in thread
From: Li Qiang @ 2020-07-16 16:14 UTC (permalink / raw)
  To: dmitry.fleytman, jasowang, pbonzini, mst; +Cc: Li Qiang, liq3ea, qemu-devel

Alexander Bulekov reported a UAF bug related to e1000e packet transmission.

-->https://bugs.launchpad.net/qemu/+bug/1886362

This is because the guest can trigger an e1000e packet send and set the
data's address to e1000e's own MMIO address. So when the e1000e does DMA
it will write to the MMIO again, triggering re-entrancy and finally
causing this UAF.

Paolo suggested using a bottom half whenever MMIO is doing complicated
things, in here:
-->https://lists.nongnu.org/archive/html/qemu-devel/2020-07/msg03342.html

Reference here:
'The easiest solution is to delay processing of descriptors to a bottom
half whenever MMIO is doing something complicated.  This is also better
for latency because it will free the vCPU thread more quickly and leave
the work to the I/O thread.'

This patch fixes this UAF.

Signed-off-by: Li Qiang <liq3ea@163.com>
---
 hw/net/e1000e_core.c | 25 +++++++++++++++++--------
 hw/net/e1000e_core.h |  2 ++
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
index bcd186cac5..6165b04b68 100644
--- a/hw/net/e1000e_core.c
+++ b/hw/net/e1000e_core.c
@@ -2423,32 +2423,27 @@ e1000e_set_dbal(E1000ECore *core, int index, uint32_t val)
 static void
 e1000e_set_tctl(E1000ECore *core, int index, uint32_t val)
 {
-    E1000E_TxRing txr;
     core->mac[index] = val;
 
     if (core->mac[TARC0] & E1000_TARC_ENABLE) {
-        e1000e_tx_ring_init(core, &txr, 0);
-        e1000e_start_xmit(core, &txr);
+        qemu_bh_schedule(core->tx[0].tx_bh);
     }
 
     if (core->mac[TARC1] & E1000_TARC_ENABLE) {
-        e1000e_tx_ring_init(core, &txr, 1);
-        e1000e_start_xmit(core, &txr);
+        qemu_bh_schedule(core->tx[1].tx_bh);
     }
 }
 
 static void
 e1000e_set_tdt(E1000ECore *core, int index, uint32_t val)
 {
-    E1000E_TxRing txr;
     int qidx = e1000e_mq_queue_idx(TDT, index);
     uint32_t tarc_reg = (qidx == 0) ? TARC0 : TARC1;
 
     core->mac[index] = val & 0xffff;
 
     if (core->mac[tarc_reg] & E1000_TARC_ENABLE) {
-        e1000e_tx_ring_init(core, &txr, qidx);
-        e1000e_start_xmit(core, &txr);
+        qemu_bh_schedule(core->tx[qidx].tx_bh);
     }
 }
 
@@ -3322,6 +3317,16 @@ e1000e_vm_state_change(void *opaque, int running, RunState state)
     }
 }
 
+static void e1000e_core_tx_bh(void *opaque)
+{
+    struct e1000e_tx *tx = opaque;
+    E1000ECore *core = tx->core;
+    E1000E_TxRing txr;
+
+    e1000e_tx_ring_init(core, &txr, tx - &core->tx[0]);
+    e1000e_start_xmit(core, &txr);
+}
+
 void
 e1000e_core_pci_realize(E1000ECore     *core,
                         const uint16_t *eeprom_templ,
@@ -3340,6 +3345,8 @@ e1000e_core_pci_realize(E1000ECore     *core,
     for (i = 0; i < E1000E_NUM_QUEUES; i++) {
         net_tx_pkt_init(&core->tx[i].tx_pkt, core->owner,
                         E1000E_MAX_TX_FRAGS, core->has_vnet);
+        core->tx[i].core = core;
+        core->tx[i].tx_bh = qemu_bh_new(e1000e_core_tx_bh, &core->tx[i]);
     }
 
     net_rx_pkt_init(&core->rx_pkt, core->has_vnet);
@@ -3367,6 +3374,8 @@ e1000e_core_pci_uninit(E1000ECore *core)
     for (i = 0; i < E1000E_NUM_QUEUES; i++) {
         net_tx_pkt_reset(core->tx[i].tx_pkt);
         net_tx_pkt_uninit(core->tx[i].tx_pkt);
+        qemu_bh_delete(core->tx[i].tx_bh);
+        core->tx[i].tx_bh = NULL;
     }
 
     net_rx_pkt_uninit(core->rx_pkt);
diff --git a/hw/net/e1000e_core.h b/hw/net/e1000e_core.h
index aee32f7e48..94ddc6afc2 100644
--- a/hw/net/e1000e_core.h
+++ b/hw/net/e1000e_core.h
@@ -77,6 +77,8 @@ struct E1000Core {
         unsigned char sum_needed;
         bool cptse;
         struct NetTxPkt *tx_pkt;
+        QEMUBH *tx_bh;
+        E1000ECore *core;
     } tx[E1000E_NUM_QUEUES];
 
     struct NetRxPkt *rx_pkt;
-- 
2.17.1



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] e1000e: using bottom half to send packets
  2020-07-16 16:14 [PATCH] e1000e: using bottom half to send packets Li Qiang
@ 2020-07-17  3:10 ` Jason Wang
  2020-07-17  4:46   ` Li Qiang
  2020-07-17 15:52   ` Peter Maydell
  0 siblings, 2 replies; 9+ messages in thread
From: Jason Wang @ 2020-07-17  3:10 UTC (permalink / raw)
  To: Li Qiang, dmitry.fleytman, pbonzini, mst; +Cc: liq3ea, qemu-devel


On 2020/7/17 上午12:14, Li Qiang wrote:
> Alexander Bulekov reported a UAF bug related e1000e packets send.
>
> -->https://bugs.launchpad.net/qemu/+bug/1886362
>
> This is because the guest trigger a e1000e packet send and set the
> data's address to e1000e's MMIO address. So when the e1000e do DMA
> it will write the MMIO again and trigger re-entrancy and finally
> causes this UAF.
>
> Paolo suggested to use a bottom half whenever MMIO is doing complicate
> things in here:
> -->https://lists.nongnu.org/archive/html/qemu-devel/2020-07/msg03342.html
>
> Reference here:
> 'The easiest solution is to delay processing of descriptors to a bottom
> half whenever MMIO is doing something complicated.  This is also better
> for latency because it will free the vCPU thread more quickly and leave
> the work to the I/O thread.'


I think several things were missed in this patch (take virtio-net as a 
reference), do we need the following things:

- Cancel the bh when VM is stopped.
- A throttle to prevent bh from executing too much timer?
- A flag to record whether or not this a pending tx (and migrate it?)

Thanks


>
> This patch fixes this UAF.
>
> Signed-off-by: Li Qiang <liq3ea@163.com>
> ---
>   hw/net/e1000e_core.c | 25 +++++++++++++++++--------
>   hw/net/e1000e_core.h |  2 ++
>   2 files changed, 19 insertions(+), 8 deletions(-)
>
> diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
> index bcd186cac5..6165b04b68 100644
> --- a/hw/net/e1000e_core.c
> +++ b/hw/net/e1000e_core.c
> @@ -2423,32 +2423,27 @@ e1000e_set_dbal(E1000ECore *core, int index, uint32_t val)
>   static void
>   e1000e_set_tctl(E1000ECore *core, int index, uint32_t val)
>   {
> -    E1000E_TxRing txr;
>       core->mac[index] = val;
>   
>       if (core->mac[TARC0] & E1000_TARC_ENABLE) {
> -        e1000e_tx_ring_init(core, &txr, 0);
> -        e1000e_start_xmit(core, &txr);
> +        qemu_bh_schedule(core->tx[0].tx_bh);
>       }
>   
>       if (core->mac[TARC1] & E1000_TARC_ENABLE) {
> -        e1000e_tx_ring_init(core, &txr, 1);
> -        e1000e_start_xmit(core, &txr);
> +        qemu_bh_schedule(core->tx[1].tx_bh);
>       }
>   }
>   
>   static void
>   e1000e_set_tdt(E1000ECore *core, int index, uint32_t val)
>   {
> -    E1000E_TxRing txr;
>       int qidx = e1000e_mq_queue_idx(TDT, index);
>       uint32_t tarc_reg = (qidx == 0) ? TARC0 : TARC1;
>   
>       core->mac[index] = val & 0xffff;
>   
>       if (core->mac[tarc_reg] & E1000_TARC_ENABLE) {
> -        e1000e_tx_ring_init(core, &txr, qidx);
> -        e1000e_start_xmit(core, &txr);
> +        qemu_bh_schedule(core->tx[qidx].tx_bh);
>       }
>   }
>   
> @@ -3322,6 +3317,16 @@ e1000e_vm_state_change(void *opaque, int running, RunState state)
>       }
>   }
>   
> +static void e1000e_core_tx_bh(void *opaque)
> +{
> +    struct e1000e_tx *tx = opaque;
> +    E1000ECore *core = tx->core;
> +    E1000E_TxRing txr;
> +
> +    e1000e_tx_ring_init(core, &txr, tx - &core->tx[0]);
> +    e1000e_start_xmit(core, &txr);
> +}
> +
>   void
>   e1000e_core_pci_realize(E1000ECore     *core,
>                           const uint16_t *eeprom_templ,
> @@ -3340,6 +3345,8 @@ e1000e_core_pci_realize(E1000ECore     *core,
>       for (i = 0; i < E1000E_NUM_QUEUES; i++) {
>           net_tx_pkt_init(&core->tx[i].tx_pkt, core->owner,
>                           E1000E_MAX_TX_FRAGS, core->has_vnet);
> +        core->tx[i].core = core;
> +        core->tx[i].tx_bh = qemu_bh_new(e1000e_core_tx_bh, &core->tx[i]);
>       }
>   
>       net_rx_pkt_init(&core->rx_pkt, core->has_vnet);
> @@ -3367,6 +3374,8 @@ e1000e_core_pci_uninit(E1000ECore *core)
>       for (i = 0; i < E1000E_NUM_QUEUES; i++) {
>           net_tx_pkt_reset(core->tx[i].tx_pkt);
>           net_tx_pkt_uninit(core->tx[i].tx_pkt);
> +        qemu_bh_delete(core->tx[i].tx_bh);
> +        core->tx[i].tx_bh = NULL;
>       }
>   
>       net_rx_pkt_uninit(core->rx_pkt);
> diff --git a/hw/net/e1000e_core.h b/hw/net/e1000e_core.h
> index aee32f7e48..94ddc6afc2 100644
> --- a/hw/net/e1000e_core.h
> +++ b/hw/net/e1000e_core.h
> @@ -77,6 +77,8 @@ struct E1000Core {
>           unsigned char sum_needed;
>           bool cptse;
>           struct NetTxPkt *tx_pkt;
> +        QEMUBH *tx_bh;
> +        E1000ECore *core;
>       } tx[E1000E_NUM_QUEUES];
>   
>       struct NetRxPkt *rx_pkt;



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] e1000e: using bottom half to send packets
  2020-07-17  3:10 ` Jason Wang
@ 2020-07-17  4:46   ` Li Qiang
  2020-07-17  5:38     ` Jason Wang
  2020-07-17 15:52   ` Peter Maydell
  1 sibling, 1 reply; 9+ messages in thread
From: Li Qiang @ 2020-07-17  4:46 UTC (permalink / raw)
  To: Jason Wang
  Cc: Paolo Bonzini, Dmitry Fleytman, Li Qiang, Qemu Developers,
	Michael S. Tsirkin

Jason Wang <jasowang@redhat.com> 于2020年7月17日周五 上午11:10写道:
>
>
> On 2020/7/17 上午12:14, Li Qiang wrote:
> > Alexander Bulekov reported a UAF bug related e1000e packets send.
> >
> > -->https://bugs.launchpad.net/qemu/+bug/1886362
> >
> > This is because the guest trigger a e1000e packet send and set the
> > data's address to e1000e's MMIO address. So when the e1000e do DMA
> > it will write the MMIO again and trigger re-entrancy and finally
> > causes this UAF.
> >
> > Paolo suggested to use a bottom half whenever MMIO is doing complicate
> > things in here:
> > -->https://lists.nongnu.org/archive/html/qemu-devel/2020-07/msg03342.html
> >
> > Reference here:
> > 'The easiest solution is to delay processing of descriptors to a bottom
> > half whenever MMIO is doing something complicated.  This is also better
> > for latency because it will free the vCPU thread more quickly and leave
> > the work to the I/O thread.'
>
>
> I think several things were missed in this patch (take virtio-net as a
> reference), do we need the following things:
>

Thanks Jason,
In fact I know this, I'm scared for touching this but I want to try.
Thanks for your advice.

> - Cancel the bh when VM is stopped.

Ok. I think add a vm state change notifier for e1000e can address this.

> - A throttle to prevent bh from executing too much timer?

Ok, I think add a config timeout and add a timer in e1000e can address this.

> - A flag to record whether or not this a pending tx (and migrate it?)

Is just a flag enough? Could you explain more about the idea behind
processing the virtio-net/e1000e using bh like this?
For example, if the guest triggers a lot of packet sends and the bh
is scheduled in the IO thread, will we lose packets?
How do we avoid this in virtio-net?

Thanks,
Li Qiang



>
> Thanks
>
>
> >
> > This patch fixes this UAF.
> >
> > Signed-off-by: Li Qiang <liq3ea@163.com>
> > ---
> >   hw/net/e1000e_core.c | 25 +++++++++++++++++--------
> >   hw/net/e1000e_core.h |  2 ++
> >   2 files changed, 19 insertions(+), 8 deletions(-)
> >
> > diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
> > index bcd186cac5..6165b04b68 100644
> > --- a/hw/net/e1000e_core.c
> > +++ b/hw/net/e1000e_core.c
> > @@ -2423,32 +2423,27 @@ e1000e_set_dbal(E1000ECore *core, int index, uint32_t val)
> >   static void
> >   e1000e_set_tctl(E1000ECore *core, int index, uint32_t val)
> >   {
> > -    E1000E_TxRing txr;
> >       core->mac[index] = val;
> >
> >       if (core->mac[TARC0] & E1000_TARC_ENABLE) {
> > -        e1000e_tx_ring_init(core, &txr, 0);
> > -        e1000e_start_xmit(core, &txr);
> > +        qemu_bh_schedule(core->tx[0].tx_bh);
> >       }
> >
> >       if (core->mac[TARC1] & E1000_TARC_ENABLE) {
> > -        e1000e_tx_ring_init(core, &txr, 1);
> > -        e1000e_start_xmit(core, &txr);
> > +        qemu_bh_schedule(core->tx[1].tx_bh);
> >       }
> >   }
> >
> >   static void
> >   e1000e_set_tdt(E1000ECore *core, int index, uint32_t val)
> >   {
> > -    E1000E_TxRing txr;
> >       int qidx = e1000e_mq_queue_idx(TDT, index);
> >       uint32_t tarc_reg = (qidx == 0) ? TARC0 : TARC1;
> >
> >       core->mac[index] = val & 0xffff;
> >
> >       if (core->mac[tarc_reg] & E1000_TARC_ENABLE) {
> > -        e1000e_tx_ring_init(core, &txr, qidx);
> > -        e1000e_start_xmit(core, &txr);
> > +        qemu_bh_schedule(core->tx[qidx].tx_bh);
> >       }
> >   }
> >
> > @@ -3322,6 +3317,16 @@ e1000e_vm_state_change(void *opaque, int running, RunState state)
> >       }
> >   }
> >
> > +static void e1000e_core_tx_bh(void *opaque)
> > +{
> > +    struct e1000e_tx *tx = opaque;
> > +    E1000ECore *core = tx->core;
> > +    E1000E_TxRing txr;
> > +
> > +    e1000e_tx_ring_init(core, &txr, tx - &core->tx[0]);
> > +    e1000e_start_xmit(core, &txr);
> > +}
> > +
> >   void
> >   e1000e_core_pci_realize(E1000ECore     *core,
> >                           const uint16_t *eeprom_templ,
> > @@ -3340,6 +3345,8 @@ e1000e_core_pci_realize(E1000ECore     *core,
> >       for (i = 0; i < E1000E_NUM_QUEUES; i++) {
> >           net_tx_pkt_init(&core->tx[i].tx_pkt, core->owner,
> >                           E1000E_MAX_TX_FRAGS, core->has_vnet);
> > +        core->tx[i].core = core;
> > +        core->tx[i].tx_bh = qemu_bh_new(e1000e_core_tx_bh, &core->tx[i]);
> >       }
> >
> >       net_rx_pkt_init(&core->rx_pkt, core->has_vnet);
> > @@ -3367,6 +3374,8 @@ e1000e_core_pci_uninit(E1000ECore *core)
> >       for (i = 0; i < E1000E_NUM_QUEUES; i++) {
> >           net_tx_pkt_reset(core->tx[i].tx_pkt);
> >           net_tx_pkt_uninit(core->tx[i].tx_pkt);
> > +        qemu_bh_delete(core->tx[i].tx_bh);
> > +        core->tx[i].tx_bh = NULL;
> >       }
> >
> >       net_rx_pkt_uninit(core->rx_pkt);
> > diff --git a/hw/net/e1000e_core.h b/hw/net/e1000e_core.h
> > index aee32f7e48..94ddc6afc2 100644
> > --- a/hw/net/e1000e_core.h
> > +++ b/hw/net/e1000e_core.h
> > @@ -77,6 +77,8 @@ struct E1000Core {
> >           unsigned char sum_needed;
> >           bool cptse;
> >           struct NetTxPkt *tx_pkt;
> > +        QEMUBH *tx_bh;
> > +        E1000ECore *core;
> >       } tx[E1000E_NUM_QUEUES];
> >
> >       struct NetRxPkt *rx_pkt;
>


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] e1000e: using bottom half to send packets
  2020-07-17  4:46   ` Li Qiang
@ 2020-07-17  5:38     ` Jason Wang
  2020-07-17 15:46       ` Li Qiang
  0 siblings, 1 reply; 9+ messages in thread
From: Jason Wang @ 2020-07-17  5:38 UTC (permalink / raw)
  To: Li Qiang
  Cc: Paolo Bonzini, Dmitry Fleytman, Li Qiang, Qemu Developers,
	Michael S. Tsirkin


On 2020/7/17 下午12:46, Li Qiang wrote:
> Jason Wang <jasowang@redhat.com> 于2020年7月17日周五 上午11:10写道:
>>
>> On 2020/7/17 上午12:14, Li Qiang wrote:
>>> Alexander Bulekov reported a UAF bug related e1000e packets send.
>>>
>>> -->https://bugs.launchpad.net/qemu/+bug/1886362
>>>
>>> This is because the guest trigger a e1000e packet send and set the
>>> data's address to e1000e's MMIO address. So when the e1000e do DMA
>>> it will write the MMIO again and trigger re-entrancy and finally
>>> causes this UAF.
>>>
>>> Paolo suggested to use a bottom half whenever MMIO is doing complicate
>>> things in here:
>>> -->https://lists.nongnu.org/archive/html/qemu-devel/2020-07/msg03342.html
>>>
>>> Reference here:
>>> 'The easiest solution is to delay processing of descriptors to a bottom
>>> half whenever MMIO is doing something complicated.  This is also better
>>> for latency because it will free the vCPU thread more quickly and leave
>>> the work to the I/O thread.'
>>
>> I think several things were missed in this patch (take virtio-net as a
>> reference), do we need the following things:
>>
> Thanks Jason,
> In fact I know this, I'm scared for touching this but I want to try.
> Thanks for your advice.
>
>> - Cancel the bh when VM is stopped.
> Ok. I think add a vm state change notifier for e1000e can address this.
>
>> - A throttle to prevent bh from executing too much timer?
> Ok, I think add a config timeout and add a timer in e1000e can address this.


Sorry, a typo. I meant we probably need a tx_burst as what virtio-net did.


>
>> - A flag to record whether or not this a pending tx (and migrate it?)
> Is just a flag enough? Could you explain more about the idea behind
> processing the virtio-net/e1000e using bh like this?


Virtio-net uses a tx_waiting variable to record whether or not there's a 
pending bh. (E.g. if the bh is cancelled due to vmstop, we need to reschedule 
it after vmresume). Maybe we can do something simpler by just scheduling the 
bh unconditionally during VM resume.


> For example, if the guest trigger a lot of packets send and if the bh
> is scheduled in IO thread. So will we lost packets?


We don't since we don't populate virtqueue which means packets are 
queued there.

Thanks


> How we avoid this in virtio-net.
>
> Thanks,
> Li Qiang
>
>
>
>> Thanks
>>
>>
>>> This patch fixes this UAF.
>>>
>>> Signed-off-by: Li Qiang <liq3ea@163.com>
>>> ---
>>>    hw/net/e1000e_core.c | 25 +++++++++++++++++--------
>>>    hw/net/e1000e_core.h |  2 ++
>>>    2 files changed, 19 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
>>> index bcd186cac5..6165b04b68 100644
>>> --- a/hw/net/e1000e_core.c
>>> +++ b/hw/net/e1000e_core.c
>>> @@ -2423,32 +2423,27 @@ e1000e_set_dbal(E1000ECore *core, int index, uint32_t val)
>>>    static void
>>>    e1000e_set_tctl(E1000ECore *core, int index, uint32_t val)
>>>    {
>>> -    E1000E_TxRing txr;
>>>        core->mac[index] = val;
>>>
>>>        if (core->mac[TARC0] & E1000_TARC_ENABLE) {
>>> -        e1000e_tx_ring_init(core, &txr, 0);
>>> -        e1000e_start_xmit(core, &txr);
>>> +        qemu_bh_schedule(core->tx[0].tx_bh);
>>>        }
>>>
>>>        if (core->mac[TARC1] & E1000_TARC_ENABLE) {
>>> -        e1000e_tx_ring_init(core, &txr, 1);
>>> -        e1000e_start_xmit(core, &txr);
>>> +        qemu_bh_schedule(core->tx[1].tx_bh);
>>>        }
>>>    }
>>>
>>>    static void
>>>    e1000e_set_tdt(E1000ECore *core, int index, uint32_t val)
>>>    {
>>> -    E1000E_TxRing txr;
>>>        int qidx = e1000e_mq_queue_idx(TDT, index);
>>>        uint32_t tarc_reg = (qidx == 0) ? TARC0 : TARC1;
>>>
>>>        core->mac[index] = val & 0xffff;
>>>
>>>        if (core->mac[tarc_reg] & E1000_TARC_ENABLE) {
>>> -        e1000e_tx_ring_init(core, &txr, qidx);
>>> -        e1000e_start_xmit(core, &txr);
>>> +        qemu_bh_schedule(core->tx[qidx].tx_bh);
>>>        }
>>>    }
>>>
>>> @@ -3322,6 +3317,16 @@ e1000e_vm_state_change(void *opaque, int running, RunState state)
>>>        }
>>>    }
>>>
>>> +static void e1000e_core_tx_bh(void *opaque)
>>> +{
>>> +    struct e1000e_tx *tx = opaque;
>>> +    E1000ECore *core = tx->core;
>>> +    E1000E_TxRing txr;
>>> +
>>> +    e1000e_tx_ring_init(core, &txr, tx - &core->tx[0]);
>>> +    e1000e_start_xmit(core, &txr);
>>> +}
>>> +
>>>    void
>>>    e1000e_core_pci_realize(E1000ECore     *core,
>>>                            const uint16_t *eeprom_templ,
>>> @@ -3340,6 +3345,8 @@ e1000e_core_pci_realize(E1000ECore     *core,
>>>        for (i = 0; i < E1000E_NUM_QUEUES; i++) {
>>>            net_tx_pkt_init(&core->tx[i].tx_pkt, core->owner,
>>>                            E1000E_MAX_TX_FRAGS, core->has_vnet);
>>> +        core->tx[i].core = core;
>>> +        core->tx[i].tx_bh = qemu_bh_new(e1000e_core_tx_bh, &core->tx[i]);
>>>        }
>>>
>>>        net_rx_pkt_init(&core->rx_pkt, core->has_vnet);
>>> @@ -3367,6 +3374,8 @@ e1000e_core_pci_uninit(E1000ECore *core)
>>>        for (i = 0; i < E1000E_NUM_QUEUES; i++) {
>>>            net_tx_pkt_reset(core->tx[i].tx_pkt);
>>>            net_tx_pkt_uninit(core->tx[i].tx_pkt);
>>> +        qemu_bh_delete(core->tx[i].tx_bh);
>>> +        core->tx[i].tx_bh = NULL;
>>>        }
>>>
>>>        net_rx_pkt_uninit(core->rx_pkt);
>>> diff --git a/hw/net/e1000e_core.h b/hw/net/e1000e_core.h
>>> index aee32f7e48..94ddc6afc2 100644
>>> --- a/hw/net/e1000e_core.h
>>> +++ b/hw/net/e1000e_core.h
>>> @@ -77,6 +77,8 @@ struct E1000Core {
>>>            unsigned char sum_needed;
>>>            bool cptse;
>>>            struct NetTxPkt *tx_pkt;
>>> +        QEMUBH *tx_bh;
>>> +        E1000ECore *core;
>>>        } tx[E1000E_NUM_QUEUES];
>>>
>>>        struct NetRxPkt *rx_pkt;



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] e1000e: using bottom half to send packets
  2020-07-17  5:38     ` Jason Wang
@ 2020-07-17 15:46       ` Li Qiang
  2020-07-20  3:59         ` Jason Wang
  0 siblings, 1 reply; 9+ messages in thread
From: Li Qiang @ 2020-07-17 15:46 UTC (permalink / raw)
  To: Jason Wang
  Cc: Paolo Bonzini, Dmitry Fleytman, Li Qiang, Qemu Developers,
	Michael S. Tsirkin

Jason Wang <jasowang@redhat.com> 于2020年7月17日周五 下午1:39写道:
>
>
> On 2020/7/17 下午12:46, Li Qiang wrote:
> > Jason Wang <jasowang@redhat.com> 于2020年7月17日周五 上午11:10写道:
> >>
> >> On 2020/7/17 上午12:14, Li Qiang wrote:
> >>> Alexander Bulekov reported a UAF bug related e1000e packets send.
> >>>
> >>> -->https://bugs.launchpad.net/qemu/+bug/1886362
> >>>
> >>> This is because the guest trigger a e1000e packet send and set the
> >>> data's address to e1000e's MMIO address. So when the e1000e do DMA
> >>> it will write the MMIO again and trigger re-entrancy and finally
> >>> causes this UAF.
> >>>
> >>> Paolo suggested to use a bottom half whenever MMIO is doing complicate
> >>> things in here:
> >>> -->https://lists.nongnu.org/archive/html/qemu-devel/2020-07/msg03342.html
> >>>
> >>> Reference here:
> >>> 'The easiest solution is to delay processing of descriptors to a bottom
> >>> half whenever MMIO is doing something complicated.  This is also better
> >>> for latency because it will free the vCPU thread more quickly and leave
> >>> the work to the I/O thread.'
> >>
> >> I think several things were missed in this patch (take virtio-net as a
> >> reference), do we need the following things:
> >>
> > Thanks Jason,
> > In fact I know this, I'm scared for touching this but I want to try.
> > Thanks for your advice.
> >
> >> - Cancel the bh when VM is stopped.
> > Ok. I think add a vm state change notifier for e1000e can address this.
> >
> >> - A throttle to prevent bh from executing too much timer?
> > Ok, I think add a config timeout and add a timer in e1000e can address this.
>
>
> Sorry, a typo. I meant we probably need a tx_burst as what virtio-net did.
>
>
> >
> >> - A flag to record whether or not this a pending tx (and migrate it?)
> > Is just a flag enough? Could you explain more about the idea behind
> > processing the virtio-net/e1000e using bh like this?
>
>
> Virtio-net use a tx_waiting variable to record whether or not there's a
> pending bh. (E.g bh is cancelled due to vmstop, we need reschedule it
> after vmresume). Maybe we can do something simpler by just schecule bh
> unconditionally during vm resuming.
>
>
> > For example, if the guest trigger a lot of packets send and if the bh
> > is scheduled in IO thread. So will we lost packets?
>
>
> We don't since we don't populate virtqueue which means packets are
> queued there.
>

This reminds me of a question:
If we use tx_burst like in virtio-net. In detail:
Suppose we send out 'tx_burst' packets per bh, then set 'tx_waiting' and
schedule another bh. However, if between the two bh runs the guest changes
the e1000e registers such as 'r->dh' and 'r->dlen', the data is fully corrupted.
In fact this issue also exists in my original patch. That is,
what if the following happened:

vcpu thread: guest write e1000e MMIO to trigger packets send
vcpu thread: schedule a bh
vcpu thread: return
IO thread: begin to run the bh and start send packets
vcpu thread: write register again such as  'r->dh' 'r->dlen'..

So here the IO thread and vcpu thread will race the register?

If I remember correctly, virtio-net has no such problem because it
uses a ring buffer,
and the backend (virtio device) uses the shadow index to index the ring
buffer data.

What's your idea here?

Thanks,
Li Qiang


> Thanks
>
>
> > How we avoid this in virtio-net.
> >
> > Thanks,
> > Li Qiang
> >
> >
> >
> >> Thanks
> >>
> >>
> >>> This patch fixes this UAF.
> >>>
> >>> Signed-off-by: Li Qiang <liq3ea@163.com>
> >>> ---
> >>>    hw/net/e1000e_core.c | 25 +++++++++++++++++--------
> >>>    hw/net/e1000e_core.h |  2 ++
> >>>    2 files changed, 19 insertions(+), 8 deletions(-)
> >>>
> >>> diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
> >>> index bcd186cac5..6165b04b68 100644
> >>> --- a/hw/net/e1000e_core.c
> >>> +++ b/hw/net/e1000e_core.c
> >>> @@ -2423,32 +2423,27 @@ e1000e_set_dbal(E1000ECore *core, int index, uint32_t val)
> >>>    static void
> >>>    e1000e_set_tctl(E1000ECore *core, int index, uint32_t val)
> >>>    {
> >>> -    E1000E_TxRing txr;
> >>>        core->mac[index] = val;
> >>>
> >>>        if (core->mac[TARC0] & E1000_TARC_ENABLE) {
> >>> -        e1000e_tx_ring_init(core, &txr, 0);
> >>> -        e1000e_start_xmit(core, &txr);
> >>> +        qemu_bh_schedule(core->tx[0].tx_bh);
> >>>        }
> >>>
> >>>        if (core->mac[TARC1] & E1000_TARC_ENABLE) {
> >>> -        e1000e_tx_ring_init(core, &txr, 1);
> >>> -        e1000e_start_xmit(core, &txr);
> >>> +        qemu_bh_schedule(core->tx[1].tx_bh);
> >>>        }
> >>>    }
> >>>
> >>>    static void
> >>>    e1000e_set_tdt(E1000ECore *core, int index, uint32_t val)
> >>>    {
> >>> -    E1000E_TxRing txr;
> >>>        int qidx = e1000e_mq_queue_idx(TDT, index);
> >>>        uint32_t tarc_reg = (qidx == 0) ? TARC0 : TARC1;
> >>>
> >>>        core->mac[index] = val & 0xffff;
> >>>
> >>>        if (core->mac[tarc_reg] & E1000_TARC_ENABLE) {
> >>> -        e1000e_tx_ring_init(core, &txr, qidx);
> >>> -        e1000e_start_xmit(core, &txr);
> >>> +        qemu_bh_schedule(core->tx[qidx].tx_bh);
> >>>        }
> >>>    }
> >>>
> >>> @@ -3322,6 +3317,16 @@ e1000e_vm_state_change(void *opaque, int running, RunState state)
> >>>        }
> >>>    }
> >>>
> >>> +static void e1000e_core_tx_bh(void *opaque)
> >>> +{
> >>> +    struct e1000e_tx *tx = opaque;
> >>> +    E1000ECore *core = tx->core;
> >>> +    E1000E_TxRing txr;
> >>> +
> >>> +    e1000e_tx_ring_init(core, &txr, tx - &core->tx[0]);
> >>> +    e1000e_start_xmit(core, &txr);
> >>> +}
> >>> +
> >>>    void
> >>>    e1000e_core_pci_realize(E1000ECore     *core,
> >>>                            const uint16_t *eeprom_templ,
> >>> @@ -3340,6 +3345,8 @@ e1000e_core_pci_realize(E1000ECore     *core,
> >>>        for (i = 0; i < E1000E_NUM_QUEUES; i++) {
> >>>            net_tx_pkt_init(&core->tx[i].tx_pkt, core->owner,
> >>>                            E1000E_MAX_TX_FRAGS, core->has_vnet);
> >>> +        core->tx[i].core = core;
> >>> +        core->tx[i].tx_bh = qemu_bh_new(e1000e_core_tx_bh, &core->tx[i]);
> >>>        }
> >>>
> >>>        net_rx_pkt_init(&core->rx_pkt, core->has_vnet);
> >>> @@ -3367,6 +3374,8 @@ e1000e_core_pci_uninit(E1000ECore *core)
> >>>        for (i = 0; i < E1000E_NUM_QUEUES; i++) {
> >>>            net_tx_pkt_reset(core->tx[i].tx_pkt);
> >>>            net_tx_pkt_uninit(core->tx[i].tx_pkt);
> >>> +        qemu_bh_delete(core->tx[i].tx_bh);
> >>> +        core->tx[i].tx_bh = NULL;
> >>>        }
> >>>
> >>>        net_rx_pkt_uninit(core->rx_pkt);
> >>> diff --git a/hw/net/e1000e_core.h b/hw/net/e1000e_core.h
> >>> index aee32f7e48..94ddc6afc2 100644
> >>> --- a/hw/net/e1000e_core.h
> >>> +++ b/hw/net/e1000e_core.h
> >>> @@ -77,6 +77,8 @@ struct E1000Core {
> >>>            unsigned char sum_needed;
> >>>            bool cptse;
> >>>            struct NetTxPkt *tx_pkt;
> >>> +        QEMUBH *tx_bh;
> >>> +        E1000ECore *core;
> >>>        } tx[E1000E_NUM_QUEUES];
> >>>
> >>>        struct NetRxPkt *rx_pkt;
>


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] e1000e: using bottom half to send packets
  2020-07-17  3:10 ` Jason Wang
  2020-07-17  4:46   ` Li Qiang
@ 2020-07-17 15:52   ` Peter Maydell
  2020-07-20  4:00     ` Jason Wang
  1 sibling, 1 reply; 9+ messages in thread
From: Peter Maydell @ 2020-07-17 15:52 UTC (permalink / raw)
  To: Jason Wang
  Cc: Dmitry Fleytman, Michael S. Tsirkin, Li Qiang, Li Qiang,
	QEMU Developers, Paolo Bonzini

On Fri, 17 Jul 2020 at 04:11, Jason Wang <jasowang@redhat.com> wrote:
> I think several things were missed in this patch (take virtio-net as a
> reference), do we need the following things:
>
> - Cancel the bh when VM is stopped.

Similarly, what should we do with the bh when the device
is reset ?

> - A throttle to prevent bh from executing too much timer?
> - A flag to record whether or not this a pending tx (and migrate it?)

thanks
-- PMM


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] e1000e: using bottom half to send packets
  2020-07-17 15:46       ` Li Qiang
@ 2020-07-20  3:59         ` Jason Wang
  2020-07-20  4:41           ` Li Qiang
  0 siblings, 1 reply; 9+ messages in thread
From: Jason Wang @ 2020-07-20  3:59 UTC (permalink / raw)
  To: Li Qiang
  Cc: Paolo Bonzini, Dmitry Fleytman, Li Qiang, Qemu Developers,
	Michael S. Tsirkin


On 2020/7/17 下午11:46, Li Qiang wrote:
> Jason Wang <jasowang@redhat.com> 于2020年7月17日周五 下午1:39写道:
>>
>> On 2020/7/17 下午12:46, Li Qiang wrote:
>>> Jason Wang <jasowang@redhat.com> 于2020年7月17日周五 上午11:10写道:
>>>> On 2020/7/17 上午12:14, Li Qiang wrote:
>>>>> Alexander Bulekov reported a UAF bug related e1000e packets send.
>>>>>
>>>>> -->https://bugs.launchpad.net/qemu/+bug/1886362
>>>>>
>>>>> This is because the guest trigger a e1000e packet send and set the
>>>>> data's address to e1000e's MMIO address. So when the e1000e do DMA
>>>>> it will write the MMIO again and trigger re-entrancy and finally
>>>>> causes this UAF.
>>>>>
>>>>> Paolo suggested to use a bottom half whenever MMIO is doing complicate
>>>>> things in here:
>>>>> -->https://lists.nongnu.org/archive/html/qemu-devel/2020-07/msg03342.html
>>>>>
>>>>> Reference here:
>>>>> 'The easiest solution is to delay processing of descriptors to a bottom
>>>>> half whenever MMIO is doing something complicated.  This is also better
>>>>> for latency because it will free the vCPU thread more quickly and leave
>>>>> the work to the I/O thread.'
>>>> I think several things were missed in this patch (take virtio-net as a
>>>> reference), do we need the following things:
>>>>
>>> Thanks Jason,
>>> In fact I know this, I'm scared for touching this but I want to try.
>>> Thanks for your advice.
>>>
>>>> - Cancel the bh when VM is stopped.
>>> Ok. I think add a vm state change notifier for e1000e can address this.
>>>
>>>> - A throttle to prevent bh from executing too much timer?
>>> Ok, I think add a config timeout and add a timer in e1000e can address this.
>>
>> Sorry, a typo. I meant we probably need a tx_burst as what virtio-net did.
>>
>>
>>>> - A flag to record whether or not this a pending tx (and migrate it?)
>>> Is just a flag enough? Could you explain more about the idea behind
>>> processing the virtio-net/e1000e using bh like this?
>>
>> Virtio-net use a tx_waiting variable to record whether or not there's a
>> pending bh. (E.g bh is cancelled due to vmstop, we need reschedule it
>> after vmresume). Maybe we can do something simpler by just schecule bh
>> unconditionally during vm resuming.
>>
>>
>>> For example, if the guest trigger a lot of packets send and if the bh
>>> is scheduled in IO thread. So will we lost packets?
>>
>> We don't since we don't populate virtqueue which means packets are
>> queued there.
>>
> This remind of me a question:
> If we use tx_burst like in virtion-net. For detail:
> If we sent out  'tx_burst' packets per bh. Then we set 'tx_waiting' and
> then schedule another bh. However if between two bh schedule, the guest change
> the e1000e register such 'r->dh' 'r->dlen'. The data is fully corrupted.
> In fact this issue does exist in my origin patch.  That's
> What if following happend:
>
> vcpu thread: guest write e1000e MMIO to trigger packets send
> vcpu thread: schedule a bh
> vcpu thread: return
> IO thread: begin to run the bh and start send packets
> vcpu thread: write register again such as  'r->dh' 'r->dlen'..
>
> So here the IO thread and vcpu thread will race the register?
>
> If I remember correctly, the virtio net has no such problem because it
> uses ring buffer
> and the backedn(virtio device) uses the shadow index to index the ring
> buffer data.
>
> What's your idea here?


I think we serialize them through bql? (qemu_mutex_lock_iothread())

Thanks


>
> Thanks,
> Li Qiang
>
>
>> Thanks
>>
>>
>>> How we avoid this in virtio-net.
>>>
>>> Thanks,
>>> Li Qiang
>>>
>>>
>>>
>>>> Thanks
>>>>
>>>>
>>>>> This patch fixes this UAF.
>>>>>
>>>>> Signed-off-by: Li Qiang <liq3ea@163.com>
>>>>> ---
>>>>>     hw/net/e1000e_core.c | 25 +++++++++++++++++--------
>>>>>     hw/net/e1000e_core.h |  2 ++
>>>>>     2 files changed, 19 insertions(+), 8 deletions(-)
>>>>>
>>>>> diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
>>>>> index bcd186cac5..6165b04b68 100644
>>>>> --- a/hw/net/e1000e_core.c
>>>>> +++ b/hw/net/e1000e_core.c
>>>>> @@ -2423,32 +2423,27 @@ e1000e_set_dbal(E1000ECore *core, int index, uint32_t val)
>>>>>     static void
>>>>>     e1000e_set_tctl(E1000ECore *core, int index, uint32_t val)
>>>>>     {
>>>>> -    E1000E_TxRing txr;
>>>>>         core->mac[index] = val;
>>>>>
>>>>>         if (core->mac[TARC0] & E1000_TARC_ENABLE) {
>>>>> -        e1000e_tx_ring_init(core, &txr, 0);
>>>>> -        e1000e_start_xmit(core, &txr);
>>>>> +        qemu_bh_schedule(core->tx[0].tx_bh);
>>>>>         }
>>>>>
>>>>>         if (core->mac[TARC1] & E1000_TARC_ENABLE) {
>>>>> -        e1000e_tx_ring_init(core, &txr, 1);
>>>>> -        e1000e_start_xmit(core, &txr);
>>>>> +        qemu_bh_schedule(core->tx[1].tx_bh);
>>>>>         }
>>>>>     }
>>>>>
>>>>>     static void
>>>>>     e1000e_set_tdt(E1000ECore *core, int index, uint32_t val)
>>>>>     {
>>>>> -    E1000E_TxRing txr;
>>>>>         int qidx = e1000e_mq_queue_idx(TDT, index);
>>>>>         uint32_t tarc_reg = (qidx == 0) ? TARC0 : TARC1;
>>>>>
>>>>>         core->mac[index] = val & 0xffff;
>>>>>
>>>>>         if (core->mac[tarc_reg] & E1000_TARC_ENABLE) {
>>>>> -        e1000e_tx_ring_init(core, &txr, qidx);
>>>>> -        e1000e_start_xmit(core, &txr);
>>>>> +        qemu_bh_schedule(core->tx[qidx].tx_bh);
>>>>>         }
>>>>>     }
>>>>>
>>>>> @@ -3322,6 +3317,16 @@ e1000e_vm_state_change(void *opaque, int running, RunState state)
>>>>>         }
>>>>>     }
>>>>>
>>>>> +static void e1000e_core_tx_bh(void *opaque)
>>>>> +{
>>>>> +    struct e1000e_tx *tx = opaque;
>>>>> +    E1000ECore *core = tx->core;
>>>>> +    E1000E_TxRing txr;
>>>>> +
>>>>> +    e1000e_tx_ring_init(core, &txr, tx - &core->tx[0]);
>>>>> +    e1000e_start_xmit(core, &txr);
>>>>> +}
>>>>> +
>>>>>     void
>>>>>     e1000e_core_pci_realize(E1000ECore     *core,
>>>>>                             const uint16_t *eeprom_templ,
>>>>> @@ -3340,6 +3345,8 @@ e1000e_core_pci_realize(E1000ECore     *core,
>>>>>         for (i = 0; i < E1000E_NUM_QUEUES; i++) {
>>>>>             net_tx_pkt_init(&core->tx[i].tx_pkt, core->owner,
>>>>>                             E1000E_MAX_TX_FRAGS, core->has_vnet);
>>>>> +        core->tx[i].core = core;
>>>>> +        core->tx[i].tx_bh = qemu_bh_new(e1000e_core_tx_bh, &core->tx[i]);
>>>>>         }
>>>>>
>>>>>         net_rx_pkt_init(&core->rx_pkt, core->has_vnet);
>>>>> @@ -3367,6 +3374,8 @@ e1000e_core_pci_uninit(E1000ECore *core)
>>>>>         for (i = 0; i < E1000E_NUM_QUEUES; i++) {
>>>>>             net_tx_pkt_reset(core->tx[i].tx_pkt);
>>>>>             net_tx_pkt_uninit(core->tx[i].tx_pkt);
>>>>> +        qemu_bh_delete(core->tx[i].tx_bh);
>>>>> +        core->tx[i].tx_bh = NULL;
>>>>>         }
>>>>>
>>>>>         net_rx_pkt_uninit(core->rx_pkt);
>>>>> diff --git a/hw/net/e1000e_core.h b/hw/net/e1000e_core.h
>>>>> index aee32f7e48..94ddc6afc2 100644
>>>>> --- a/hw/net/e1000e_core.h
>>>>> +++ b/hw/net/e1000e_core.h
>>>>> @@ -77,6 +77,8 @@ struct E1000Core {
>>>>>             unsigned char sum_needed;
>>>>>             bool cptse;
>>>>>             struct NetTxPkt *tx_pkt;
>>>>> +        QEMUBH *tx_bh;
>>>>> +        E1000ECore *core;
>>>>>         } tx[E1000E_NUM_QUEUES];
>>>>>
>>>>>         struct NetRxPkt *rx_pkt;



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] e1000e: using bottom half to send packets
  2020-07-17 15:52   ` Peter Maydell
@ 2020-07-20  4:00     ` Jason Wang
  0 siblings, 0 replies; 9+ messages in thread
From: Jason Wang @ 2020-07-20  4:00 UTC (permalink / raw)
  To: Peter Maydell
  Cc: Dmitry Fleytman, Michael S. Tsirkin, Li Qiang, Li Qiang,
	QEMU Developers, Paolo Bonzini


On 2020/7/17 下午11:52, Peter Maydell wrote:
> On Fri, 17 Jul 2020 at 04:11, Jason Wang <jasowang@redhat.com> wrote:
>> I think several things were missed in this patch (take virtio-net as a
>> reference), do we need the following things:
>>
>> - Cancel the bh when VM is stopped.
> Similarly, what should we do with the bh when the device
> is reset ?


I think we need to cancel the bh.

Thanks


>
>> - A throttle to prevent bh from executing too much timer?
>> - A flag to record whether or not this a pending tx (and migrate it?)
> thanks
> -- PMM
>



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] e1000e: using bottom half to send packets
  2020-07-20  3:59         ` Jason Wang
@ 2020-07-20  4:41           ` Li Qiang
  0 siblings, 0 replies; 9+ messages in thread
From: Li Qiang @ 2020-07-20  4:41 UTC (permalink / raw)
  To: Jason Wang
  Cc: Paolo Bonzini, Dmitry Fleytman, Li Qiang, Qemu Developers,
	Michael S. Tsirkin

Jason Wang <jasowang@redhat.com> 于2020年7月20日周一 下午12:00写道:
>
>
> On 2020/7/17 下午11:46, Li Qiang wrote:
> > Jason Wang <jasowang@redhat.com> 于2020年7月17日周五 下午1:39写道:
> >>
> >> On 2020/7/17 下午12:46, Li Qiang wrote:
> >>> Jason Wang <jasowang@redhat.com> 于2020年7月17日周五 上午11:10写道:
> >>>> On 2020/7/17 上午12:14, Li Qiang wrote:
> >>>>> Alexander Bulekov reported a UAF bug related e1000e packets send.
> >>>>>
> >>>>> -->https://bugs.launchpad.net/qemu/+bug/1886362
> >>>>>
> >>>>> This is because the guest trigger a e1000e packet send and set the
> >>>>> data's address to e1000e's MMIO address. So when the e1000e do DMA
> >>>>> it will write the MMIO again and trigger re-entrancy and finally
> >>>>> causes this UAF.
> >>>>>
> >>>>> Paolo suggested to use a bottom half whenever MMIO is doing complicate
> >>>>> things in here:
> >>>>> -->https://lists.nongnu.org/archive/html/qemu-devel/2020-07/msg03342.html
> >>>>>
> >>>>> Reference here:
> >>>>> 'The easiest solution is to delay processing of descriptors to a bottom
> >>>>> half whenever MMIO is doing something complicated.  This is also better
> >>>>> for latency because it will free the vCPU thread more quickly and leave
> >>>>> the work to the I/O thread.'
> >>>> I think several things were missed in this patch (take virtio-net as a
> >>>> reference), do we need the following things:
> >>>>
> >>> Thanks Jason,
> >>> In fact I know this, I'm scared for touching this but I want to try.
> >>> Thanks for your advice.
> >>>
> >>>> - Cancel the bh when VM is stopped.
> >>> Ok. I think add a vm state change notifier for e1000e can address this.
> >>>
> >>>> - A throttle to prevent bh from executing too much timer?
> >>> Ok, I think add a config timeout and add a timer in e1000e can address this.
> >>
> >> Sorry, a typo. I meant we probably need a tx_burst as what virtio-net did.
> >>
> >>
> >>>> - A flag to record whether or not this a pending tx (and migrate it?)
> >>> Is just a flag enough? Could you explain more about the idea behind
> >>> processing the virtio-net/e1000e using bh like this?
> >>
> >> Virtio-net use a tx_waiting variable to record whether or not there's a
> >> pending bh. (E.g bh is cancelled due to vmstop, we need reschedule it
> >> after vmresume). Maybe we can do something simpler by just schecule bh
> >> unconditionally during vm resuming.
> >>
> >>
> >>> For example, if the guest trigger a lot of packets send and if the bh
> >>> is scheduled in IO thread. So will we lost packets?
> >>
> >> We don't since we don't populate virtqueue which means packets are
> >> queued there.
> >>
> > This remind of me a question:
> > If we use tx_burst like in virtion-net. For detail:
> > If we sent out  'tx_burst' packets per bh. Then we set 'tx_waiting' and
> > then schedule another bh. However if between two bh schedule, the guest change
> > the e1000e register such 'r->dh' 'r->dlen'. The data is fully corrupted.
> > In fact this issue does exist in my origin patch.  That's
> > What if following happend:
> >
> > vcpu thread: guest write e1000e MMIO to trigger packets send
> > vcpu thread: schedule a bh
> > vcpu thread: return
> > IO thread: begin to run the bh and start send packets
> > vcpu thread: write register again such as  'r->dh' 'r->dlen'..
> >
> > So here the IO thread and vcpu thread will race the register?
> >
> > If I remember correctly, the virtio net has no such problem because it
> > uses ring buffer
> > and the backedn(virtio device) uses the shadow index to index the ring
> > buffer data.
> >
> > What's your idea here?
>
>
> I think we serialize them through bql? (qemu_mutex_lock_iothread())
>

Ok I will try to cook a patch tonight based on our discussion.

Thanks,
Li Qiang

> Thanks
>
>
> >
> > Thanks,
> > Li Qiang
> >
> >
> >> Thanks
> >>
> >>
> >>> How we avoid this in virtio-net.
> >>>
> >>> Thanks,
> >>> Li Qiang
> >>>
> >>>
> >>>
> >>>> Thanks
> >>>>
> >>>>
> >>>>> This patch fixes this UAF.
> >>>>>
> >>>>> Signed-off-by: Li Qiang <liq3ea@163.com>
> >>>>> ---
> >>>>>     hw/net/e1000e_core.c | 25 +++++++++++++++++--------
> >>>>>     hw/net/e1000e_core.h |  2 ++
> >>>>>     2 files changed, 19 insertions(+), 8 deletions(-)
> >>>>>
> >>>>> diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
> >>>>> index bcd186cac5..6165b04b68 100644
> >>>>> --- a/hw/net/e1000e_core.c
> >>>>> +++ b/hw/net/e1000e_core.c
> >>>>> @@ -2423,32 +2423,27 @@ e1000e_set_dbal(E1000ECore *core, int index, uint32_t val)
> >>>>>     static void
> >>>>>     e1000e_set_tctl(E1000ECore *core, int index, uint32_t val)
> >>>>>     {
> >>>>> -    E1000E_TxRing txr;
> >>>>>         core->mac[index] = val;
> >>>>>
> >>>>>         if (core->mac[TARC0] & E1000_TARC_ENABLE) {
> >>>>> -        e1000e_tx_ring_init(core, &txr, 0);
> >>>>> -        e1000e_start_xmit(core, &txr);
> >>>>> +        qemu_bh_schedule(core->tx[0].tx_bh);
> >>>>>         }
> >>>>>
> >>>>>         if (core->mac[TARC1] & E1000_TARC_ENABLE) {
> >>>>> -        e1000e_tx_ring_init(core, &txr, 1);
> >>>>> -        e1000e_start_xmit(core, &txr);
> >>>>> +        qemu_bh_schedule(core->tx[1].tx_bh);
> >>>>>         }
> >>>>>     }
> >>>>>
> >>>>>     static void
> >>>>>     e1000e_set_tdt(E1000ECore *core, int index, uint32_t val)
> >>>>>     {
> >>>>> -    E1000E_TxRing txr;
> >>>>>         int qidx = e1000e_mq_queue_idx(TDT, index);
> >>>>>         uint32_t tarc_reg = (qidx == 0) ? TARC0 : TARC1;
> >>>>>
> >>>>>         core->mac[index] = val & 0xffff;
> >>>>>
> >>>>>         if (core->mac[tarc_reg] & E1000_TARC_ENABLE) {
> >>>>> -        e1000e_tx_ring_init(core, &txr, qidx);
> >>>>> -        e1000e_start_xmit(core, &txr);
> >>>>> +        qemu_bh_schedule(core->tx[qidx].tx_bh);
> >>>>>         }
> >>>>>     }
> >>>>>
> >>>>> @@ -3322,6 +3317,16 @@ e1000e_vm_state_change(void *opaque, int running, RunState state)
> >>>>>         }
> >>>>>     }
> >>>>>
> >>>>> +static void e1000e_core_tx_bh(void *opaque)
> >>>>> +{
> >>>>> +    struct e1000e_tx *tx = opaque;
> >>>>> +    E1000ECore *core = tx->core;
> >>>>> +    E1000E_TxRing txr;
> >>>>> +
> >>>>> +    e1000e_tx_ring_init(core, &txr, tx - &core->tx[0]);
> >>>>> +    e1000e_start_xmit(core, &txr);
> >>>>> +}
> >>>>> +
> >>>>>     void
> >>>>>     e1000e_core_pci_realize(E1000ECore     *core,
> >>>>>                             const uint16_t *eeprom_templ,
> >>>>> @@ -3340,6 +3345,8 @@ e1000e_core_pci_realize(E1000ECore     *core,
> >>>>>         for (i = 0; i < E1000E_NUM_QUEUES; i++) {
> >>>>>             net_tx_pkt_init(&core->tx[i].tx_pkt, core->owner,
> >>>>>                             E1000E_MAX_TX_FRAGS, core->has_vnet);
> >>>>> +        core->tx[i].core = core;
> >>>>> +        core->tx[i].tx_bh = qemu_bh_new(e1000e_core_tx_bh, &core->tx[i]);
> >>>>>         }
> >>>>>
> >>>>>         net_rx_pkt_init(&core->rx_pkt, core->has_vnet);
> >>>>> @@ -3367,6 +3374,8 @@ e1000e_core_pci_uninit(E1000ECore *core)
> >>>>>         for (i = 0; i < E1000E_NUM_QUEUES; i++) {
> >>>>>             net_tx_pkt_reset(core->tx[i].tx_pkt);
> >>>>>             net_tx_pkt_uninit(core->tx[i].tx_pkt);
> >>>>> +        qemu_bh_delete(core->tx[i].tx_bh);
> >>>>> +        core->tx[i].tx_bh = NULL;
> >>>>>         }
> >>>>>
> >>>>>         net_rx_pkt_uninit(core->rx_pkt);
> >>>>> diff --git a/hw/net/e1000e_core.h b/hw/net/e1000e_core.h
> >>>>> index aee32f7e48..94ddc6afc2 100644
> >>>>> --- a/hw/net/e1000e_core.h
> >>>>> +++ b/hw/net/e1000e_core.h
> >>>>> @@ -77,6 +77,8 @@ struct E1000Core {
> >>>>>             unsigned char sum_needed;
> >>>>>             bool cptse;
> >>>>>             struct NetTxPkt *tx_pkt;
> >>>>> +        QEMUBH *tx_bh;
> >>>>> +        E1000ECore *core;
> >>>>>         } tx[E1000E_NUM_QUEUES];
> >>>>>
> >>>>>         struct NetRxPkt *rx_pkt;
>


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2020-07-20  4:42 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-16 16:14 [PATCH] e1000e: using bottom half to send packets Li Qiang
2020-07-17  3:10 ` Jason Wang
2020-07-17  4:46   ` Li Qiang
2020-07-17  5:38     ` Jason Wang
2020-07-17 15:46       ` Li Qiang
2020-07-20  3:59         ` Jason Wang
2020-07-20  4:41           ` Li Qiang
2020-07-17 15:52   ` Peter Maydell
2020-07-20  4:00     ` Jason Wang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.