qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] mc146818rtc: fix timer interrupt reinjection
@ 2019-10-10 12:30 Marcelo Tosatti
  2019-10-10 12:35 ` Paolo Bonzini
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Marcelo Tosatti @ 2019-10-10 12:30 UTC (permalink / raw)
  To: qemu-devel; +Cc: Paolo Bonzini, Xiao Guangrong, Vadim Rozenfeld


commit 369b41359af46bded5799c9ef8be2b641d92e043 broke timer interrupt
reinjection when there is no period change by the guest.

In that case, old_period is 0, which ends up zeroing irq_coalesced
(counter of reinjected interrupts).

The consequence is Windows 7 is unable to synchronize time via NTP.
Easily reproducible by playing a fullscreen video with cirrus and VNC.

Fix by not updating s->irq_coalesced when old_period is 0.

V2: reorganize code (Paolo Bonzini)

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c
index 6cb378751b..0e7cf97042 100644
--- a/hw/timer/mc146818rtc.c
+++ b/hw/timer/mc146818rtc.c
@@ -203,24 +203,28 @@ periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period)
 
     period = rtc_periodic_clock_ticks(s);
 
-    if (period) {
-        /* compute 32 khz clock */
-        cur_clock =
-            muldiv64(current_time, RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
+    if (!period) {
+        s->irq_coalesced = 0;
+        timer_del(s->periodic_timer);
+        return;
+    }
 
-        /*
-        * if the periodic timer's update is due to period re-configuration,
-        * we should count the clock since last interrupt.
-        */
-        if (old_period) {
-            int64_t last_periodic_clock, next_periodic_clock;
-
-            next_periodic_clock = muldiv64(s->next_periodic_time,
-                                    RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
-            last_periodic_clock = next_periodic_clock - old_period;
-            lost_clock = cur_clock - last_periodic_clock;
-            assert(lost_clock >= 0);
-        }
+    /* compute 32 khz clock */
+    cur_clock =
+        muldiv64(current_time, RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
+
+    /*
+     * if the periodic timer's update is due to period re-configuration,
+     * we should count the clock since last interrupt.
+     */
+    if (old_period) {
+        int64_t last_periodic_clock, next_periodic_clock;
+
+        next_periodic_clock = muldiv64(s->next_periodic_time,
+                                RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
+        last_periodic_clock = next_periodic_clock - old_period;
+        lost_clock = cur_clock - last_periodic_clock;
+        assert(lost_clock >= 0);
 
         /*
          * s->irq_coalesced can change for two reasons:
@@ -251,22 +255,19 @@ periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period)
                 rtc_coalesced_timer_update(s);
             }
         } else {
-           /*
+            /*
              * no way to compensate the interrupt if LOST_TICK_POLICY_SLEW
              * is not used, we should make the time progress anyway.
              */
             lost_clock = MIN(lost_clock, period);
         }
+    }
 
-        assert(lost_clock >= 0 && lost_clock <= period);
+    assert(lost_clock >= 0 && lost_clock <= period);
 
-        next_irq_clock = cur_clock + period - lost_clock;
-        s->next_periodic_time = periodic_clock_to_ns(next_irq_clock) + 1;
-        timer_mod(s->periodic_timer, s->next_periodic_time);
-    } else {
-        s->irq_coalesced = 0;
-        timer_del(s->periodic_timer);
-    }
+    next_irq_clock = cur_clock + period - lost_clock;
+    s->next_periodic_time = periodic_clock_to_ns(next_irq_clock) + 1;
+    timer_mod(s->periodic_timer, s->next_periodic_time);
 }
 
 static void rtc_periodic_timer(void *opaque)



^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] mc146818rtc: fix timer interrupt reinjection
  2019-10-10 12:30 [PATCH v2] mc146818rtc: fix timer interrupt reinjection Marcelo Tosatti
@ 2019-10-10 12:35 ` Paolo Bonzini
  2019-10-24 11:22 ` Philippe Mathieu-Daudé
  2019-11-16 20:58 ` Alex Williamson
  2 siblings, 0 replies; 12+ messages in thread
From: Paolo Bonzini @ 2019-10-10 12:35 UTC (permalink / raw)
  To: Marcelo Tosatti, qemu-devel; +Cc: Xiao Guangrong, Vadim Rozenfeld

On 10/10/19 14:30, Marcelo Tosatti wrote:
> 
> commit 369b41359af46bded5799c9ef8be2b641d92e043 broke timer interrupt
> reinjection when there is no period change by the guest.
> 
> In that case, old_period is 0, which ends up zeroing irq_coalesced
> (counter of reinjected interrupts).
> 
> The consequence is Windows 7 is unable to synchronize time via NTP.
> Easily reproducible by playing a fullscreen video with cirrus and VNC.
> 
> Fix by not updating s->irq_coalesced when old_period is 0.
> 
> V2: reorganize code (Paolo Bonzini)
> 
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> 
> diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c
> index 6cb378751b..0e7cf97042 100644
> --- a/hw/timer/mc146818rtc.c
> +++ b/hw/timer/mc146818rtc.c
> @@ -203,24 +203,28 @@ periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period)
>  
>      period = rtc_periodic_clock_ticks(s);
>  
> -    if (period) {
> -        /* compute 32 khz clock */
> -        cur_clock =
> -            muldiv64(current_time, RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
> +    if (!period) {
> +        s->irq_coalesced = 0;
> +        timer_del(s->periodic_timer);
> +        return;
> +    }
>  
> -        /*
> -        * if the periodic timer's update is due to period re-configuration,
> -        * we should count the clock since last interrupt.
> -        */
> -        if (old_period) {
> -            int64_t last_periodic_clock, next_periodic_clock;
> -
> -            next_periodic_clock = muldiv64(s->next_periodic_time,
> -                                    RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
> -            last_periodic_clock = next_periodic_clock - old_period;
> -            lost_clock = cur_clock - last_periodic_clock;
> -            assert(lost_clock >= 0);
> -        }
> +    /* compute 32 khz clock */
> +    cur_clock =
> +        muldiv64(current_time, RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
> +
> +    /*
> +     * if the periodic timer's update is due to period re-configuration,
> +     * we should count the clock since last interrupt.
> +     */
> +    if (old_period) {
> +        int64_t last_periodic_clock, next_periodic_clock;
> +
> +        next_periodic_clock = muldiv64(s->next_periodic_time,
> +                                RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
> +        last_periodic_clock = next_periodic_clock - old_period;
> +        lost_clock = cur_clock - last_periodic_clock;
> +        assert(lost_clock >= 0);
>  
>          /*
>           * s->irq_coalesced can change for two reasons:
> @@ -251,22 +255,19 @@ periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period)
>                  rtc_coalesced_timer_update(s);
>              }
>          } else {
> -           /*
> +            /*
>               * no way to compensate the interrupt if LOST_TICK_POLICY_SLEW
>               * is not used, we should make the time progress anyway.
>               */
>              lost_clock = MIN(lost_clock, period);
>          }
> +    }
>  
> -        assert(lost_clock >= 0 && lost_clock <= period);
> +    assert(lost_clock >= 0 && lost_clock <= period);
>  
> -        next_irq_clock = cur_clock + period - lost_clock;
> -        s->next_periodic_time = periodic_clock_to_ns(next_irq_clock) + 1;
> -        timer_mod(s->periodic_timer, s->next_periodic_time);
> -    } else {
> -        s->irq_coalesced = 0;
> -        timer_del(s->periodic_timer);
> -    }
> +    next_irq_clock = cur_clock + period - lost_clock;
> +    s->next_periodic_time = periodic_clock_to_ns(next_irq_clock) + 1;
> +    timer_mod(s->periodic_timer, s->next_periodic_time);
>  }
>  
>  static void rtc_periodic_timer(void *opaque)
> 

Queued, thanks.

Paolo


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] mc146818rtc: fix timer interrupt reinjection
  2019-10-10 12:30 [PATCH v2] mc146818rtc: fix timer interrupt reinjection Marcelo Tosatti
  2019-10-10 12:35 ` Paolo Bonzini
@ 2019-10-24 11:22 ` Philippe Mathieu-Daudé
  2019-10-24 12:08   ` Philippe Mathieu-Daudé
  2019-11-16 20:58 ` Alex Williamson
  2 siblings, 1 reply; 12+ messages in thread
From: Philippe Mathieu-Daudé @ 2019-10-24 11:22 UTC (permalink / raw)
  To: Marcelo Tosatti, qemu-devel
  Cc: Paolo Bonzini, Xiao Guangrong, Vadim Rozenfeld

On 10/10/19 2:30 PM, Marcelo Tosatti wrote:
> 
> commit 369b41359af46bded5799c9ef8be2b641d92e043 broke timer interrupt
> reinjection when there is no period change by the guest.
> 
> In that case, old_period is 0, which ends up zeroing irq_coalesced
> (counter of reinjected interrupts).
> 
> The consequence is Windows 7 is unable to synchronize time via NTP.
> Easily reproducible by playing a fullscreen video with cirrus and VNC.
> 
> Fix by not updating s->irq_coalesced when old_period is 0.
> 
> V2: reorganize code (Paolo Bonzini)

^ This line shouldn't be part of git history,
Paolo if possible can you drop it?

> 
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> 
> diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c
> index 6cb378751b..0e7cf97042 100644
> --- a/hw/timer/mc146818rtc.c
> +++ b/hw/timer/mc146818rtc.c
> @@ -203,24 +203,28 @@ periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period)
>   
>       period = rtc_periodic_clock_ticks(s);
>   
> -    if (period) {
> -        /* compute 32 khz clock */
> -        cur_clock =
> -            muldiv64(current_time, RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
> +    if (!period) {
> +        s->irq_coalesced = 0;
> +        timer_del(s->periodic_timer);
> +        return;
> +    }

This is the first change, simplify the if statement with a return.

>   
> -        /*
> -        * if the periodic timer's update is due to period re-configuration,
> -        * we should count the clock since last interrupt.
> -        */
> -        if (old_period) {
> -            int64_t last_periodic_clock, next_periodic_clock;
> -
> -            next_periodic_clock = muldiv64(s->next_periodic_time,
> -                                    RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
> -            last_periodic_clock = next_periodic_clock - old_period;
> -            lost_clock = cur_clock - last_periodic_clock;
> -            assert(lost_clock >= 0);
> -        }
> +    /* compute 32 khz clock */
> +    cur_clock =
> +        muldiv64(current_time, RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
> +
> +    /*
> +     * if the periodic timer's update is due to period re-configuration,
> +     * we should count the clock since last interrupt.
> +     */
> +    if (old_period) {
> +        int64_t last_periodic_clock, next_periodic_clock;
> +
> +        next_periodic_clock = muldiv64(s->next_periodic_time,
> +                                RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
> +        last_periodic_clock = next_periodic_clock - old_period;
> +        lost_clock = cur_clock - last_periodic_clock;
> +        assert(lost_clock >= 0);
>   
>           /*
>            * s->irq_coalesced can change for two reasons:
> @@ -251,22 +255,19 @@ periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period)
>                   rtc_coalesced_timer_update(s);
>               }
>           } else {
> -           /*
> +            /*
>                * no way to compensate the interrupt if LOST_TICK_POLICY_SLEW
>                * is not used, we should make the time progress anyway.
>                */
>               lost_clock = MIN(lost_clock, period);
>           }
> +    }

This is the second change, changing the logic and fixing the bug.

>   
> -        assert(lost_clock >= 0 && lost_clock <= period);
> +    assert(lost_clock >= 0 && lost_clock <= period);
>   
> -        next_irq_clock = cur_clock + period - lost_clock;
> -        s->next_periodic_time = periodic_clock_to_ns(next_irq_clock) + 1;
> -        timer_mod(s->periodic_timer, s->next_periodic_time);
> -    } else {
> -        s->irq_coalesced = 0;
> -        timer_del(s->periodic_timer);
> -    }
> +    next_irq_clock = cur_clock + period - lost_clock;
> +    s->next_periodic_time = periodic_clock_to_ns(next_irq_clock) + 1;
> +    timer_mod(s->periodic_timer, s->next_periodic_time);

This is part of the 1st change.

I'd rather see this patch split in 2 logical changes...

>   }
>   
>   static void rtc_periodic_timer(void *opaque)
> 
> 


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] mc146818rtc: fix timer interrupt reinjection
  2019-10-24 11:22 ` Philippe Mathieu-Daudé
@ 2019-10-24 12:08   ` Philippe Mathieu-Daudé
  0 siblings, 0 replies; 12+ messages in thread
From: Philippe Mathieu-Daudé @ 2019-10-24 12:08 UTC (permalink / raw)
  To: Marcelo Tosatti, qemu-devel
  Cc: Paolo Bonzini, Xiao Guangrong, Vadim Rozenfeld

On 10/24/19 1:22 PM, Philippe Mathieu-Daudé wrote:
> On 10/10/19 2:30 PM, Marcelo Tosatti wrote:
>>
>> commit 369b41359af46bded5799c9ef8be2b641d92e043 broke timer interrupt
>> reinjection when there is no period change by the guest.
>>
>> In that case, old_period is 0, which ends up zeroing irq_coalesced
>> (counter of reinjected interrupts).
>>
>> The consequence is Windows 7 is unable to synchronize time via NTP.
>> Easily reproducible by playing a fullscreen video with cirrus and VNC.
>>
>> Fix by not updating s->irq_coalesced when old_period is 0.
>>
>> V2: reorganize code (Paolo Bonzini)
> 
> ^ This line shouldn't be part of git history,
> Paolo if possible can you drop it?
> 
>>
>> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
>>
>> diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c
>> index 6cb378751b..0e7cf97042 100644
>> --- a/hw/timer/mc146818rtc.c
>> +++ b/hw/timer/mc146818rtc.c
>> @@ -203,24 +203,28 @@ periodic_timer_update(RTCState *s, int64_t 
>> current_time, uint32_t old_period)
>>       period = rtc_periodic_clock_ticks(s);
>> -    if (period) {
>> -        /* compute 32 khz clock */
>> -        cur_clock =
>> -            muldiv64(current_time, RTC_CLOCK_RATE, 
>> NANOSECONDS_PER_SECOND);
>> +    if (!period) {
>> +        s->irq_coalesced = 0;
>> +        timer_del(s->periodic_timer);
>> +        return;
>> +    }
> 
> This is the first change, simplify the if statement with a return.
> 
>> -        /*
>> -        * if the periodic timer's update is due to period 
>> re-configuration,
>> -        * we should count the clock since last interrupt.
>> -        */
>> -        if (old_period) {
>> -            int64_t last_periodic_clock, next_periodic_clock;
>> -
>> -            next_periodic_clock = muldiv64(s->next_periodic_time,
>> -                                    RTC_CLOCK_RATE, 
>> NANOSECONDS_PER_SECOND);
>> -            last_periodic_clock = next_periodic_clock - old_period;
>> -            lost_clock = cur_clock - last_periodic_clock;
>> -            assert(lost_clock >= 0);
>> -        }
>> +    /* compute 32 khz clock */
>> +    cur_clock =
>> +        muldiv64(current_time, RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
>> +
>> +    /*
>> +     * if the periodic timer's update is due to period re-configuration,
>> +     * we should count the clock since last interrupt.
>> +     */
>> +    if (old_period) {
>> +        int64_t last_periodic_clock, next_periodic_clock;
>> +
>> +        next_periodic_clock = muldiv64(s->next_periodic_time,
>> +                                RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
>> +        last_periodic_clock = next_periodic_clock - old_period;
>> +        lost_clock = cur_clock - last_periodic_clock;
>> +        assert(lost_clock >= 0);
>>           /*
>>            * s->irq_coalesced can change for two reasons:
>> @@ -251,22 +255,19 @@ periodic_timer_update(RTCState *s, int64_t 
>> current_time, uint32_t old_period)
>>                   rtc_coalesced_timer_update(s);
>>               }
>>           } else {
>> -           /*
>> +            /*
>>                * no way to compensate the interrupt if 
>> LOST_TICK_POLICY_SLEW
>>                * is not used, we should make the time progress anyway.
>>                */
>>               lost_clock = MIN(lost_clock, period);
>>           }
>> +    }
> 
> This is the second change, changing the logic and fixing the bug.
> 
>> -        assert(lost_clock >= 0 && lost_clock <= period);
>> +    assert(lost_clock >= 0 && lost_clock <= period);
>> -        next_irq_clock = cur_clock + period - lost_clock;
>> -        s->next_periodic_time = periodic_clock_to_ns(next_irq_clock) 
>> + 1;
>> -        timer_mod(s->periodic_timer, s->next_periodic_time);
>> -    } else {
>> -        s->irq_coalesced = 0;
>> -        timer_del(s->periodic_timer);
>> -    }
>> +    next_irq_clock = cur_clock + period - lost_clock;
>> +    s->next_periodic_time = periodic_clock_to_ns(next_irq_clock) + 1;
>> +    timer_mod(s->periodic_timer, s->next_periodic_time);
> 
> This is part of the 1st change.
> 
> I'd rather see this patch split in 2 logical changes...

Since this split makes sense to me (easier to understand the patch diff 
when reviewing), I can quickly do it if you don't mind.

>>   }
>>   static void rtc_periodic_timer(void *opaque)
>>
>>


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] mc146818rtc: fix timer interrupt reinjection
  2019-10-10 12:30 [PATCH v2] mc146818rtc: fix timer interrupt reinjection Marcelo Tosatti
  2019-10-10 12:35 ` Paolo Bonzini
  2019-10-24 11:22 ` Philippe Mathieu-Daudé
@ 2019-11-16 20:58 ` Alex Williamson
  2019-11-17  3:20   ` Marcelo Tosatti
  2 siblings, 1 reply; 12+ messages in thread
From: Alex Williamson @ 2019-11-16 20:58 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: Paolo Bonzini, Xiao Guangrong, qemu-devel, Vadim Rozenfeld

On Thu, 10 Oct 2019 09:30:08 -0300
Marcelo Tosatti <mtosatti@redhat.com> wrote:

> commit 369b41359af46bded5799c9ef8be2b641d92e043 broke timer interrupt
> reinjection when there is no period change by the guest.
> 
> In that case, old_period is 0, which ends up zeroing irq_coalesced
> (counter of reinjected interrupts).
> 
> The consequence is Windows 7 is unable to synchronize time via NTP.
> Easily reproducible by playing a fullscreen video with cirrus and VNC.
> 
> Fix by not updating s->irq_coalesced when old_period is 0.
> 
> V2: reorganize code (Paolo Bonzini)
> 
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

This causes a regression for me, my win10 VM with assigned GPU
experiences hangs and slowness with this.  Found via bisect, reverting
restores normal behavior.  libvirt uses this commandline:

/usr/local/bin/qemu-system-x86_64 \
-name guest=Steam-GeForce,debug-threads=on \
-S \
-object secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-1-Steam-GeForce/master-key.aes \
-machine pc-i440fx-4.1,accel=kvm,usb=off,vmport=off,dump-guest-core=off \
-cpu host,hv-time,hv-relaxed,hv-vapic,hv-spinlocks=0x1fff,hv-vendor-id=KeenlyKVM,kvm=off \
-drive file=/usr/share/edk2/ovmf/OVMF_CODE.fd,if=pflash,format=raw,unit=0,readonly=on \
-drive file=/var/lib/libvirt/qemu/nvram/Steam_VARS.fd,if=pflash,format=raw,unit=1 \
-m 4096 \
-mem-prealloc \
-mem-path /dev/hugepages/libvirt/qemu/1-Steam-GeForce \
-overcommit mem-lock=off \
-smp 4,sockets=1,cores=2,threads=2 \
-uuid 2b417d4b-f25b-4522-a5be-e105f032f99c \
-display none \
-no-user-config \
-nodefaults \
-chardev socket,id=charmonitor,fd=38,server,nowait \
-mon chardev=charmonitor,id=monitor,mode=control \
-rtc base=localtime,driftfix=slew \
-global kvm-pit.lost_tick_policy=delay \
-no-hpet \
-no-shutdown \
-boot menu=on,strict=on \
-device nec-usb-xhci,id=usb,bus=pci.0,addr=0x8 \
-device virtio-scsi-pci,id=scsi0,num_queues=4,bus=pci.0,addr=0x5 \
-drive file=/mnt/ssd/Steam.qcow2,format=qcow2,if=none,id=drive-scsi0-0-0-0,cache=none \
-device scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,device_id=drive-scsi0-0-0-0,drive=drive-scsi0-0-0-0,id=scsi0-0-0-0,bootindex=2,write-cache=on \
-netdev tap,fd=40,id=hostnet0,vhost=on,vhostfd=41 \
-device virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:60:ef:ac,bus=pci.0,addr=0x3 \
-device vfio-pci,host=0000:01:00.0,id=hostdev0,bus=pci.0,addr=0x4,rombar=1 \
-device vfio-pci,host=0000:01:00.1,id=hostdev1,bus=pci.0,addr=0x6,rombar=0 \
-S \
-debugcon file:/tmp/Steam-ovmf-debug.log \
-global isa-debugcon.iobase=0x402 \
-set device.hostdev0.x-pci-vendor-id=0x10de \
-trace events=/var/lib/libvirt/images/Steam-GeForce.events \
-sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny \
-msg timestamp=on

Thanks,
Alex

> diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c
> index 6cb378751b..0e7cf97042 100644
> --- a/hw/timer/mc146818rtc.c
> +++ b/hw/timer/mc146818rtc.c
> @@ -203,24 +203,28 @@ periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period)
>  
>      period = rtc_periodic_clock_ticks(s);
>  
> -    if (period) {
> -        /* compute 32 khz clock */
> -        cur_clock =
> -            muldiv64(current_time, RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
> +    if (!period) {
> +        s->irq_coalesced = 0;
> +        timer_del(s->periodic_timer);
> +        return;
> +    }
>  
> -        /*
> -        * if the periodic timer's update is due to period re-configuration,
> -        * we should count the clock since last interrupt.
> -        */
> -        if (old_period) {
> -            int64_t last_periodic_clock, next_periodic_clock;
> -
> -            next_periodic_clock = muldiv64(s->next_periodic_time,
> -                                    RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
> -            last_periodic_clock = next_periodic_clock - old_period;
> -            lost_clock = cur_clock - last_periodic_clock;
> -            assert(lost_clock >= 0);
> -        }
> +    /* compute 32 khz clock */
> +    cur_clock =
> +        muldiv64(current_time, RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
> +
> +    /*
> +     * if the periodic timer's update is due to period re-configuration,
> +     * we should count the clock since last interrupt.
> +     */
> +    if (old_period) {
> +        int64_t last_periodic_clock, next_periodic_clock;
> +
> +        next_periodic_clock = muldiv64(s->next_periodic_time,
> +                                RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
> +        last_periodic_clock = next_periodic_clock - old_period;
> +        lost_clock = cur_clock - last_periodic_clock;
> +        assert(lost_clock >= 0);
>  
>          /*
>           * s->irq_coalesced can change for two reasons:
> @@ -251,22 +255,19 @@ periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period)
>                  rtc_coalesced_timer_update(s);
>              }
>          } else {
> -           /*
> +            /*
>               * no way to compensate the interrupt if LOST_TICK_POLICY_SLEW
>               * is not used, we should make the time progress anyway.
>               */
>              lost_clock = MIN(lost_clock, period);
>          }
> +    }
>  
> -        assert(lost_clock >= 0 && lost_clock <= period);
> +    assert(lost_clock >= 0 && lost_clock <= period);
>  
> -        next_irq_clock = cur_clock + period - lost_clock;
> -        s->next_periodic_time = periodic_clock_to_ns(next_irq_clock) + 1;
> -        timer_mod(s->periodic_timer, s->next_periodic_time);
> -    } else {
> -        s->irq_coalesced = 0;
> -        timer_del(s->periodic_timer);
> -    }
> +    next_irq_clock = cur_clock + period - lost_clock;
> +    s->next_periodic_time = periodic_clock_to_ns(next_irq_clock) + 1;
> +    timer_mod(s->periodic_timer, s->next_periodic_time);
>  }
>  
>  static void rtc_periodic_timer(void *opaque)
> 
> 



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] mc146818rtc: fix timer interrupt reinjection
  2019-11-16 20:58 ` Alex Williamson
@ 2019-11-17  3:20   ` Marcelo Tosatti
  2019-11-17  4:31     ` Alex Williamson
  0 siblings, 1 reply; 12+ messages in thread
From: Marcelo Tosatti @ 2019-11-17  3:20 UTC (permalink / raw)
  To: Alex Williamson
  Cc: Paolo Bonzini, Xiao Guangrong, qemu-devel, Vadim Rozenfeld

On Sat, Nov 16, 2019 at 01:58:55PM -0700, Alex Williamson wrote:
> On Thu, 10 Oct 2019 09:30:08 -0300
> Marcelo Tosatti <mtosatti@redhat.com> wrote:
> 
> > commit 369b41359af46bded5799c9ef8be2b641d92e043 broke timer interrupt
> > reinjection when there is no period change by the guest.
> > 
> > In that case, old_period is 0, which ends up zeroing irq_coalesced
> > (counter of reinjected interrupts).
> > 
> > The consequence is Windows 7 is unable to synchronize time via NTP.
> > Easily reproducible by playing a fullscreen video with cirrus and VNC.
> > 
> > Fix by not updating s->irq_coalesced when old_period is 0.
> > 
> > V2: reorganize code (Paolo Bonzini)
> > 
> > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> 
> This causes a regression for me, my win10 VM with assigned GPU
> experiences hangs and slowness with this.  Found via bisect, reverting
> restores normal behavior.  libvirt uses this commandline:
> 
> /usr/local/bin/qemu-system-x86_64 \
> -name guest=Steam-GeForce,debug-threads=on \
> -S \
> -object secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-1-Steam-GeForce/master-key.aes \
> -machine pc-i440fx-4.1,accel=kvm,usb=off,vmport=off,dump-guest-core=off \
> -cpu host,hv-time,hv-relaxed,hv-vapic,hv-spinlocks=0x1fff,hv-vendor-id=KeenlyKVM,kvm=off \
> -drive file=/usr/share/edk2/ovmf/OVMF_CODE.fd,if=pflash,format=raw,unit=0,readonly=on \
> -drive file=/var/lib/libvirt/qemu/nvram/Steam_VARS.fd,if=pflash,format=raw,unit=1 \
> -m 4096 \
> -mem-prealloc \
> -mem-path /dev/hugepages/libvirt/qemu/1-Steam-GeForce \
> -overcommit mem-lock=off \
> -smp 4,sockets=1,cores=2,threads=2 \
> -uuid 2b417d4b-f25b-4522-a5be-e105f032f99c \
> -display none \
> -no-user-config \
> -nodefaults \
> -chardev socket,id=charmonitor,fd=38,server,nowait \
> -mon chardev=charmonitor,id=monitor,mode=control \
> -rtc base=localtime,driftfix=slew \
> -global kvm-pit.lost_tick_policy=delay \
> -no-hpet \
> -no-shutdown \
> -boot menu=on,strict=on \
> -device nec-usb-xhci,id=usb,bus=pci.0,addr=0x8 \
> -device virtio-scsi-pci,id=scsi0,num_queues=4,bus=pci.0,addr=0x5 \
> -drive file=/mnt/ssd/Steam.qcow2,format=qcow2,if=none,id=drive-scsi0-0-0-0,cache=none \
> -device scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,device_id=drive-scsi0-0-0-0,drive=drive-scsi0-0-0-0,id=scsi0-0-0-0,bootindex=2,write-cache=on \
> -netdev tap,fd=40,id=hostnet0,vhost=on,vhostfd=41 \
> -device virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:60:ef:ac,bus=pci.0,addr=0x3 \
> -device vfio-pci,host=0000:01:00.0,id=hostdev0,bus=pci.0,addr=0x4,rombar=1 \
> -device vfio-pci,host=0000:01:00.1,id=hostdev1,bus=pci.0,addr=0x6,rombar=0 \
> -S \
> -debugcon file:/tmp/Steam-ovmf-debug.log \
> -global isa-debugcon.iobase=0x402 \
> -set device.hostdev0.x-pci-vendor-id=0x10de \
> -trace events=/var/lib/libvirt/images/Steam-GeForce.events \
> -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny \
> -msg timestamp=on

Alex,

-rtc base=localtime,driftfix=none should fix it. Can you confirm?




^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] mc146818rtc: fix timer interrupt reinjection
  2019-11-17  3:20   ` Marcelo Tosatti
@ 2019-11-17  4:31     ` Alex Williamson
  2019-11-17 10:12       ` Paolo Bonzini
  0 siblings, 1 reply; 12+ messages in thread
From: Alex Williamson @ 2019-11-17  4:31 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: Paolo Bonzini, Xiao Guangrong, qemu-devel, Vadim Rozenfeld

On Sun, 17 Nov 2019 01:20:19 -0200
Marcelo Tosatti <mtosatti@redhat.com> wrote:

> On Sat, Nov 16, 2019 at 01:58:55PM -0700, Alex Williamson wrote:
> > On Thu, 10 Oct 2019 09:30:08 -0300
> > Marcelo Tosatti <mtosatti@redhat.com> wrote:
> >   
> > > commit 369b41359af46bded5799c9ef8be2b641d92e043 broke timer interrupt
> > > reinjection when there is no period change by the guest.
> > > 
> > > In that case, old_period is 0, which ends up zeroing irq_coalesced
> > > (counter of reinjected interrupts).
> > > 
> > > The consequence is Windows 7 is unable to synchronize time via NTP.
> > > Easily reproducible by playing a fullscreen video with cirrus and VNC.
> > > 
> > > Fix by not updating s->irq_coalesced when old_period is 0.
> > > 
> > > V2: reorganize code (Paolo Bonzini)
> > > 
> > > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>  
> > 
> > This causes a regression for me, my win10 VM with assigned GPU
> > experiences hangs and slowness with this.  Found via bisect, reverting
> > restores normal behavior.  libvirt uses this commandline:
> > 
> > /usr/local/bin/qemu-system-x86_64 \
> > -name guest=Steam-GeForce,debug-threads=on \
> > -S \
> > -object secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-1-Steam-GeForce/master-key.aes \
> > -machine pc-i440fx-4.1,accel=kvm,usb=off,vmport=off,dump-guest-core=off \
> > -cpu host,hv-time,hv-relaxed,hv-vapic,hv-spinlocks=0x1fff,hv-vendor-id=KeenlyKVM,kvm=off \
> > -drive file=/usr/share/edk2/ovmf/OVMF_CODE.fd,if=pflash,format=raw,unit=0,readonly=on \
> > -drive file=/var/lib/libvirt/qemu/nvram/Steam_VARS.fd,if=pflash,format=raw,unit=1 \
> > -m 4096 \
> > -mem-prealloc \
> > -mem-path /dev/hugepages/libvirt/qemu/1-Steam-GeForce \
> > -overcommit mem-lock=off \
> > -smp 4,sockets=1,cores=2,threads=2 \
> > -uuid 2b417d4b-f25b-4522-a5be-e105f032f99c \
> > -display none \
> > -no-user-config \
> > -nodefaults \
> > -chardev socket,id=charmonitor,fd=38,server,nowait \
> > -mon chardev=charmonitor,id=monitor,mode=control \
> > -rtc base=localtime,driftfix=slew \
> > -global kvm-pit.lost_tick_policy=delay \
> > -no-hpet \
> > -no-shutdown \
> > -boot menu=on,strict=on \
> > -device nec-usb-xhci,id=usb,bus=pci.0,addr=0x8 \
> > -device virtio-scsi-pci,id=scsi0,num_queues=4,bus=pci.0,addr=0x5 \
> > -drive file=/mnt/ssd/Steam.qcow2,format=qcow2,if=none,id=drive-scsi0-0-0-0,cache=none \
> > -device scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,device_id=drive-scsi0-0-0-0,drive=drive-scsi0-0-0-0,id=scsi0-0-0-0,bootindex=2,write-cache=on \
> > -netdev tap,fd=40,id=hostnet0,vhost=on,vhostfd=41 \
> > -device virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:60:ef:ac,bus=pci.0,addr=0x3 \
> > -device vfio-pci,host=0000:01:00.0,id=hostdev0,bus=pci.0,addr=0x4,rombar=1 \
> > -device vfio-pci,host=0000:01:00.1,id=hostdev1,bus=pci.0,addr=0x6,rombar=0 \
> > -S \
> > -debugcon file:/tmp/Steam-ovmf-debug.log \
> > -global isa-debugcon.iobase=0x402 \
> > -set device.hostdev0.x-pci-vendor-id=0x10de \
> > -trace events=/var/lib/libvirt/images/Steam-GeForce.events \
> > -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny \
> > -msg timestamp=on  
> 
> Alex,
> 
> -rtc base=localtime,driftfix=none should fix it. Can you confirm?

How do I translate that to xml?  I'm currently using:

  <clock offset='localtime'>
    <timer name='rtc' tickpolicy='catchup'/>
    <timer name='pit' tickpolicy='delay'/>
    <timer name='hpet' present='no'/>
    <timer name='hypervclock' present='yes'/>
  </clock>

According to this[1] bz, 'catchup' translates to 'slew' and seems to be
the default policy in use.  The 'discard' option seemed the most
likely, but my VM fails to start with that:

error: unsupported configuration: unsupported rtc timer tickpolicy 'discard'

The 'merge' option gives me a similar error.  The 'delay' option is the
only other choice where I can actually start the VM, but this results in
the commandline:

-rtc base=localtime

(no driftfix specified)  This does appear to resolve the issue, but of
course compatibility with existing configurations has regressed.
Thanks,

Alex

[1] https://bugzilla.redhat.com/show_bug.cgi?id=865315



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] mc146818rtc: fix timer interrupt reinjection
  2019-11-17  4:31     ` Alex Williamson
@ 2019-11-17 10:12       ` Paolo Bonzini
  2019-11-17 18:32         ` Alex Williamson
  2019-11-18 21:44         ` Marcelo Tosatti
  0 siblings, 2 replies; 12+ messages in thread
From: Paolo Bonzini @ 2019-11-17 10:12 UTC (permalink / raw)
  To: Alex Williamson, Marcelo Tosatti
  Cc: Xiao Guangrong, qemu-devel, Vadim Rozenfeld

On 17/11/19 05:31, Alex Williamson wrote:
> The 'merge' option gives me a similar error.  The 'delay' option is
> the only other choice where I can actually start the VM, but this
> results in the commandline:
> 
> -rtc base=localtime
> 
> (no driftfix specified)

none is the default, so that's okay.

> This does appear to resolve the issue, but of course compatibility
> with existing configurations has regressed. Thanks,

Yeah, I guess this was just a suggestion to double-check the cause of 
the regression.

The problem could be that periodic_timer_update is using old_period == 0 
for two cases: no period change, and old period was 0 (periodic timer 
off).

Something like the following distinguishes the two cases by always using
s->period (currently it was only used for driftfix=slew) and passing
s->period instead of 0 when there is no period change.

More cleanups are possible, but this is the smallest patch that implements
the idea.  The first patch is big but, indentation changes aside, it's
moving a single closed brace.

Alex/Marcelo, can you check if it fixes both of your test cases?

------------- 8< ---------------
From 48dc9d140c636067b8de1ab8e25b819151c83162 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Sun, 17 Nov 2019 10:07:38 +0100
Subject: [PATCH 1/2] Revert "mc146818rtc: fix timer interrupt reinjection"

This reverts commit b429de730174b388ea5760e3debb0d542ea3c261, except
that the reversal of the outer "if (period)" is left in.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/rtc/mc146818rtc.c | 67 ++++++++++++++++++++++----------------------
 1 file changed, 33 insertions(+), 34 deletions(-)

diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c
index ee6bf82b40..9869dc5031 100644
--- a/hw/rtc/mc146818rtc.c
+++ b/hw/rtc/mc146818rtc.c
@@ -174,7 +174,6 @@ periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period)
     int64_t cur_clock, next_irq_clock, lost_clock = 0;
 
     period = rtc_periodic_clock_ticks(s);
-
     if (!period) {
         s->irq_coalesced = 0;
         timer_del(s->periodic_timer);
@@ -197,42 +196,42 @@ periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period)
         last_periodic_clock = next_periodic_clock - old_period;
         lost_clock = cur_clock - last_periodic_clock;
         assert(lost_clock >= 0);
+    }
 
+    /*
+     * s->irq_coalesced can change for two reasons:
+     *
+     * a) if one or more periodic timer interrupts have been lost,
+     *    lost_clock will be more that a period.
+     *
+     * b) when the period may be reconfigured, we expect the OS to
+     *    treat delayed tick as the new period.  So, when switching
+     *    from a shorter to a longer period, scale down the missing,
+     *    because the OS will treat past delayed ticks as longer
+     *    (leftovers are put back into lost_clock).  When switching
+     *    to a shorter period, scale up the missing ticks since the
+     *    OS handler will treat past delayed ticks as shorter.
+     */
+    if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) {
+        uint32_t old_irq_coalesced = s->irq_coalesced;
+
+        s->period = period;
+        lost_clock += old_irq_coalesced * old_period;
+        s->irq_coalesced = lost_clock / s->period;
+        lost_clock %= s->period;
+        if (old_irq_coalesced != s->irq_coalesced ||
+            old_period != s->period) {
+            DPRINTF_C("cmos: coalesced irqs scaled from %d to %d, "
+                      "period scaled from %d to %d\n", old_irq_coalesced,
+                      s->irq_coalesced, old_period, s->period);
+            rtc_coalesced_timer_update(s);
+        }
+    } else {
         /*
-         * s->irq_coalesced can change for two reasons:
-         *
-         * a) if one or more periodic timer interrupts have been lost,
-         *    lost_clock will be more that a period.
-         *
-         * b) when the period may be reconfigured, we expect the OS to
-         *    treat delayed tick as the new period.  So, when switching
-         *    from a shorter to a longer period, scale down the missing,
-         *    because the OS will treat past delayed ticks as longer
-         *    (leftovers are put back into lost_clock).  When switching
-         *    to a shorter period, scale up the missing ticks since the
-         *    OS handler will treat past delayed ticks as shorter.
+         * no way to compensate the interrupt if LOST_TICK_POLICY_SLEW
+         * is not used, we should make the time progress anyway.
          */
-        if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) {
-            uint32_t old_irq_coalesced = s->irq_coalesced;
-
-            s->period = period;
-            lost_clock += old_irq_coalesced * old_period;
-            s->irq_coalesced = lost_clock / s->period;
-            lost_clock %= s->period;
-            if (old_irq_coalesced != s->irq_coalesced ||
-                old_period != s->period) {
-                DPRINTF_C("cmos: coalesced irqs scaled from %d to %d, "
-                          "period scaled from %d to %d\n", old_irq_coalesced,
-                          s->irq_coalesced, old_period, s->period);
-                rtc_coalesced_timer_update(s);
-            }
-        } else {
-            /*
-             * no way to compensate the interrupt if LOST_TICK_POLICY_SLEW
-             * is not used, we should make the time progress anyway.
-             */
-            lost_clock = MIN(lost_clock, period);
-        }
+        lost_clock = MIN(lost_clock, period);
     }
 
     assert(lost_clock >= 0 && lost_clock <= period);
-- 
2.21.0


From 8546b5b65d9bc7b3f9c5fed4a650b27880ac72b3 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Sun, 17 Nov 2019 10:28:14 +0100
Subject: [PATCH 2/2] mc146818rtc: fix timer interrupt reinjection again

Commit 369b41359af46bded5799c9ef8be2b641d92e043 broke timer interrupt
reinjection when there is no period change by the guest.  In that
case, old_period is 0, which ends up zeroing irq_coalesced (counter of
reinjected interrupts).

The consequence is Windows 7 is unable to synchronize time via NTP.
Easily reproducible by playing a fullscreen video with cirrus and VNC.

Fix by passing s->period when periodic_timer_update is called due to
expiration of the timer.  With this change, old_period == 0 only
means that the periodic timer was off.

Reported-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/rtc/mc146818rtc.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c
index 9869dc5031..944677bea9 100644
--- a/hw/rtc/mc146818rtc.c
+++ b/hw/rtc/mc146818rtc.c
@@ -174,6 +174,8 @@ periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period)
     int64_t cur_clock, next_irq_clock, lost_clock = 0;
 
     period = rtc_periodic_clock_ticks(s);
+    s->period = period;
+
     if (!period) {
         s->irq_coalesced = 0;
         timer_del(s->periodic_timer);
@@ -215,7 +217,6 @@ periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period)
     if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) {
         uint32_t old_irq_coalesced = s->irq_coalesced;
 
-        s->period = period;
         lost_clock += old_irq_coalesced * old_period;
         s->irq_coalesced = lost_clock / s->period;
         lost_clock %= s->period;
@@ -245,7 +246,7 @@ static void rtc_periodic_timer(void *opaque)
 {
     RTCState *s = opaque;
 
-    periodic_timer_update(s, s->next_periodic_time, 0);
+    periodic_timer_update(s, s->next_periodic_time, s->period);
     s->cmos_data[RTC_REG_C] |= REG_C_PF;
     if (s->cmos_data[RTC_REG_B] & REG_B_PIE) {
         s->cmos_data[RTC_REG_C] |= REG_C_IRQF;
@@ -794,6 +795,7 @@ static int rtc_post_load(void *opaque, int version_id)
         s->offset = 0;
         check_update_timer(s);
     }
+    s->period = rtc_periodic_clock_ticks(s);
 
     /* The periodic timer is deterministic in record/replay mode,
      * so there is no need to update it after loading the vmstate.
@@ -803,7 +805,7 @@ static int rtc_post_load(void *opaque, int version_id)
         uint64_t now = qemu_clock_get_ns(rtc_clock);
         if (now < s->next_periodic_time ||
             now > (s->next_periodic_time + get_max_clock_jump())) {
-            periodic_timer_update(s, qemu_clock_get_ns(rtc_clock), 0);
+            periodic_timer_update(s, qemu_clock_get_ns(rtc_clock), s->period);
         }
     }
 
-- 
2.21.0



^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] mc146818rtc: fix timer interrupt reinjection
  2019-11-17 10:12       ` Paolo Bonzini
@ 2019-11-17 18:32         ` Alex Williamson
  2019-11-18 21:44         ` Marcelo Tosatti
  1 sibling, 0 replies; 12+ messages in thread
From: Alex Williamson @ 2019-11-17 18:32 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Marcelo Tosatti, Xiao Guangrong, qemu-devel, Vadim Rozenfeld

On Sun, 17 Nov 2019 11:12:43 +0100
Paolo Bonzini <pbonzini@redhat.com> wrote:

> On 17/11/19 05:31, Alex Williamson wrote:
> > The 'merge' option gives me a similar error.  The 'delay' option is
> > the only other choice where I can actually start the VM, but this
> > results in the commandline:
> > 
> > -rtc base=localtime
> > 
> > (no driftfix specified)  
> 
> none is the default, so that's okay.
> 
> > This does appear to resolve the issue, but of course compatibility
> > with existing configurations has regressed. Thanks,  
> 
> Yeah, I guess this was just a suggestion to double-check the cause of 
> the regression.
> 
> The problem could be that periodic_timer_update is using old_period == 0 
> for two cases: no period change, and old period was 0 (periodic timer 
> off).
> 
> Something like the following distinguishes the two cases by always using
> s->period (currently it was only used for driftfix=slew) and passing
> s->period instead of 0 when there is no period change.
> 
> More cleanups are possible, but this is the smallest patch that implements
> the idea.  The first patch is big but, indentation changes aside, it's
> moving a single closed brace.
> 
> Alex/Marcelo, can you check if it fixes both of your test cases?

It resolves the majority of the regression, but I think there's still a
performance issue.  Passmark PerformanceTest in the guest shows a 5+%
decrease versus a straight revert.  Thanks,

Alex

> ------------- 8< ---------------
> From 48dc9d140c636067b8de1ab8e25b819151c83162 Mon Sep 17 00:00:00 2001
> From: Paolo Bonzini <pbonzini@redhat.com>
> Date: Sun, 17 Nov 2019 10:07:38 +0100
> Subject: [PATCH 1/2] Revert "mc146818rtc: fix timer interrupt reinjection"
> 
> This reverts commit b429de730174b388ea5760e3debb0d542ea3c261, except
> that the reversal of the outer "if (period)" is left in.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  hw/rtc/mc146818rtc.c | 67 ++++++++++++++++++++++----------------------
>  1 file changed, 33 insertions(+), 34 deletions(-)
> 
> diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c
> index ee6bf82b40..9869dc5031 100644
> --- a/hw/rtc/mc146818rtc.c
> +++ b/hw/rtc/mc146818rtc.c
> @@ -174,7 +174,6 @@ periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period)
>      int64_t cur_clock, next_irq_clock, lost_clock = 0;
>  
>      period = rtc_periodic_clock_ticks(s);
> -
>      if (!period) {
>          s->irq_coalesced = 0;
>          timer_del(s->periodic_timer);
> @@ -197,42 +196,42 @@ periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period)
>          last_periodic_clock = next_periodic_clock - old_period;
>          lost_clock = cur_clock - last_periodic_clock;
>          assert(lost_clock >= 0);
> +    }
>  
> +    /*
> +     * s->irq_coalesced can change for two reasons:
> +     *
> +     * a) if one or more periodic timer interrupts have been lost,
> +     *    lost_clock will be more that a period.
> +     *
> +     * b) when the period may be reconfigured, we expect the OS to
> +     *    treat delayed tick as the new period.  So, when switching
> +     *    from a shorter to a longer period, scale down the missing,
> +     *    because the OS will treat past delayed ticks as longer
> +     *    (leftovers are put back into lost_clock).  When switching
> +     *    to a shorter period, scale up the missing ticks since the
> +     *    OS handler will treat past delayed ticks as shorter.
> +     */
> +    if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) {
> +        uint32_t old_irq_coalesced = s->irq_coalesced;
> +
> +        s->period = period;
> +        lost_clock += old_irq_coalesced * old_period;
> +        s->irq_coalesced = lost_clock / s->period;
> +        lost_clock %= s->period;
> +        if (old_irq_coalesced != s->irq_coalesced ||
> +            old_period != s->period) {
> +            DPRINTF_C("cmos: coalesced irqs scaled from %d to %d, "
> +                      "period scaled from %d to %d\n", old_irq_coalesced,
> +                      s->irq_coalesced, old_period, s->period);
> +            rtc_coalesced_timer_update(s);
> +        }
> +    } else {
>          /*
> -         * s->irq_coalesced can change for two reasons:
> -         *
> -         * a) if one or more periodic timer interrupts have been lost,
> -         *    lost_clock will be more that a period.
> -         *
> -         * b) when the period may be reconfigured, we expect the OS to
> -         *    treat delayed tick as the new period.  So, when switching
> -         *    from a shorter to a longer period, scale down the missing,
> -         *    because the OS will treat past delayed ticks as longer
> -         *    (leftovers are put back into lost_clock).  When switching
> -         *    to a shorter period, scale up the missing ticks since the
> -         *    OS handler will treat past delayed ticks as shorter.
> +         * no way to compensate the interrupt if LOST_TICK_POLICY_SLEW
> +         * is not used, we should make the time progress anyway.
>           */
> -        if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) {
> -            uint32_t old_irq_coalesced = s->irq_coalesced;
> -
> -            s->period = period;
> -            lost_clock += old_irq_coalesced * old_period;
> -            s->irq_coalesced = lost_clock / s->period;
> -            lost_clock %= s->period;
> -            if (old_irq_coalesced != s->irq_coalesced ||
> -                old_period != s->period) {
> -                DPRINTF_C("cmos: coalesced irqs scaled from %d to %d, "
> -                          "period scaled from %d to %d\n", old_irq_coalesced,
> -                          s->irq_coalesced, old_period, s->period);
> -                rtc_coalesced_timer_update(s);
> -            }
> -        } else {
> -            /*
> -             * no way to compensate the interrupt if LOST_TICK_POLICY_SLEW
> -             * is not used, we should make the time progress anyway.
> -             */
> -            lost_clock = MIN(lost_clock, period);
> -        }
> +        lost_clock = MIN(lost_clock, period);
>      }
>  
>      assert(lost_clock >= 0 && lost_clock <= period);



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] mc146818rtc: fix timer interrupt reinjection
  2019-11-17 10:12       ` Paolo Bonzini
  2019-11-17 18:32         ` Alex Williamson
@ 2019-11-18 21:44         ` Marcelo Tosatti
  2019-11-18 22:08           ` Paolo Bonzini
  2019-11-18 23:28           ` Alex Williamson
  1 sibling, 2 replies; 12+ messages in thread
From: Marcelo Tosatti @ 2019-11-18 21:44 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Alex Williamson, Xiao Guangrong, qemu-devel, Vadim Rozenfeld

On Sun, Nov 17, 2019 at 11:12:43AM +0100, Paolo Bonzini wrote:
> On 17/11/19 05:31, Alex Williamson wrote:
> > The 'merge' option gives me a similar error.  The 'delay' option is
> > the only other choice where I can actually start the VM, but this
> > results in the commandline:
> > 
> > -rtc base=localtime
> > 
> > (no driftfix specified)
> 
> none is the default, so that's okay.
> 
> > This does appear to resolve the issue, but of course compatibility
> > with existing configurations has regressed. Thanks,
> 
> Yeah, I guess this was just a suggestion to double-check the cause of 
> the regression.
> 
> The problem could be that periodic_timer_update is using old_period == 0 
> for two cases: no period change, and old period was 0 (periodic timer 
> off).


> Something like the following distinguishes the two cases by always using
> s->period (currently it was only used for driftfix=slew) and passing
> s->period instead of 0 when there is no period change.
> 
> More cleanups are possible, but this is the smallest patch that implements
> the idea.  The first patch is big but, indentation changes aside, it's
> moving a single closed brace.
> 
> Alex/Marcelo, can you check if it fixes both of your test cases?

Second patch blocks NTPd from synchronizing to a source at all
(can't even confirm if it fails to synchronize after a while).

Problem seems to be that calling from timer interrupt path:

   /*
     * if the periodic timer's update is due to period re-configuration,
     * we should count the clock since last interrupt.
     */
    if (old_period) {
        int64_t last_periodic_clock, next_periodic_clock;

        next_periodic_clock = muldiv64(s->next_periodic_time,
                                RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
        last_periodic_clock = next_periodic_clock - old_period;
        lost_clock = cur_clock - last_periodic_clock;
        assert(lost_clock >= 0);
    }

Adds the difference between when the timer interrupt actually executed 
(cur_clock) and when it should have executed (last_periodic_clock) 
as reinject time (which will end up injecting more timer interrupts 
than necessary, so the clock runs faster than it should).

Perhaps this is the reason for the 5%+ performance delta?

The following, on top of Paolo's two patches, fixes it for me
(and NTPd is able to maintain clock synchronized while playing video on Windows 7).

Alex perhaps you can give it a try?

--- hw/rtc/mc146818rtc.c.orig	2019-11-18 19:16:49.077479836 -0200
+++ hw/rtc/mc146818rtc.c	2019-11-18 19:22:35.706803090 -0200
@@ -168,7 +168,7 @@
  * is just due to period adjustment.
  */
 static void
-periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period)
+periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period, bool period_change)
 {
     uint32_t period;
     int64_t cur_clock, next_irq_clock, lost_clock = 0;
@@ -190,7 +190,7 @@
      * if the periodic timer's update is due to period re-configuration,
      * we should count the clock since last interrupt.
      */
-    if (old_period) {
+    if (old_period && period_change) {
         int64_t last_periodic_clock, next_periodic_clock;
 
         next_periodic_clock = muldiv64(s->next_periodic_time,
@@ -246,7 +246,7 @@
 {
     RTCState *s = opaque;
 
-    periodic_timer_update(s, s->next_periodic_time, s->period);
+    periodic_timer_update(s, s->next_periodic_time, s->period, false);
     s->cmos_data[RTC_REG_C] |= REG_C_PF;
     if (s->cmos_data[RTC_REG_B] & REG_B_PIE) {
         s->cmos_data[RTC_REG_C] |= REG_C_IRQF;
@@ -512,7 +512,7 @@
 
             if (update_periodic_timer) {
                 periodic_timer_update(s, qemu_clock_get_ns(rtc_clock),
-                                      old_period);
+                                      old_period, true);
             }
 
             check_update_timer(s);
@@ -551,7 +551,7 @@
 
             if (update_periodic_timer) {
                 periodic_timer_update(s, qemu_clock_get_ns(rtc_clock),
-                                      old_period);
+                                      old_period, true);
             }
 
             check_update_timer(s);
@@ -805,7 +805,7 @@
         uint64_t now = qemu_clock_get_ns(rtc_clock);
         if (now < s->next_periodic_time ||
             now > (s->next_periodic_time + get_max_clock_jump())) {
-            periodic_timer_update(s, qemu_clock_get_ns(rtc_clock), s->period);
+            periodic_timer_update(s, qemu_clock_get_ns(rtc_clock), s->period, false);
         }
     }
 



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] mc146818rtc: fix timer interrupt reinjection
  2019-11-18 21:44         ` Marcelo Tosatti
@ 2019-11-18 22:08           ` Paolo Bonzini
  2019-11-18 23:28           ` Alex Williamson
  1 sibling, 0 replies; 12+ messages in thread
From: Paolo Bonzini @ 2019-11-18 22:08 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: Alex Williamson, Xiao Guangrong, qemu-devel, Vadim Rozenfeld

On 18/11/19 22:44, Marcelo Tosatti wrote:
> Second patch blocks NTPd from synchronizing to a source at all
> (can't even confirm if it fails to synchronize after a while).
> 
> Problem seems to be that calling from timer interrupt path:
> 
>    /*
>      * if the periodic timer's update is due to period re-configuration,
>      * we should count the clock since last interrupt.
>      */
>     if (old_period) {
>         int64_t last_periodic_clock, next_periodic_clock;
> 
>         next_periodic_clock = muldiv64(s->next_periodic_time,
>                                 RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
>         last_periodic_clock = next_periodic_clock - old_period;
>         lost_clock = cur_clock - last_periodic_clock;
>         assert(lost_clock >= 0);
>     }
> 
> Adds the difference between when the timer interrupt actually executed 
> (cur_clock) and when it should have executed (last_periodic_clock) 
> as reinject time (which will end up injecting more timer interrupts 
> than necessary, so the clock runs faster than it should).

Yes, I made a similar reasoning.  Thanks for the patch, I don't like 
that it adds complication over complication but I guess it would be okay 
for 4.2, if Alex confirms it works for him.  Mine is a much bigger
change .

> Perhaps this is the reason for the 5%+ performance delta?
> 
> The following, on top of Paolo's two patches, fixes it for me
> (and NTPd is able to maintain clock synchronized while playing video on Windows 7).

FYI, here is my attempt at cleaning up everything:

------------------- 8< ----------------
From c0a53b1c9a331ac4cf5846b477ba0fb795933a34 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Sun, 17 Nov 2019 10:07:38 +0100
Subject: [PATCH 1/5] Revert "mc146818rtc: fix timer interrupt reinjection"

This reverts commit b429de730174b388ea5760e3debb0d542ea3c261, except
that the reversal of the outer "if (period)" is left in.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/rtc/mc146818rtc.c | 67 ++++++++++++++++++++++++++--------------------------
 1 file changed, 33 insertions(+), 34 deletions(-)

diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c
index ee6bf82..9869dc5 100644
--- a/hw/rtc/mc146818rtc.c
+++ b/hw/rtc/mc146818rtc.c
@@ -174,7 +174,6 @@ periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period)
     int64_t cur_clock, next_irq_clock, lost_clock = 0;
 
     period = rtc_periodic_clock_ticks(s);
-
     if (!period) {
         s->irq_coalesced = 0;
         timer_del(s->periodic_timer);
@@ -197,42 +196,42 @@ periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period)
         last_periodic_clock = next_periodic_clock - old_period;
         lost_clock = cur_clock - last_periodic_clock;
         assert(lost_clock >= 0);
+    }
 
+    /*
+     * s->irq_coalesced can change for two reasons:
+     *
+     * a) if one or more periodic timer interrupts have been lost,
+     *    lost_clock will be more that a period.
+     *
+     * b) when the period may be reconfigured, we expect the OS to
+     *    treat delayed tick as the new period.  So, when switching
+     *    from a shorter to a longer period, scale down the missing,
+     *    because the OS will treat past delayed ticks as longer
+     *    (leftovers are put back into lost_clock).  When switching
+     *    to a shorter period, scale up the missing ticks since the
+     *    OS handler will treat past delayed ticks as shorter.
+     */
+    if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) {
+        uint32_t old_irq_coalesced = s->irq_coalesced;
+
+        s->period = period;
+        lost_clock += old_irq_coalesced * old_period;
+        s->irq_coalesced = lost_clock / s->period;
+        lost_clock %= s->period;
+        if (old_irq_coalesced != s->irq_coalesced ||
+            old_period != s->period) {
+            DPRINTF_C("cmos: coalesced irqs scaled from %d to %d, "
+                      "period scaled from %d to %d\n", old_irq_coalesced,
+                      s->irq_coalesced, old_period, s->period);
+            rtc_coalesced_timer_update(s);
+        }
+    } else {
         /*
-         * s->irq_coalesced can change for two reasons:
-         *
-         * a) if one or more periodic timer interrupts have been lost,
-         *    lost_clock will be more that a period.
-         *
-         * b) when the period may be reconfigured, we expect the OS to
-         *    treat delayed tick as the new period.  So, when switching
-         *    from a shorter to a longer period, scale down the missing,
-         *    because the OS will treat past delayed ticks as longer
-         *    (leftovers are put back into lost_clock).  When switching
-         *    to a shorter period, scale up the missing ticks since the
-         *    OS handler will treat past delayed ticks as shorter.
+         * no way to compensate the interrupt if LOST_TICK_POLICY_SLEW
+         * is not used, we should make the time progress anyway.
          */
-        if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) {
-            uint32_t old_irq_coalesced = s->irq_coalesced;
-
-            s->period = period;
-            lost_clock += old_irq_coalesced * old_period;
-            s->irq_coalesced = lost_clock / s->period;
-            lost_clock %= s->period;
-            if (old_irq_coalesced != s->irq_coalesced ||
-                old_period != s->period) {
-                DPRINTF_C("cmos: coalesced irqs scaled from %d to %d, "
-                          "period scaled from %d to %d\n", old_irq_coalesced,
-                          s->irq_coalesced, old_period, s->period);
-                rtc_coalesced_timer_update(s);
-            }
-        } else {
-            /*
-             * no way to compensate the interrupt if LOST_TICK_POLICY_SLEW
-             * is not used, we should make the time progress anyway.
-             */
-            lost_clock = MIN(lost_clock, period);
-        }
+        lost_clock = MIN(lost_clock, period);
     }
 
     assert(lost_clock >= 0 && lost_clock <= period);
-- 
1.8.3.1


From 6a989dedd43b1885bd3f2178d686bf7a4fe06a08 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Sun, 17 Nov 2019 10:28:14 +0100
Subject: [PATCH 2/5] mc146818rtc: keep s->period up to date

Right now s->period is only used if s->lost_tick_policy ==
LOST_TICK_POLICY_SLEW.  Not having to recompute it all the time
will come in handy in upcoming refactoring, so just store it in
RTCState.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/rtc/mc146818rtc.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c
index 9869dc5..da7837c 100644
--- a/hw/rtc/mc146818rtc.c
+++ b/hw/rtc/mc146818rtc.c
@@ -174,6 +174,8 @@ periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period)
     int64_t cur_clock, next_irq_clock, lost_clock = 0;
 
     period = rtc_periodic_clock_ticks(s);
+    s->period = period;
+
     if (!period) {
         s->irq_coalesced = 0;
         timer_del(s->periodic_timer);
@@ -215,7 +217,6 @@ periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period)
     if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) {
         uint32_t old_irq_coalesced = s->irq_coalesced;
 
-        s->period = period;
         lost_clock += old_irq_coalesced * old_period;
         s->irq_coalesced = lost_clock / s->period;
         lost_clock %= s->period;
@@ -231,12 +232,12 @@ periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period)
          * no way to compensate the interrupt if LOST_TICK_POLICY_SLEW
          * is not used, we should make the time progress anyway.
          */
-        lost_clock = MIN(lost_clock, period);
+        lost_clock = MIN(lost_clock, s->period);
     }
 
-    assert(lost_clock >= 0 && lost_clock <= period);
+    assert(lost_clock >= 0 && lost_clock <= s->period);
 
-    next_irq_clock = cur_clock + period - lost_clock;
+    next_irq_clock = cur_clock + s->period - lost_clock;
     s->next_periodic_time = periodic_clock_to_ns(next_irq_clock) + 1;
     timer_mod(s->periodic_timer, s->next_periodic_time);
 }
@@ -794,6 +795,7 @@ static int rtc_post_load(void *opaque, int version_id)
         s->offset = 0;
         check_update_timer(s);
     }
+    s->period = rtc_periodic_clock_ticks(s);
 
     /* The periodic timer is deterministic in record/replay mode,
      * so there is no need to update it after loading the vmstate.
-- 
1.8.3.1


From 258e46c4f234385c60857ef3542c335bf6560608 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 18 Nov 2019 11:50:31 +0100
Subject: [PATCH 3/5] mc146818rtc: clean up update of coalesced timer for
 periodic_timer_update

Remove knowledge of old_period from the code that sets up the
next periodic timer deadline.  Instead, account the missed IRQs into
lost_clock before that part runs.

This prepares for splitting periodic_timer_update in two parts,
one for period reconfiguration and one that sets up the next
periodic timer deadline.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/rtc/mc146818rtc.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c
index da7837c..47d966c 100644
--- a/hw/rtc/mc146818rtc.c
+++ b/hw/rtc/mc146818rtc.c
@@ -198,6 +198,12 @@ periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period)
         last_periodic_clock = next_periodic_clock - old_period;
         lost_clock = cur_clock - last_periodic_clock;
         assert(lost_clock >= 0);
+
+        if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) {
+            lost_clock += s->irq_coalesced * old_period;
+            s->irq_coalesced = 0;
+            timer_del(s->coalesced_timer);
+        }
     }
 
     /*
@@ -215,18 +221,9 @@ periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period)
      *    OS handler will treat past delayed ticks as shorter.
      */
     if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) {
-        uint32_t old_irq_coalesced = s->irq_coalesced;
-
-        lost_clock += old_irq_coalesced * old_period;
         s->irq_coalesced = lost_clock / s->period;
         lost_clock %= s->period;
-        if (old_irq_coalesced != s->irq_coalesced ||
-            old_period != s->period) {
-            DPRINTF_C("cmos: coalesced irqs scaled from %d to %d, "
-                      "period scaled from %d to %d\n", old_irq_coalesced,
-                      s->irq_coalesced, old_period, s->period);
-            rtc_coalesced_timer_update(s);
-        }
+        rtc_coalesced_timer_update(s);
     } else {
         /*
          * no way to compensate the interrupt if LOST_TICK_POLICY_SLEW
@@ -883,6 +880,7 @@ static void rtc_reset(void *opaque)
     if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) {
         s->irq_coalesced = 0;
         s->irq_reinject_on_ack_count = 0;
+        rtc_coalesced_timer_update(s);
     }
 }
 
-- 
1.8.3.1


From c722009caa5d4959499a89d63d74082164385f45 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 18 Nov 2019 12:04:43 +0100
Subject: [PATCH 4/5] mc146818rtc: reorganize arguments of
 periodic_timer_update

This is mostly preparation for the next patch.  Because the next patch
will pass the number of lost 32 kHz ticks to periodic_timer_update,
it makes sense that the current time is also passed in 32 kHz units.
This makes calling periodic_timer_update from cmos_ioport_write a
bit unwieldy, so extract periodic_timer_reconfigure.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/rtc/mc146818rtc.c | 38 ++++++++++++++++++++++++++------------
 1 file changed, 26 insertions(+), 12 deletions(-)

diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c
index 47d966c..8a9e004 100644
--- a/hw/rtc/mc146818rtc.c
+++ b/hw/rtc/mc146818rtc.c
@@ -163,15 +163,23 @@ static uint32_t rtc_periodic_clock_ticks(RTCState *s)
     return periodic_period_to_clock(period_code);
 }
 
+static uint32_t rtc_periodic_clock_get_ticks(void)
+{
+    int64_t current_time = qemu_clock_get_ns(rtc_clock);
+
+    /* compute 32 khz clock */
+    return muldiv64(current_time, RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
+}
+
 /*
  * handle periodic timer. @old_period indicates the periodic timer update
  * is just due to period adjustment.
  */
 static void
-periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period)
+periodic_timer_update(RTCState *s, int64_t cur_clock, uint32_t old_period)
 {
     uint32_t period;
-    int64_t cur_clock, next_irq_clock, lost_clock = 0;
+    int64_t next_irq_clock, lost_clock = 0;
 
     period = rtc_periodic_clock_ticks(s);
     s->period = period;
@@ -182,10 +190,6 @@ periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period)
         return;
     }
 
-    /* compute 32 khz clock */
-    cur_clock =
-        muldiv64(current_time, RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
-
     /*
      * if the periodic timer's update is due to period re-configuration,
      * we should count the clock since last interrupt.
@@ -239,11 +243,23 @@ periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period)
     timer_mod(s->periodic_timer, s->next_periodic_time);
 }
 
+static void
+periodic_timer_reconfigure(RTCState *s, uint32_t old_period)
+{
+    int64_t cur_clock = rtc_periodic_clock_get_ticks();
+
+    periodic_timer_update(s, cur_clock, old_period);
+}
+
 static void rtc_periodic_timer(void *opaque)
 {
     RTCState *s = opaque;
+    int64_t last_periodic_clock;
+
+    last_periodic_clock =
+        muldiv64(s->next_periodic_time, RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
 
-    periodic_timer_update(s, s->next_periodic_time, 0);
+    periodic_timer_update(s, last_periodic_clock, 0);
     s->cmos_data[RTC_REG_C] |= REG_C_PF;
     if (s->cmos_data[RTC_REG_B] & REG_B_PIE) {
         s->cmos_data[RTC_REG_C] |= REG_C_IRQF;
@@ -508,8 +524,7 @@ static void cmos_ioport_write(void *opaque, hwaddr addr,
                 (s->cmos_data[RTC_REG_A] & REG_A_UIP);
 
             if (update_periodic_timer) {
-                periodic_timer_update(s, qemu_clock_get_ns(rtc_clock),
-                                      old_period);
+                periodic_timer_reconfigure(s, old_period);
             }
 
             check_update_timer(s);
@@ -547,8 +562,7 @@ static void cmos_ioport_write(void *opaque, hwaddr addr,
             s->cmos_data[RTC_REG_B] = data;
 
             if (update_periodic_timer) {
-                periodic_timer_update(s, qemu_clock_get_ns(rtc_clock),
-                                      old_period);
+                periodic_timer_reconfigure(s, old_period);
             }
 
             check_update_timer(s);
@@ -802,7 +816,7 @@ static int rtc_post_load(void *opaque, int version_id)
         uint64_t now = qemu_clock_get_ns(rtc_clock);
         if (now < s->next_periodic_time ||
             now > (s->next_periodic_time + get_max_clock_jump())) {
-            periodic_timer_update(s, qemu_clock_get_ns(rtc_clock), 0);
+            periodic_timer_reconfigure(s, s->period);
         }
     }
 
-- 
1.8.3.1


From 8994fed352d8147d2dea99710728cc15fb9f2cc2 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 18 Nov 2019 11:59:59 +0100
Subject: [PATCH 5/5] mc146818rtc: fix timer interrupt reinjection again

Commit 369b41359af46bded5799c9ef8be2b641d92e043 broke timer interrupt
reinjection when there is no period change by the guest.  In that
case, old_period is 0, which ends up zeroing irq_coalesced (counter of
reinjected interrupts).

The consequence is Windows 7 is unable to synchronize time via NTP.
Easily reproducible by playing a fullscreen video with cirrus and VNC.

That part of periodic_timer_update that introduces the bug is
only needed due to reconfiguration of the period, so move it to
periodic_timer_reconfigure.  In that path, old_period == 0 does mean
that the periodic timer was off.

Reported-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/rtc/mc146818rtc.c | 72 +++++++++++++++++++++++++---------------------------
 1 file changed, 34 insertions(+), 38 deletions(-)

diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c
index 8a9e004..823f706 100644
--- a/hw/rtc/mc146818rtc.c
+++ b/hw/rtc/mc146818rtc.c
@@ -171,44 +171,10 @@ static uint32_t rtc_periodic_clock_get_ticks(void)
     return muldiv64(current_time, RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
 }
 
-/*
- * handle periodic timer. @old_period indicates the periodic timer update
- * is just due to period adjustment.
- */
 static void
-periodic_timer_update(RTCState *s, int64_t cur_clock, uint32_t old_period)
+periodic_timer_update(RTCState *s, int64_t cur_clock, int64_t lost_clock)
 {
-    uint32_t period;
-    int64_t next_irq_clock, lost_clock = 0;
-
-    period = rtc_periodic_clock_ticks(s);
-    s->period = period;
-
-    if (!period) {
-        s->irq_coalesced = 0;
-        timer_del(s->periodic_timer);
-        return;
-    }
-
-    /*
-     * if the periodic timer's update is due to period re-configuration,
-     * we should count the clock since last interrupt.
-     */
-    if (old_period) {
-        int64_t last_periodic_clock, next_periodic_clock;
-
-        next_periodic_clock = muldiv64(s->next_periodic_time,
-                                RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
-        last_periodic_clock = next_periodic_clock - old_period;
-        lost_clock = cur_clock - last_periodic_clock;
-        assert(lost_clock >= 0);
-
-        if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) {
-            lost_clock += s->irq_coalesced * old_period;
-            s->irq_coalesced = 0;
-            timer_del(s->coalesced_timer);
-        }
-    }
+    int64_t next_irq_clock;
 
     /*
      * s->irq_coalesced can change for two reasons:
@@ -243,23 +209,53 @@ periodic_timer_update(RTCState *s, int64_t cur_clock, uint32_t old_period)
     timer_mod(s->periodic_timer, s->next_periodic_time);
 }
 
+/* adjust periodic timer on period adjustment */
 static void
 periodic_timer_reconfigure(RTCState *s, uint32_t old_period)
 {
     int64_t cur_clock = rtc_periodic_clock_get_ticks();
+    int64_t lost_clock = 0;
 
-    periodic_timer_update(s, cur_clock, old_period);
+    s->period = rtc_periodic_clock_ticks(s);
+    if (!s->period) {
+        s->irq_coalesced = 0;
+        timer_del(s->periodic_timer);
+        return;
+    }
+
+    /*
+     * if the periodic timer was active before, account the time since
+     * the last interrupt.
+     */
+    if (old_period) {
+        int64_t last_periodic_clock, next_periodic_clock;
+
+        next_periodic_clock = muldiv64(s->next_periodic_time,
+                                RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
+        last_periodic_clock = next_periodic_clock - old_period;
+        lost_clock = cur_clock - last_periodic_clock;
+        assert(lost_clock >= 0);
+
+        if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) {
+            lost_clock += s->irq_coalesced * old_period;
+            s->irq_coalesced = 0;
+            timer_del(s->coalesced_timer);
+        }
+    }
+
+    periodic_timer_update(s, cur_clock, lost_clock);
 }
 
 static void rtc_periodic_timer(void *opaque)
 {
     RTCState *s = opaque;
+    int64_t cur_clock = rtc_periodic_clock_get_ticks();
     int64_t last_periodic_clock;
 
     last_periodic_clock =
         muldiv64(s->next_periodic_time, RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
 
-    periodic_timer_update(s, last_periodic_clock, 0);
+    periodic_timer_update(s, cur_clock, cur_clock - last_periodic_clock);
     s->cmos_data[RTC_REG_C] |= REG_C_PF;
     if (s->cmos_data[RTC_REG_B] & REG_B_PIE) {
         s->cmos_data[RTC_REG_C] |= REG_C_IRQF;
-- 
1.8.3.1



^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] mc146818rtc: fix timer interrupt reinjection
  2019-11-18 21:44         ` Marcelo Tosatti
  2019-11-18 22:08           ` Paolo Bonzini
@ 2019-11-18 23:28           ` Alex Williamson
  1 sibling, 0 replies; 12+ messages in thread
From: Alex Williamson @ 2019-11-18 23:28 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: Paolo Bonzini, Xiao Guangrong, qemu-devel, Vadim Rozenfeld

On Mon, 18 Nov 2019 19:44:30 -0200
Marcelo Tosatti <mtosatti@redhat.com> wrote:

> On Sun, Nov 17, 2019 at 11:12:43AM +0100, Paolo Bonzini wrote:
> > On 17/11/19 05:31, Alex Williamson wrote:  
> > > The 'merge' option gives me a similar error.  The 'delay' option is
> > > the only other choice where I can actually start the VM, but this
> > > results in the commandline:
> > > 
> > > -rtc base=localtime
> > > 
> > > (no driftfix specified)  
> > 
> > none is the default, so that's okay.
> >   
> > > This does appear to resolve the issue, but of course compatibility
> > > with existing configurations has regressed. Thanks,  
> > 
> > Yeah, I guess this was just a suggestion to double-check the cause of 
> > the regression.
> > 
> > The problem could be that periodic_timer_update is using old_period == 0 
> > for two cases: no period change, and old period was 0 (periodic timer 
> > off).  
> 
> 
> > Something like the following distinguishes the two cases by always using
> > s->period (currently it was only used for driftfix=slew) and passing
> > s->period instead of 0 when there is no period change.
> > 
> > More cleanups are possible, but this is the smallest patch that implements
> > the idea.  The first patch is big but, indentation changes aside, it's
> > moving a single closed brace.
> > 
> > Alex/Marcelo, can you check if it fixes both of your test cases?  
> 
> Second patch blocks NTPd from synchronizing to a source at all
> (can't even confirm if it fails to synchronize after a while).
> 
> Problem seems to be that calling from timer interrupt path:
> 
>    /*
>      * if the periodic timer's update is due to period re-configuration,
>      * we should count the clock since last interrupt.
>      */
>     if (old_period) {
>         int64_t last_periodic_clock, next_periodic_clock;
> 
>         next_periodic_clock = muldiv64(s->next_periodic_time,
>                                 RTC_CLOCK_RATE, NANOSECONDS_PER_SECOND);
>         last_periodic_clock = next_periodic_clock - old_period;
>         lost_clock = cur_clock - last_periodic_clock;
>         assert(lost_clock >= 0);
>     }
> 
> Adds the difference between when the timer interrupt actually executed 
> (cur_clock) and when it should have executed (last_periodic_clock) 
> as reinject time (which will end up injecting more timer interrupts 
> than necessary, so the clock runs faster than it should).
> 
> Perhaps this is the reason for the 5%+ performance delta?
> 
> The following, on top of Paolo's two patches, fixes it for me
> (and NTPd is able to maintain clock synchronized while playing video on Windows 7).
> 
> Alex perhaps you can give it a try?

Yes, Paolo's two patches plus this seems to resolve both the functional
and performance issues.  Thanks,

Alex
 
> --- hw/rtc/mc146818rtc.c.orig	2019-11-18 19:16:49.077479836 -0200
> +++ hw/rtc/mc146818rtc.c	2019-11-18 19:22:35.706803090 -0200
> @@ -168,7 +168,7 @@
>   * is just due to period adjustment.
>   */
>  static void
> -periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period)
> +periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period, bool period_change)
>  {
>      uint32_t period;
>      int64_t cur_clock, next_irq_clock, lost_clock = 0;
> @@ -190,7 +190,7 @@
>       * if the periodic timer's update is due to period re-configuration,
>       * we should count the clock since last interrupt.
>       */
> -    if (old_period) {
> +    if (old_period && period_change) {
>          int64_t last_periodic_clock, next_periodic_clock;
>  
>          next_periodic_clock = muldiv64(s->next_periodic_time,
> @@ -246,7 +246,7 @@
>  {
>      RTCState *s = opaque;
>  
> -    periodic_timer_update(s, s->next_periodic_time, s->period);
> +    periodic_timer_update(s, s->next_periodic_time, s->period, false);
>      s->cmos_data[RTC_REG_C] |= REG_C_PF;
>      if (s->cmos_data[RTC_REG_B] & REG_B_PIE) {
>          s->cmos_data[RTC_REG_C] |= REG_C_IRQF;
> @@ -512,7 +512,7 @@
>  
>              if (update_periodic_timer) {
>                  periodic_timer_update(s, qemu_clock_get_ns(rtc_clock),
> -                                      old_period);
> +                                      old_period, true);
>              }
>  
>              check_update_timer(s);
> @@ -551,7 +551,7 @@
>  
>              if (update_periodic_timer) {
>                  periodic_timer_update(s, qemu_clock_get_ns(rtc_clock),
> -                                      old_period);
> +                                      old_period, true);
>              }
>  
>              check_update_timer(s);
> @@ -805,7 +805,7 @@
>          uint64_t now = qemu_clock_get_ns(rtc_clock);
>          if (now < s->next_periodic_time ||
>              now > (s->next_periodic_time + get_max_clock_jump())) {
> -            periodic_timer_update(s, qemu_clock_get_ns(rtc_clock), s->period);
> +            periodic_timer_update(s, qemu_clock_get_ns(rtc_clock), s->period, false);
>          }
>      }
>  



^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2019-11-18 23:29 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-10 12:30 [PATCH v2] mc146818rtc: fix timer interrupt reinjection Marcelo Tosatti
2019-10-10 12:35 ` Paolo Bonzini
2019-10-24 11:22 ` Philippe Mathieu-Daudé
2019-10-24 12:08   ` Philippe Mathieu-Daudé
2019-11-16 20:58 ` Alex Williamson
2019-11-17  3:20   ` Marcelo Tosatti
2019-11-17  4:31     ` Alex Williamson
2019-11-17 10:12       ` Paolo Bonzini
2019-11-17 18:32         ` Alex Williamson
2019-11-18 21:44         ` Marcelo Tosatti
2019-11-18 22:08           ` Paolo Bonzini
2019-11-18 23:28           ` Alex Williamson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).