All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC 01/16] xen/arm: Re-enable interrupt later in the trap path
       [not found] <1543440731-21947-1-git-send-email-andrii.anisov@gmail.com>
@ 2018-11-28 21:31 ` Andrii Anisov
  2018-11-28 22:00   ` Julien Grall
  2018-11-28 21:31 ` [RFC 02/16] hack: drop GIC v3 support Andrii Anisov
                   ` (15 subsequent siblings)
  16 siblings, 1 reply; 59+ messages in thread
From: Andrii Anisov @ 2018-11-28 21:31 UTC (permalink / raw)
  Cc: xen-devel, Julien Grall, Stefano Stabellini, Andrii Anisov

From: Julien Grall <julien.grall@arm.com>

This makes function enter_hypervisor_head() being executed with
irqs locked. This also give a fine side effect - it assures that
LRs are cleared prior to any IRQs processing, which leads to a
better (faster) IRQs processing.

Signed-off-by: Julien Grall <julien.grall@arm.com>
[Andrii: add a justification commit message]
Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
---
 xen/arch/arm/arm64/entry.S | 11 +++++------
 xen/arch/arm/traps.c       |  6 ++++++
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/xen/arch/arm/arm64/entry.S b/xen/arch/arm/arm64/entry.S
index 97b05f5..8f28789 100644
--- a/xen/arch/arm/arm64/entry.S
+++ b/xen/arch/arm/arm64/entry.S
@@ -195,7 +195,6 @@ hyp_error_invalid:
 
 hyp_error:
         entry   hyp=1
-        msr     daifclr, #2
         mov     x0, sp
         bl      do_trap_hyp_serror
         exit    hyp=1
@@ -203,7 +202,7 @@ hyp_error:
 /* Traps taken in Current EL with SP_ELx */
 hyp_sync:
         entry   hyp=1
-        msr     daifclr, #6
+        msr     daifclr, #4
         mov     x0, sp
         bl      do_trap_hyp_sync
         exit    hyp=1
@@ -304,7 +303,7 @@ guest_sync_slowpath:
         ALTERNATIVE("bl check_pending_vserror; cbnz x0, 1f",
                     "nop; nop",
                     SKIP_SYNCHRONIZE_SERROR_ENTRY_EXIT)
-        msr     daifclr, #6
+        msr     daifclr, #4
         mov     x0, sp
         bl      do_trap_guest_sync
 1:
@@ -332,7 +331,7 @@ guest_fiq_invalid:
 
 guest_error:
         entry   hyp=0, compat=0
-        msr     daifclr, #6
+        msr     daifclr, #4
         mov     x0, sp
         bl      do_trap_guest_serror
         exit    hyp=0, compat=0
@@ -347,7 +346,7 @@ guest_sync_compat:
         ALTERNATIVE("bl check_pending_vserror; cbnz x0, 1f",
                     "nop; nop",
                     SKIP_SYNCHRONIZE_SERROR_ENTRY_EXIT)
-        msr     daifclr, #6
+        msr     daifclr, #4
         mov     x0, sp
         bl      do_trap_guest_sync
 1:
@@ -375,7 +374,7 @@ guest_fiq_invalid_compat:
 
 guest_error_compat:
         entry   hyp=0, compat=1
-        msr     daifclr, #6
+        msr     daifclr, #4
         mov     x0, sp
         bl      do_trap_guest_serror
         exit    hyp=0, compat=1
diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
index 88ffeeb..18355e9 100644
--- a/xen/arch/arm/traps.c
+++ b/xen/arch/arm/traps.c
@@ -2038,6 +2038,8 @@ static void enter_hypervisor_head(struct cpu_user_regs *regs)
     {
         struct vcpu *v = current;
 
+        ASSERT(!local_irq_is_enabled());
+
         /* If the guest has disabled the workaround, bring it back on. */
         if ( needs_ssbd_flip(v) )
             arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2_FID, 1, NULL);
@@ -2072,6 +2074,7 @@ void do_trap_guest_sync(struct cpu_user_regs *regs)
     const union hsr hsr = { .bits = regs->hsr };
 
     enter_hypervisor_head(regs);
+    local_irq_enable();
 
     switch ( hsr.ec )
     {
@@ -2207,6 +2210,7 @@ void do_trap_hyp_sync(struct cpu_user_regs *regs)
     const union hsr hsr = { .bits = regs->hsr };
 
     enter_hypervisor_head(regs);
+    local_irq_enable();
 
     switch ( hsr.ec )
     {
@@ -2245,6 +2249,7 @@ void do_trap_hyp_sync(struct cpu_user_regs *regs)
 void do_trap_hyp_serror(struct cpu_user_regs *regs)
 {
     enter_hypervisor_head(regs);
+    local_irq_enable();
 
     __do_trap_serror(regs, VABORT_GEN_BY_GUEST(regs));
 }
@@ -2252,6 +2257,7 @@ void do_trap_hyp_serror(struct cpu_user_regs *regs)
 void do_trap_guest_serror(struct cpu_user_regs *regs)
 {
     enter_hypervisor_head(regs);
+    local_irq_enable();
 
     __do_trap_serror(regs, true);
 }
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [RFC 02/16] hack: drop GIC v3 support
       [not found] <1543440731-21947-1-git-send-email-andrii.anisov@gmail.com>
  2018-11-28 21:31 ` [RFC 01/16] xen/arm: Re-enable interrupt later in the trap path Andrii Anisov
@ 2018-11-28 21:31 ` Andrii Anisov
  2018-11-29 12:08   ` Andre Przywara
  2018-11-28 21:31 ` [RFC 03/16] vgic: move pause_flags check out of vgic spinlock Andrii Anisov
                   ` (14 subsequent siblings)
  16 siblings, 1 reply; 59+ messages in thread
From: Andrii Anisov @ 2018-11-28 21:31 UTC (permalink / raw)
  Cc: xen-devel, Julien Grall, Stefano Stabellini, Andrii Anisov

From: Andrii Anisov <andrii_anisov@epam.com>

This reduces some code and conditions in an IRQ processing path,
and opens way to further code reduction.

Also insert compilation errors into gicv3 code, because it would
not compile or work with following patches.

Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
---
 xen/arch/arm/configs/arm64_defconfig |  1 +
 xen/arch/arm/gic-v3-lpi.c            |  2 ++
 xen/arch/arm/gic-v3.c                |  2 ++
 xen/arch/arm/gic-vgic.c              | 13 ++++++++++++-
 xen/arch/arm/gic.c                   |  6 ++++++
 xen/arch/arm/vgic-v3-its.c           |  1 +
 xen/arch/arm/vgic.c                  | 20 ++++++++++++++++++++
 xen/arch/arm/vgic/vgic.c             |  2 ++
 xen/include/asm-arm/irq.h            |  2 ++
 xen/include/asm-arm/vgic.h           | 17 +++++++++++------
 10 files changed, 59 insertions(+), 7 deletions(-)

diff --git a/xen/arch/arm/configs/arm64_defconfig b/xen/arch/arm/configs/arm64_defconfig
index e69de29..452c7ac 100644
--- a/xen/arch/arm/configs/arm64_defconfig
+++ b/xen/arch/arm/configs/arm64_defconfig
@@ -0,0 +1 @@
+# CONFIG_GICV3 is not set
diff --git a/xen/arch/arm/gic-v3-lpi.c b/xen/arch/arm/gic-v3-lpi.c
index e8c6e15..be64e17 100644
--- a/xen/arch/arm/gic-v3-lpi.c
+++ b/xen/arch/arm/gic-v3-lpi.c
@@ -32,6 +32,8 @@
 #include <asm/page.h>
 #include <asm/sysregs.h>
 
+#error "The current gic/vgic/domain code does not support GICv3"
+
 /*
  * There could be a lot of LPIs on the host side, and they always go to
  * a guest. So having a struct irq_desc for each of them would be wasteful
diff --git a/xen/arch/arm/gic-v3.c b/xen/arch/arm/gic-v3.c
index 6fbc106..8e835b5 100644
--- a/xen/arch/arm/gic-v3.c
+++ b/xen/arch/arm/gic-v3.c
@@ -44,6 +44,8 @@
 #include <asm/io.h>
 #include <asm/sysregs.h>
 
+#error "The current gic/vgic/domain code does not support GICv3"
+
 /* Global state */
 static struct {
     void __iomem *map_dbase;  /* Mapped address of distributor registers */
diff --git a/xen/arch/arm/gic-vgic.c b/xen/arch/arm/gic-vgic.c
index 990399c..869987a 100644
--- a/xen/arch/arm/gic-vgic.c
+++ b/xen/arch/arm/gic-vgic.c
@@ -36,7 +36,9 @@ static inline void gic_set_lr(int lr, struct pending_irq *p,
 {
     ASSERT(!local_irq_is_enabled());
 
+#ifdef CONFIG_GICV3
     clear_bit(GIC_IRQ_GUEST_PRISTINE_LPI, &p->status);
+#endif
 
     gic_hw_ops->update_lr(lr, p->irq, p->priority,
                           p->desc ? p->desc->irq : INVALID_IRQ, state);
@@ -77,9 +79,11 @@ void gic_raise_inflight_irq(struct vcpu *v, unsigned int virtual_irq)
 {
     struct pending_irq *n = irq_to_pending(v, virtual_irq);
 
+#ifdef CONFIG_GICV3
     /* If an LPI has been removed meanwhile, there is nothing left to raise. */
     if ( unlikely(!n) )
         return;
+#endif
 
     ASSERT(spin_is_locked(&v->arch.vgic.lock));
 
@@ -112,13 +116,14 @@ static unsigned int gic_find_unused_lr(struct vcpu *v,
 {
     unsigned int nr_lrs = gic_get_nr_lrs();
     unsigned long *lr_mask = (unsigned long *) &this_cpu(lr_mask);
-    struct gic_lr lr_val;
 
     ASSERT(spin_is_locked(&v->arch.vgic.lock));
 
+#ifdef CONFIG_GICV3
     if ( unlikely(test_bit(GIC_IRQ_GUEST_PRISTINE_LPI, &p->status)) )
     {
         unsigned int used_lr;
+        struct gic_lr lr_val;
 
         for_each_set_bit(used_lr, lr_mask, nr_lrs)
         {
@@ -127,6 +132,7 @@ static unsigned int gic_find_unused_lr(struct vcpu *v,
                 return used_lr;
         }
     }
+#endif
 
     lr = find_next_zero_bit(lr_mask, nr_lrs, lr);
 
@@ -142,9 +148,11 @@ void gic_raise_guest_irq(struct vcpu *v, unsigned int virtual_irq,
 
     ASSERT(spin_is_locked(&v->arch.vgic.lock));
 
+#ifdef CONFIG_GICV3
     if ( unlikely(!p) )
         /* An unmapped LPI does not need to be raised. */
         return;
+#endif
 
     if ( v == current && list_empty(&v->arch.vgic.lr_pending) )
     {
@@ -172,6 +180,8 @@ static void gic_update_one_lr(struct vcpu *v, int i)
     gic_hw_ops->read_lr(i, &lr_val);
     irq = lr_val.virq;
     p = irq_to_pending(v, irq);
+
+#ifdef CONFIG_GICV3
     /*
      * An LPI might have been unmapped, in which case we just clean up here.
      * If that LPI is marked as PRISTINE, the information in the LR is bogus,
@@ -188,6 +198,7 @@ static void gic_update_one_lr(struct vcpu *v, int i)
 
         return;
     }
+#endif
 
     if ( lr_val.active )
     {
diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
index 6cc7dec..77fc06f 100644
--- a/xen/arch/arm/gic.c
+++ b/xen/arch/arm/gic.c
@@ -136,7 +136,9 @@ int gic_route_irq_to_guest(struct domain *d, unsigned int virq,
     /* Caller has already checked that the IRQ is an SPI */
     ASSERT(virq >= 32);
     ASSERT(virq < vgic_num_irqs(d));
+#ifdef CONFIG_GICV3
     ASSERT(!is_lpi(virq));
+#endif
 
     /*
      * When routing an IRQ to guest, the virtual state is not synced
@@ -168,7 +170,9 @@ int gic_remove_irq_from_guest(struct domain *d, unsigned int virq,
 
     ASSERT(spin_is_locked(&desc->lock));
     ASSERT(test_bit(_IRQ_GUEST, &desc->status));
+#ifdef CONFIG_GICV3
     ASSERT(!is_lpi(virq));
+#endif
 
     /*
      * Removing an interrupt while the domain is running may have
@@ -391,6 +395,7 @@ void gic_interrupt(struct cpu_user_regs *regs, int is_fiq)
             do_IRQ(regs, irq, is_fiq);
             local_irq_disable();
         }
+#ifdef CONFIG_GICV3
         else if ( is_lpi(irq) )
         {
             local_irq_enable();
@@ -398,6 +403,7 @@ void gic_interrupt(struct cpu_user_regs *regs, int is_fiq)
             gic_hw_ops->do_LPI(irq);
             local_irq_disable();
         }
+#endif
         else if ( unlikely(irq < 16) )
         {
             do_sgi(regs, irq);
diff --git a/xen/arch/arm/vgic-v3-its.c b/xen/arch/arm/vgic-v3-its.c
index 5b73c4e..3c6693d 100644
--- a/xen/arch/arm/vgic-v3-its.c
+++ b/xen/arch/arm/vgic-v3-its.c
@@ -47,6 +47,7 @@
 #include <asm/vgic-emul.h>
 #include <asm/vreg.h>
 
+#error "The current gic/vgic/domain code does not support GICv3"
 /*
  * Data structure to describe a virtual ITS.
  * If both the vcmd_lock and the its_lock are required, the vcmd_lock must
diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
index f2608b0..a2419d0 100644
--- a/xen/arch/arm/vgic.c
+++ b/xen/arch/arm/vgic.c
@@ -64,14 +64,18 @@ struct vgic_irq_rank *vgic_rank_irq(struct vcpu *v, unsigned int irq)
 
 void vgic_init_pending_irq(struct pending_irq *p, unsigned int virq)
 {
+#ifdef CONFIG_GICV3
     /* The lpi_vcpu_id field must be big enough to hold a VCPU ID. */
     BUILD_BUG_ON(BIT(sizeof(p->lpi_vcpu_id) * 8) < MAX_VIRT_CPUS);
+#endif
 
     memset(p, 0, sizeof(*p));
     INIT_LIST_HEAD(&p->inflight);
     INIT_LIST_HEAD(&p->lr_queue);
     p->irq = virq;
+#ifdef CONFIG_GICV3
     p->lpi_vcpu_id = INVALID_VCPU_ID;
+#endif
 }
 
 static void vgic_rank_init(struct vgic_irq_rank *rank, uint8_t index,
@@ -243,9 +247,11 @@ static int vgic_get_virq_priority(struct vcpu *v, unsigned int virq)
 {
     struct vgic_irq_rank *rank;
 
+#ifdef CONFIG_GICV3
     /* LPIs don't have a rank, also store their priority separately. */
     if ( is_lpi(virq) )
         return v->domain->arch.vgic.handler->lpi_get_priority(v->domain, virq);
+#endif
 
     rank = vgic_rank_irq(v, virq);
     return ACCESS_ONCE(rank->priority[virq & INTERRUPT_RANK_MASK]);
@@ -256,8 +262,10 @@ bool vgic_migrate_irq(struct vcpu *old, struct vcpu *new, unsigned int irq)
     unsigned long flags;
     struct pending_irq *p;
 
+#ifdef CONFIG_GICV3
     /* This will never be called for an LPI, as we don't migrate them. */
     ASSERT(!is_lpi(irq));
+#endif
 
     spin_lock_irqsave(&old->arch.vgic.lock, flags);
 
@@ -312,6 +320,7 @@ void arch_move_irqs(struct vcpu *v)
     struct vcpu *v_target;
     int i;
 
+#ifdef CONFIG_GICV3
     /*
      * We don't migrate LPIs at the moment.
      * If we ever do, we must make sure that the struct pending_irq does
@@ -322,6 +331,7 @@ void arch_move_irqs(struct vcpu *v)
      * don't participate.
      */
     ASSERT(!is_lpi(vgic_num_irqs(d) - 1));
+#endif
 
     for ( i = 32; i < vgic_num_irqs(d); i++ )
     {
@@ -343,8 +353,10 @@ void vgic_disable_irqs(struct vcpu *v, uint32_t r, int n)
     int i = 0;
     struct vcpu *v_target;
 
+#ifdef CONFIG_GICV3
     /* LPIs will never be disabled via this function. */
     ASSERT(!is_lpi(32 * n + 31));
+#endif
 
     while ( (i = find_next_bit(&mask, 32, i)) < 32 ) {
         irq = i + (32 * n);
@@ -393,8 +405,10 @@ void vgic_enable_irqs(struct vcpu *v, uint32_t r, int n)
     struct vcpu *v_target;
     struct domain *d = v->domain;
 
+#ifdef CONFIG_GICV3
     /* LPIs will never be enabled via this function. */
     ASSERT(!is_lpi(32 * n + 31));
+#endif
 
     while ( (i = find_next_bit(&mask, 32, i)) < 32 ) {
         irq = i + (32 * n);
@@ -490,8 +504,10 @@ struct pending_irq *irq_to_pending(struct vcpu *v, unsigned int irq)
      * are used for SPIs; the rests are used for per cpu irqs */
     if ( irq < 32 )
         n = &v->arch.vgic.pending_irqs[irq];
+#ifdef CONFIG_GICV3
     else if ( is_lpi(irq) )
         n = v->domain->arch.vgic.handler->lpi_to_pending(v->domain, irq);
+#endif
     else
         n = &v->domain->arch.vgic.pending_irqs[irq - 32];
     return n;
@@ -550,12 +566,14 @@ void vgic_inject_irq(struct domain *d, struct vcpu *v, unsigned int virq,
     spin_lock_irqsave(&v->arch.vgic.lock, flags);
 
     n = irq_to_pending(v, virq);
+#ifdef CONFIG_GICV3
     /* If an LPI has been removed, there is nothing to inject here. */
     if ( unlikely(!n) )
     {
         spin_unlock_irqrestore(&v->arch.vgic.lock, flags);
         return;
     }
+#endif
 
     /* vcpu offline */
     if ( test_bit(_VPF_down, &v->pause_flags) )
@@ -607,8 +625,10 @@ bool vgic_evtchn_irq_pending(struct vcpu *v)
     struct pending_irq *p;
 
     p = irq_to_pending(v, v->domain->arch.evtchn_irq);
+#ifdef CONFIG_GICV3
     /* Does not work for LPIs. */
     ASSERT(!is_lpi(v->domain->arch.evtchn_irq));
+#endif
 
     return list_empty(&p->inflight);
 }
diff --git a/xen/arch/arm/vgic/vgic.c b/xen/arch/arm/vgic/vgic.c
index e2844dc..b8dbdaf 100644
--- a/xen/arch/arm/vgic/vgic.c
+++ b/xen/arch/arm/vgic/vgic.c
@@ -703,8 +703,10 @@ bool vgic_evtchn_irq_pending(struct vcpu *v)
     unsigned long flags;
     bool pending;
 
+#ifdef CONFIG_GICV3
     /* Does not work for LPIs. */
     ASSERT(!is_lpi(v->domain->arch.evtchn_irq));
+#endif
 
     irq = vgic_get_irq(v->domain, v, v->domain->arch.evtchn_irq);
     spin_lock_irqsave(&irq->irq_lock, flags);
diff --git a/xen/include/asm-arm/irq.h b/xen/include/asm-arm/irq.h
index e45d574..4f1ef3c 100644
--- a/xen/include/asm-arm/irq.h
+++ b/xen/include/asm-arm/irq.h
@@ -63,10 +63,12 @@ struct irq_desc *__irq_to_desc(int irq);
 
 void do_IRQ(struct cpu_user_regs *regs, unsigned int irq, int is_fiq);
 
+#ifdef CONFIG_GICV3
 static inline bool is_lpi(unsigned int irq)
 {
     return irq >= LPI_OFFSET;
 }
+#endif
 
 #define domain_pirq_to_irq(d, pirq) (pirq)
 
diff --git a/xen/include/asm-arm/vgic.h b/xen/include/asm-arm/vgic.h
index 447d24e..c5cb63f 100644
--- a/xen/include/asm-arm/vgic.h
+++ b/xen/include/asm-arm/vgic.h
@@ -63,27 +63,32 @@ struct pending_irq
      * GIC_IRQ_GUEST_MIGRATING: the irq is being migrated to a different
      * vcpu while it is still inflight and on an GICH_LR register on the
      * old vcpu.
-     *
-     * GIC_IRQ_GUEST_PRISTINE_LPI: the IRQ is a newly mapped LPI, which
-     * has never been in an LR before. This means that any trace of an
-     * LPI with the same number in an LR must be from an older LPI, which
-     * has been unmapped before.
-     *
      */
 #define GIC_IRQ_GUEST_QUEUED   0
 #define GIC_IRQ_GUEST_ACTIVE   1
 #define GIC_IRQ_GUEST_VISIBLE  2
 #define GIC_IRQ_GUEST_ENABLED  3
 #define GIC_IRQ_GUEST_MIGRATING   4
+#ifdef CONFIG_GICV3
+    /*
+     * GIC_IRQ_GUEST_PRISTINE_LPI: the IRQ is a newly mapped LPI, which
+     * has never been in an LR before. This means that any trace of an
+     * LPI with the same number in an LR must be from an older LPI, which
+     * has been unmapped before.
+     * Valid for GICV3 only.
+     */
 #define GIC_IRQ_GUEST_PRISTINE_LPI  5
+#endif
     unsigned long status;
     struct irq_desc *desc; /* only set it the irq corresponds to a physical irq */
     unsigned int irq;
 #define GIC_INVALID_LR         (uint8_t)~0
     uint8_t lr;
     uint8_t priority;
+#ifdef CONFIG_GICV3
     uint8_t lpi_priority;       /* Caches the priority if this is an LPI. */
     uint8_t lpi_vcpu_id;        /* The VCPU for an LPI. */
+#endif
     /* inflight is used to append instances of pending_irq to
      * vgic.inflight_irqs */
     struct list_head inflight;
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [RFC 03/16] vgic: move pause_flags check out of vgic spinlock
       [not found] <1543440731-21947-1-git-send-email-andrii.anisov@gmail.com>
  2018-11-28 21:31 ` [RFC 01/16] xen/arm: Re-enable interrupt later in the trap path Andrii Anisov
  2018-11-28 21:31 ` [RFC 02/16] hack: drop GIC v3 support Andrii Anisov
@ 2018-11-28 21:31 ` Andrii Anisov
  2018-11-28 22:02   ` Julien Grall
  2018-11-28 21:31 ` [RFC 04/16] vgic: move irq_to_pending out of lock Andrii Anisov
                   ` (13 subsequent siblings)
  16 siblings, 1 reply; 59+ messages in thread
From: Andrii Anisov @ 2018-11-28 21:31 UTC (permalink / raw)
  Cc: xen-devel, Julien Grall, Stefano Stabellini, Andrii Anisov

From: Andrii Anisov <andrii_anisov@epam.com>

Pause_flags is not related to vgic spinlock, so reduce code
under lock.

Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
---
 xen/arch/arm/vgic.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
index a2419d0..a64633f 100644
--- a/xen/arch/arm/vgic.c
+++ b/xen/arch/arm/vgic.c
@@ -563,6 +563,12 @@ void vgic_inject_irq(struct domain *d, struct vcpu *v, unsigned int virq,
         v = vgic_get_target_vcpu(d->vcpu[0], virq);
     };
 
+    /* vcpu offline */
+    if ( unlikely(test_bit(_VPF_down, &v->pause_flags)) )
+    {
+        return;
+    }
+
     spin_lock_irqsave(&v->arch.vgic.lock, flags);
 
     n = irq_to_pending(v, virq);
@@ -575,13 +581,6 @@ void vgic_inject_irq(struct domain *d, struct vcpu *v, unsigned int virq,
     }
 #endif
 
-    /* vcpu offline */
-    if ( test_bit(_VPF_down, &v->pause_flags) )
-    {
-        spin_unlock_irqrestore(&v->arch.vgic.lock, flags);
-        return;
-    }
-
     set_bit(GIC_IRQ_GUEST_QUEUED, &n->status);
 
     if ( !list_empty(&n->inflight) )
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [RFC 04/16] vgic: move irq_to_pending out of lock
       [not found] <1543440731-21947-1-git-send-email-andrii.anisov@gmail.com>
                   ` (2 preceding siblings ...)
  2018-11-28 21:31 ` [RFC 03/16] vgic: move pause_flags check out of vgic spinlock Andrii Anisov
@ 2018-11-28 21:31 ` Andrii Anisov
  2018-11-29 11:07   ` Julien Grall
  2018-11-28 21:32 ` [RFC 05/16] gic-vgic: Drop an excessive clear_lrs Andrii Anisov
                   ` (12 subsequent siblings)
  16 siblings, 1 reply; 59+ messages in thread
From: Andrii Anisov @ 2018-11-28 21:31 UTC (permalink / raw)
  Cc: xen-devel, Julien Grall, Stefano Stabellini, Andrii Anisov

From: Andrii Anisov <andrii_anisov@epam.com>

For GICV2 pending_irq allocation is not concurrent, so reduce
some code under lock.

Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
---
 xen/arch/arm/vgic.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
index a64633f..37857b1 100644
--- a/xen/arch/arm/vgic.c
+++ b/xen/arch/arm/vgic.c
@@ -267,17 +267,16 @@ bool vgic_migrate_irq(struct vcpu *old, struct vcpu *new, unsigned int irq)
     ASSERT(!is_lpi(irq));
 #endif
 
-    spin_lock_irqsave(&old->arch.vgic.lock, flags);
-
     p = irq_to_pending(old, irq);
 
     /* nothing to do for virtual interrupts */
     if ( p->desc == NULL )
     {
-        spin_unlock_irqrestore(&old->arch.vgic.lock, flags);
         return true;
     }
 
+    spin_lock_irqsave(&old->arch.vgic.lock, flags);
+
     /* migration already in progress, no need to do anything */
     if ( test_bit(GIC_IRQ_GUEST_MIGRATING, &p->status) )
     {
@@ -569,8 +568,6 @@ void vgic_inject_irq(struct domain *d, struct vcpu *v, unsigned int virq,
         return;
     }
 
-    spin_lock_irqsave(&v->arch.vgic.lock, flags);
-
     n = irq_to_pending(v, virq);
 #ifdef CONFIG_GICV3
     /* If an LPI has been removed, there is nothing to inject here. */
@@ -581,6 +578,8 @@ void vgic_inject_irq(struct domain *d, struct vcpu *v, unsigned int virq,
     }
 #endif
 
+    spin_lock_irqsave(&v->arch.vgic.lock, flags);
+
     set_bit(GIC_IRQ_GUEST_QUEUED, &n->status);
 
     if ( !list_empty(&n->inflight) )
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [RFC 05/16] gic-vgic: Drop an excessive clear_lrs
       [not found] <1543440731-21947-1-git-send-email-andrii.anisov@gmail.com>
                   ` (3 preceding siblings ...)
  2018-11-28 21:31 ` [RFC 04/16] vgic: move irq_to_pending out of lock Andrii Anisov
@ 2018-11-28 21:32 ` Andrii Anisov
  2018-12-11 14:33   ` Julien Grall
  2018-11-28 21:32 ` [RFC 06/16] gic: drop interrupts enabling on interrupts processing Andrii Anisov
                   ` (11 subsequent siblings)
  16 siblings, 1 reply; 59+ messages in thread
From: Andrii Anisov @ 2018-11-28 21:32 UTC (permalink / raw)
  Cc: xen-devel, Julien Grall, Stefano Stabellini, Andrii Anisov

From: Andrii Anisov <andrii_anisov@epam.com>

This action is excessive because for an invalid LR there is no need
to write another invalid value to a register. So we can skip it here,
saving a peripheral register access.

Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
---
 xen/arch/arm/gic-vgic.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/xen/arch/arm/gic-vgic.c b/xen/arch/arm/gic-vgic.c
index 869987a..4e18169 100644
--- a/xen/arch/arm/gic-vgic.c
+++ b/xen/arch/arm/gic-vgic.c
@@ -227,7 +227,6 @@ static void gic_update_one_lr(struct vcpu *v, int i)
     }
     else
     {
-        gic_hw_ops->clear_lr(i);
         clear_bit(i, &this_cpu(lr_mask));
 
         if ( p->desc != NULL )
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [RFC 06/16] gic: drop interrupts enabling on interrupts processing
       [not found] <1543440731-21947-1-git-send-email-andrii.anisov@gmail.com>
                   ` (4 preceding siblings ...)
  2018-11-28 21:32 ` [RFC 05/16] gic-vgic: Drop an excessive clear_lrs Andrii Anisov
@ 2018-11-28 21:32 ` Andrii Anisov
  2018-11-28 22:06   ` Julien Grall
  2018-11-28 21:32 ` [RFC 07/16] gic-vgic:vgic: do not keep disabled IRQs in any of queues Andrii Anisov
                   ` (10 subsequent siblings)
  16 siblings, 1 reply; 59+ messages in thread
From: Andrii Anisov @ 2018-11-28 21:32 UTC (permalink / raw)
  Cc: xen-devel, Julien Grall, Stefano Stabellini, Andrii Anisov

From: Andrii Anisov <andrii_anisov@epam.com>

This reduces the number of context switches in case we have an
interrupt storm. We will read out all of those interrupt in the
loop anyway.

Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
---
 xen/arch/arm/gic.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
index 77fc06f..b10783b 100644
--- a/xen/arch/arm/gic.c
+++ b/xen/arch/arm/gic.c
@@ -390,10 +390,8 @@ void gic_interrupt(struct cpu_user_regs *regs, int is_fiq)
 
         if ( likely(irq >= 16 && irq < 1020) )
         {
-            local_irq_enable();
             isb();
             do_IRQ(regs, irq, is_fiq);
-            local_irq_disable();
         }
 #ifdef CONFIG_GICV3
         else if ( is_lpi(irq) )
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [RFC 07/16] gic-vgic:vgic: do not keep disabled IRQs in any of queues
       [not found] <1543440731-21947-1-git-send-email-andrii.anisov@gmail.com>
                   ` (5 preceding siblings ...)
  2018-11-28 21:32 ` [RFC 06/16] gic: drop interrupts enabling on interrupts processing Andrii Anisov
@ 2018-11-28 21:32 ` Andrii Anisov
  2018-11-28 21:32 ` [RFC 08/16] gic: separate ppi processing Andrii Anisov
                   ` (9 subsequent siblings)
  16 siblings, 0 replies; 59+ messages in thread
From: Andrii Anisov @ 2018-11-28 21:32 UTC (permalink / raw)
  Cc: xen-devel, Julien Grall, Stefano Stabellini, Andrii Anisov

From: Andrii Anisov <andrii_anisov@epam.com>

Do not put already disabled IRQs into any of queues, and remove
an IRQ from all queues on disable. Insert the IRQ into queues on
enable if needed. This also allows simplification of under-lock
decission of events needed delivery.

Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
---
 xen/arch/arm/gic-vgic.c    | 26 +++++++++-----------------
 xen/arch/arm/vgic.c        | 28 +++++++++++++++++++++++++---
 xen/include/asm-arm/vgic.h |  3 ++-
 3 files changed, 36 insertions(+), 21 deletions(-)

diff --git a/xen/arch/arm/gic-vgic.c b/xen/arch/arm/gic-vgic.c
index 4e18169..9bbd0c5 100644
--- a/xen/arch/arm/gic-vgic.c
+++ b/xen/arch/arm/gic-vgic.c
@@ -87,10 +87,6 @@ void gic_raise_inflight_irq(struct vcpu *v, unsigned int virtual_irq)
 
     ASSERT(spin_is_locked(&v->arch.vgic.lock));
 
-    /* Don't try to update the LR if the interrupt is disabled */
-    if ( !test_bit(GIC_IRQ_GUEST_ENABLED, &n->status) )
-        return;
-
     if ( list_empty(&n->lr_queue) )
     {
         if ( v == current )
@@ -366,6 +362,7 @@ int vgic_vcpu_pending_irq(struct vcpu *v)
     const unsigned long apr = gic_hw_ops->read_apr(0);
     int mask_priority;
     int active_priority;
+    int effective_priority;
     int rc = 0;
 
     /* We rely on reading the VMCR, which is only accessible locally. */
@@ -373,28 +370,23 @@ int vgic_vcpu_pending_irq(struct vcpu *v)
 
     mask_priority = gic_hw_ops->read_vmcr_priority();
     active_priority = find_first_bit(&apr, 32);
+    effective_priority = min(mask_priority, active_priority);
 
     spin_lock_irqsave(&v->arch.vgic.lock, flags);
 
     /* TODO: We order the guest irqs by priority, but we don't change
      * the priority of host irqs. */
 
-    /* find the first enabled non-active irq, the queue is already
-     * ordered by priority */
-    list_for_each_entry( p, &v->arch.vgic.inflight_irqs, inflight )
+    /* take the first non-active irq, the queue is already
+     * ordered by priority and has no disabled IRQs*/
+    if ( !list_empty(&v->arch.vgic.inflight_irqs) )
     {
-        if ( GIC_PRI_TO_GUEST(p->priority) >= mask_priority )
-            goto out;
-        if ( GIC_PRI_TO_GUEST(p->priority) >= active_priority )
-            goto out;
-        if ( test_bit(GIC_IRQ_GUEST_ENABLED, &p->status) )
-        {
-            rc = 1;
-            goto out;
-        }
+        p = container_of(v->arch.vgic.inflight_irqs.next, struct pending_irq,
+                          inflight);
+        if ( GIC_PRI_TO_GUEST(p->priority) < effective_priority )
+             rc = 1;
     }
 
-out:
     spin_unlock_irqrestore(&v->arch.vgic.lock, flags);
     return rc;
 }
diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
index 37857b1..e30a518 100644
--- a/xen/arch/arm/vgic.c
+++ b/xen/arch/arm/vgic.c
@@ -364,6 +364,7 @@ void vgic_disable_irqs(struct vcpu *v, uint32_t r, int n)
         spin_lock_irqsave(&v_target->arch.vgic.lock, flags);
         p = irq_to_pending(v_target, irq);
         clear_bit(GIC_IRQ_GUEST_ENABLED, &p->status);
+        list_del_init(&p->inflight);
         gic_remove_from_lr_pending(v_target, p);
         desc = p->desc;
         spin_unlock_irqrestore(&v_target->arch.vgic.lock, flags);
@@ -415,6 +416,23 @@ void vgic_enable_irqs(struct vcpu *v, uint32_t r, int n)
         spin_lock_irqsave(&v_target->arch.vgic.lock, flags);
         p = irq_to_pending(v_target, irq);
         set_bit(GIC_IRQ_GUEST_ENABLED, &p->status);
+        if ( test_bit(GIC_IRQ_GUEST_VISIBLE, &p->status) ||
+             test_bit(GIC_IRQ_GUEST_QUEUED, &p->status) )
+        {
+            struct pending_irq *iter;
+            p->priority = vgic_get_virq_priority(v, irq);
+            list_for_each_entry( iter, &v_target->arch.vgic.inflight_irqs,
+                                 inflight )
+            {
+                if ( iter->priority > p->priority )
+                {
+                    list_add_tail(&p->inflight, &iter->inflight);
+                    goto out;
+                }
+            }
+            list_add_tail(&p->inflight, &v_target->arch.vgic.inflight_irqs);
+        }
+out:
         if ( !list_empty(&p->inflight) && !test_bit(GIC_IRQ_GUEST_VISIBLE, &p->status) )
             gic_raise_guest_irq(v_target, irq, p->priority);
         spin_unlock_irqrestore(&v_target->arch.vgic.lock, flags);
@@ -582,6 +600,12 @@ void vgic_inject_irq(struct domain *d, struct vcpu *v, unsigned int virq,
 
     set_bit(GIC_IRQ_GUEST_QUEUED, &n->status);
 
+    if ( !test_bit(GIC_IRQ_GUEST_ENABLED, &n->status) )
+    {/*Do not insert a disabled IRQ into any queue*/
+        spin_unlock_irqrestore(&v->arch.vgic.lock, flags);
+        return;
+    }
+
     if ( !list_empty(&n->inflight) )
     {
         gic_raise_inflight_irq(v, virq);
@@ -591,9 +615,7 @@ void vgic_inject_irq(struct domain *d, struct vcpu *v, unsigned int virq,
     priority = vgic_get_virq_priority(v, virq);
     n->priority = priority;
 
-    /* the irq is enabled */
-    if ( test_bit(GIC_IRQ_GUEST_ENABLED, &n->status) )
-        gic_raise_guest_irq(v, virq, priority);
+    gic_raise_guest_irq(v, virq, priority);
 
     list_for_each_entry ( iter, &v->arch.vgic.inflight_irqs, inflight )
     {
diff --git a/xen/include/asm-arm/vgic.h b/xen/include/asm-arm/vgic.h
index c5cb63f..7507162 100644
--- a/xen/include/asm-arm/vgic.h
+++ b/xen/include/asm-arm/vgic.h
@@ -58,7 +58,8 @@ struct pending_irq
      * irq is enabled at the vgicd level:
      *
      * GIC_IRQ_GUEST_ENABLED: the guest IRQ is enabled at the VGICD
-     * level (GICD_ICENABLER/GICD_ISENABLER).
+     * level (GICD_ICENABLER/GICD_ISENABLER). Disabled guest IRQ do not appear
+     * in an inflight queue
      *
      * GIC_IRQ_GUEST_MIGRATING: the irq is being migrated to a different
      * vcpu while it is still inflight and on an GICH_LR register on the
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [RFC 08/16] gic: separate ppi processing
       [not found] <1543440731-21947-1-git-send-email-andrii.anisov@gmail.com>
                   ` (6 preceding siblings ...)
  2018-11-28 21:32 ` [RFC 07/16] gic-vgic:vgic: do not keep disabled IRQs in any of queues Andrii Anisov
@ 2018-11-28 21:32 ` Andrii Anisov
  2018-12-12 12:37   ` Andrii Anisov
  2018-11-28 21:32 ` [RFC 09/16] gic-vgic:vgic: avoid excessive conversions Andrii Anisov
                   ` (8 subsequent siblings)
  16 siblings, 1 reply; 59+ messages in thread
From: Andrii Anisov @ 2018-11-28 21:32 UTC (permalink / raw)
  Cc: xen-devel, Julien Grall, Stefano Stabellini, Andrii Anisov

From: Andrii Anisov <andrii_anisov@epam.com>

PPI are pcpu private, so their processing is not concurrent and do not
need pcpu shared flags and correspondent lockings. So avoid odd bits
sets, checks and locks.

Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
---
 xen/arch/arm/gic.c        |  8 ++++++--
 xen/arch/arm/irq.c        | 32 ++++++++++++++++++++++++++++++++
 xen/include/asm-arm/irq.h |  1 +
 3 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
index b10783b..52e4270 100644
--- a/xen/arch/arm/gic.c
+++ b/xen/arch/arm/gic.c
@@ -387,8 +387,7 @@ void gic_interrupt(struct cpu_user_regs *regs, int is_fiq)
     do  {
         /* Reading IRQ will ACK it */
         irq = gic_hw_ops->read_irq();
-
-        if ( likely(irq >= 16 && irq < 1020) )
+        if ( likely(irq >= 32 && irq < 1020) )
         {
             isb();
             do_IRQ(regs, irq, is_fiq);
@@ -406,6 +405,11 @@ void gic_interrupt(struct cpu_user_regs *regs, int is_fiq)
         {
             do_sgi(regs, irq);
         }
+        else if ( irq < 32 )
+        {
+            isb();
+            do_ppi(regs, irq);
+        }
         else
         {
             local_irq_disable();
diff --git a/xen/arch/arm/irq.c b/xen/arch/arm/irq.c
index 098281f..596a8b8 100644
--- a/xen/arch/arm/irq.c
+++ b/xen/arch/arm/irq.c
@@ -268,6 +268,38 @@ out_no_end:
     irq_exit();
 }
 
+void do_ppi(struct cpu_user_regs *regs, unsigned int irq)
+{
+    struct irq_desc *desc = irq_to_desc(irq);
+    struct irqaction *action;
+
+    irq_enter();
+
+    desc->handler->ack(desc);
+
+    if ( unlikely(!desc->action) )
+    {
+        printk("Unknown %s %#3.3x\n",
+               "IRQ", irq);
+        goto out;
+    }
+
+    if ( test_bit(_IRQ_DISABLED, &desc->status) )
+        goto out;
+
+    action = desc->action;
+
+    do
+    {
+        action->handler(irq, action->dev_id, regs);
+        action = action->next;
+    } while ( action );
+
+out:
+    desc->handler->end(desc);
+    irq_exit();
+}
+
 void release_irq(unsigned int irq, const void *dev_id)
 {
     struct irq_desc *desc;
diff --git a/xen/include/asm-arm/irq.h b/xen/include/asm-arm/irq.h
index 4f1ef3c..3143468 100644
--- a/xen/include/asm-arm/irq.h
+++ b/xen/include/asm-arm/irq.h
@@ -62,6 +62,7 @@ struct irq_desc *__irq_to_desc(int irq);
 #define irq_to_desc(irq)    __irq_to_desc(irq)
 
 void do_IRQ(struct cpu_user_regs *regs, unsigned int irq, int is_fiq);
+void do_ppi(struct cpu_user_regs *regs, unsigned int irq);
 
 #ifdef CONFIG_GICV3
 static inline bool is_lpi(unsigned int irq)
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [RFC 09/16] gic-vgic:vgic: avoid excessive conversions
       [not found] <1543440731-21947-1-git-send-email-andrii.anisov@gmail.com>
                   ` (7 preceding siblings ...)
  2018-11-28 21:32 ` [RFC 08/16] gic: separate ppi processing Andrii Anisov
@ 2018-11-28 21:32 ` Andrii Anisov
  2018-11-29 10:23   ` Julien Grall
  2018-11-28 21:32 ` [RFC 10/16] gic:vgic:gic-vgic: introduce non-atomic bitops Andrii Anisov
                   ` (7 subsequent siblings)
  16 siblings, 1 reply; 59+ messages in thread
From: Andrii Anisov @ 2018-11-28 21:32 UTC (permalink / raw)
  Cc: xen-devel, Julien Grall, Stefano Stabellini, Andrii Anisov

From: Andrii Anisov <andrii_anisov@epam.com>

Avoid excessive conversions between pending_irq and irq number/priority.
This simlifies functions interface and reduces under locks code size.

Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
---
 xen/arch/arm/gic-vgic.c    | 10 +++-------
 xen/arch/arm/vgic-v3-its.c |  2 +-
 xen/arch/arm/vgic.c        |  6 +++---
 xen/include/asm-arm/gic.h  |  3 ---
 xen/include/asm-arm/vgic.h |  2 ++
 5 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/xen/arch/arm/gic-vgic.c b/xen/arch/arm/gic-vgic.c
index 9bbd0c5..f0e6c6f 100644
--- a/xen/arch/arm/gic-vgic.c
+++ b/xen/arch/arm/gic-vgic.c
@@ -75,10 +75,8 @@ void gic_remove_from_lr_pending(struct vcpu *v, struct pending_irq *p)
     list_del_init(&p->lr_queue);
 }
 
-void gic_raise_inflight_irq(struct vcpu *v, unsigned int virtual_irq)
+void gic_raise_inflight_irq(struct vcpu *v, struct pending_irq *n)
 {
-    struct pending_irq *n = irq_to_pending(v, virtual_irq);
-
 #ifdef CONFIG_GICV3
     /* If an LPI has been removed meanwhile, there is nothing left to raise. */
     if ( unlikely(!n) )
@@ -135,12 +133,10 @@ static unsigned int gic_find_unused_lr(struct vcpu *v,
     return lr;
 }
 
-void gic_raise_guest_irq(struct vcpu *v, unsigned int virtual_irq,
-        unsigned int priority)
+void gic_raise_guest_irq(struct vcpu *v, struct pending_irq *p)
 {
     int i;
     unsigned int nr_lrs = gic_get_nr_lrs();
-    struct pending_irq *p = irq_to_pending(v, virtual_irq);
 
     ASSERT(spin_is_locked(&v->arch.vgic.lock));
 
@@ -233,7 +229,7 @@ static void gic_update_one_lr(struct vcpu *v, int i)
         if ( test_bit(GIC_IRQ_GUEST_ENABLED, &p->status) &&
              test_bit(GIC_IRQ_GUEST_QUEUED, &p->status) &&
              !test_bit(GIC_IRQ_GUEST_MIGRATING, &p->status) )
-            gic_raise_guest_irq(v, irq, p->priority);
+            gic_raise_guest_irq(v, p);
         else {
             list_del_init(&p->inflight);
             /*
diff --git a/xen/arch/arm/vgic-v3-its.c b/xen/arch/arm/vgic-v3-its.c
index 3c6693d..f9bd27f 100644
--- a/xen/arch/arm/vgic-v3-its.c
+++ b/xen/arch/arm/vgic-v3-its.c
@@ -448,7 +448,7 @@ static void update_lpi_vgic_status(struct vcpu *v, struct pending_irq *p)
     {
         if ( !list_empty(&p->inflight) &&
              !test_bit(GIC_IRQ_GUEST_VISIBLE, &p->status) )
-            gic_raise_guest_irq(v, p->irq, p->lpi_priority);
+            gic_raise_guest_irq(v, p);
     }
     else
         gic_remove_from_lr_pending(v, p);
diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
index e30a518..c142476 100644
--- a/xen/arch/arm/vgic.c
+++ b/xen/arch/arm/vgic.c
@@ -434,7 +434,7 @@ void vgic_enable_irqs(struct vcpu *v, uint32_t r, int n)
         }
 out:
         if ( !list_empty(&p->inflight) && !test_bit(GIC_IRQ_GUEST_VISIBLE, &p->status) )
-            gic_raise_guest_irq(v_target, irq, p->priority);
+            gic_raise_guest_irq(v_target, p);
         spin_unlock_irqrestore(&v_target->arch.vgic.lock, flags);
         if ( p->desc != NULL )
         {
@@ -608,14 +608,14 @@ void vgic_inject_irq(struct domain *d, struct vcpu *v, unsigned int virq,
 
     if ( !list_empty(&n->inflight) )
     {
-        gic_raise_inflight_irq(v, virq);
+        gic_raise_inflight_irq(v, n);
         goto out;
     }
 
     priority = vgic_get_virq_priority(v, virq);
     n->priority = priority;
 
-    gic_raise_guest_irq(v, virq, priority);
+    gic_raise_guest_irq(v, n);
 
     list_for_each_entry ( iter, &v->arch.vgic.inflight_irqs, inflight )
     {
diff --git a/xen/include/asm-arm/gic.h b/xen/include/asm-arm/gic.h
index fab02f1..3d7394d 100644
--- a/xen/include/asm-arm/gic.h
+++ b/xen/include/asm-arm/gic.h
@@ -252,9 +252,6 @@ int gic_remove_irq_from_guest(struct domain *d, unsigned int virq,
 extern void gic_clear_pending_irqs(struct vcpu *v);
 
 extern void init_maintenance_interrupt(void);
-extern void gic_raise_guest_irq(struct vcpu *v, unsigned int irq,
-        unsigned int priority);
-extern void gic_raise_inflight_irq(struct vcpu *v, unsigned int virtual_irq);
 
 /* Accept an interrupt from the GIC and dispatch its handler */
 extern void gic_interrupt(struct cpu_user_regs *regs, int is_fiq);
diff --git a/xen/include/asm-arm/vgic.h b/xen/include/asm-arm/vgic.h
index 7507162..a27a1a9 100644
--- a/xen/include/asm-arm/vgic.h
+++ b/xen/include/asm-arm/vgic.h
@@ -287,6 +287,8 @@ enum gic_sgi_mode;
 extern struct vcpu *vgic_get_target_vcpu(struct vcpu *v, unsigned int virq);
 extern void vgic_remove_irq_from_queues(struct vcpu *v, struct pending_irq *p);
 extern void gic_remove_from_lr_pending(struct vcpu *v, struct pending_irq *p);
+extern void gic_raise_guest_irq(struct vcpu *v, struct pending_irq *p);
+extern void gic_raise_inflight_irq(struct vcpu *v, struct pending_irq *n);
 extern void vgic_init_pending_irq(struct pending_irq *p, unsigned int virq);
 extern struct pending_irq *irq_to_pending(struct vcpu *v, unsigned int irq);
 extern struct pending_irq *spi_to_pending(struct domain *d, unsigned int irq);
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [RFC 10/16] gic:vgic:gic-vgic: introduce non-atomic bitops
       [not found] <1543440731-21947-1-git-send-email-andrii.anisov@gmail.com>
                   ` (8 preceding siblings ...)
  2018-11-28 21:32 ` [RFC 09/16] gic-vgic:vgic: avoid excessive conversions Andrii Anisov
@ 2018-11-28 21:32 ` Andrii Anisov
  2018-11-29 12:14   ` Andre Przywara
  2018-11-28 21:32 ` [RFC 11/16] irq: skip action avalability check for guest's IRQ Andrii Anisov
                   ` (6 subsequent siblings)
  16 siblings, 1 reply; 59+ messages in thread
From: Andrii Anisov @ 2018-11-28 21:32 UTC (permalink / raw)
  Cc: xen-devel, Julien Grall, Stefano Stabellini, Andrii Anisov

From: Andrii Anisov <andrii_anisov@epam.com>

All bit operations for gic, vgic and gic-vgic are performed under
spinlocks, so there is no need for atomic bit ops here, they only
introduce excessive call to functions used more expensive exclusive
ARM instructions.

Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
---
 xen/arch/arm/gic-vgic.c | 16 ++++++++++++++++
 xen/arch/arm/gic.c      | 16 ++++++++++++++++
 xen/arch/arm/vgic.c     | 16 ++++++++++++++++
 3 files changed, 48 insertions(+)

diff --git a/xen/arch/arm/gic-vgic.c b/xen/arch/arm/gic-vgic.c
index f0e6c6f..5b73bbd 100644
--- a/xen/arch/arm/gic-vgic.c
+++ b/xen/arch/arm/gic-vgic.c
@@ -25,6 +25,22 @@
 #include <asm/gic.h>
 #include <asm/vgic.h>
 
+#undef set_bit
+#define set_bit(nr, addr) (*(addr) |= (1<<nr))
+
+#undef clear_bit
+#define clear_bit(nr, addr) (*(addr) &= ~(1<<nr))
+
+#undef test_bit
+#define test_bit(nr,addr) (*(addr) & (1<<nr))
+
+#undef test_and_clear_bit
+#define test_and_clear_bit(nr,addr) ({                    \
+    bool _x; \
+    _x = (*(addr) & (1<<nr));                        \
+    (*(addr) &= ~(1<<nr));                                \
+    (_x);})
+
 #define lr_all_full() (this_cpu(lr_mask) == ((1 << gic_get_nr_lrs()) - 1))
 
 #undef GIC_DEBUG
diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
index 52e4270..d558059 100644
--- a/xen/arch/arm/gic.c
+++ b/xen/arch/arm/gic.c
@@ -40,6 +40,22 @@
 
 DEFINE_PER_CPU(uint64_t, lr_mask);
 
+#undef set_bit
+#define set_bit(nr, addr) (*(addr) |= (1<<nr))
+
+#undef clear_bit
+#define clear_bit(nr, addr) (*(addr) &= ~(1<<nr))
+
+#undef test_bit
+#define test_bit(nr,addr) (*(addr) & (1<<nr))
+
+#undef test_and_clear_bit
+#define test_and_clear_bit(nr,addr) ({                    \
+    bool _x; \
+    _x = (*(addr) & (1<<nr));                        \
+    (*(addr) &= ~(1<<nr));                                \
+    (_x);})
+
 #undef GIC_DEBUG
 
 const struct gic_hw_operations *gic_hw_ops;
diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
index c142476..7691310 100644
--- a/xen/arch/arm/vgic.c
+++ b/xen/arch/arm/vgic.c
@@ -33,6 +33,22 @@
 #include <asm/gic.h>
 #include <asm/vgic.h>
 
+#undef set_bit
+#define set_bit(nr, addr) (*(addr) |= (1<<nr))
+
+#undef clear_bit
+#define clear_bit(nr, addr) (*(addr) &= ~(1<<nr))
+
+#undef test_bit
+#define test_bit(nr,addr) (*(addr) & (1<<nr))
+
+#undef test_and_clear_bit
+#define test_and_clear_bit(nr,addr) ({                    \
+    bool _x = (*(addr) & (1<<nr));                        \
+    (*(addr) &= ~(1<<nr));                                \
+    return (_x);                                                 \
+})
+
 static inline struct vgic_irq_rank *vgic_get_rank(struct vcpu *v, int rank)
 {
     if ( rank == 0 )
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [RFC 11/16] irq: skip action avalability check for guest's IRQ
       [not found] <1543440731-21947-1-git-send-email-andrii.anisov@gmail.com>
                   ` (9 preceding siblings ...)
  2018-11-28 21:32 ` [RFC 10/16] gic:vgic:gic-vgic: introduce non-atomic bitops Andrii Anisov
@ 2018-11-28 21:32 ` Andrii Anisov
  2018-12-11 14:48   ` Julien Grall
  2018-11-28 21:32 ` [RFC 12/16] gic-v2: Write HCR only on change Andrii Anisov
                   ` (5 subsequent siblings)
  16 siblings, 1 reply; 59+ messages in thread
From: Andrii Anisov @ 2018-11-28 21:32 UTC (permalink / raw)
  Cc: xen-devel, Julien Grall, Stefano Stabellini, Andrii Anisov

From: Andrii Anisov <andrii_anisov@epam.com>

An IRQ assigned to guest always has an action. This removes
another odd check on guest IRQ path. Also getting an unknown
interrupt is very unlikely on a non-debug platform.

Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
---
 xen/arch/arm/irq.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/xen/arch/arm/irq.c b/xen/arch/arm/irq.c
index 596a8b8..5debfc5 100644
--- a/xen/arch/arm/irq.c
+++ b/xen/arch/arm/irq.c
@@ -205,13 +205,6 @@ void do_IRQ(struct cpu_user_regs *regs, unsigned int irq, int is_fiq)
     spin_lock(&desc->lock);
     desc->handler->ack(desc);
 
-    if ( !desc->action )
-    {
-        printk("Unknown %s %#3.3x\n",
-               is_fiq ? "FIQ" : "IRQ", irq);
-        goto out;
-    }
-
     if ( test_bit(_IRQ_GUEST, &desc->status) )
     {
         struct irq_guest *info = irq_get_guest_info(desc);
@@ -229,6 +222,13 @@ void do_IRQ(struct cpu_user_regs *regs, unsigned int irq, int is_fiq)
         goto out_no_end;
     }
 
+    if ( unlikely(!desc->action) )
+    {
+        printk("Unknown %s %#3.3x\n",
+               is_fiq ? "FIQ" : "IRQ", irq);
+        goto out;
+    }
+
     set_bit(_IRQ_PENDING, &desc->status);
 
     /*
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [RFC 12/16] gic-v2: Write HCR only on change
       [not found] <1543440731-21947-1-git-send-email-andrii.anisov@gmail.com>
                   ` (10 preceding siblings ...)
  2018-11-28 21:32 ` [RFC 11/16] irq: skip action avalability check for guest's IRQ Andrii Anisov
@ 2018-11-28 21:32 ` Andrii Anisov
  2018-11-29 16:36   ` Julien Grall
  2018-11-28 21:32 ` [RFC 13/16] gic-vgic: skip irqs locking Andrii Anisov
                   ` (4 subsequent siblings)
  16 siblings, 1 reply; 59+ messages in thread
From: Andrii Anisov @ 2018-11-28 21:32 UTC (permalink / raw)
  Cc: xen-devel, Julien Grall, Stefano Stabellini, Andrii Anisov

From: Andrii Anisov <andrii_anisov@epam.com>

This saves one write to peripheral HCR register per hypervisor entry for
most cases.

Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
---
 xen/arch/arm/gic-v2.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/xen/arch/arm/gic-v2.c b/xen/arch/arm/gic-v2.c
index 1a744c5..25147bd 100644
--- a/xen/arch/arm/gic-v2.c
+++ b/xen/arch/arm/gic-v2.c
@@ -579,14 +579,17 @@ static void gicv2_write_lr(int lr, const struct gic_lr *lr_reg)
 
 static void gicv2_hcr_status(uint32_t flag, bool status)
 {
-    uint32_t hcr = readl_gich(GICH_HCR);
+    uint32_t hcr, ohcr;
+
+    ohcr = hcr = readl_gich(GICH_HCR);
 
     if ( status )
         hcr |= flag;
     else
         hcr &= (~flag);
 
-    writel_gich(hcr, GICH_HCR);
+    if ( hcr != ohcr )
+        writel_gich(hcr, GICH_HCR);
 }
 
 static unsigned int gicv2_read_vmcr_priority(void)
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [RFC 13/16] gic-vgic: skip irqs locking
       [not found] <1543440731-21947-1-git-send-email-andrii.anisov@gmail.com>
                   ` (11 preceding siblings ...)
  2018-11-28 21:32 ` [RFC 12/16] gic-v2: Write HCR only on change Andrii Anisov
@ 2018-11-28 21:32 ` Andrii Anisov
  2018-12-12 12:07   ` Julien Grall
  2018-11-28 21:32 ` [RFC 14/16] hack: arm/domain: simplify context restore from idle vcpu Andrii Anisov
                   ` (3 subsequent siblings)
  16 siblings, 1 reply; 59+ messages in thread
From: Andrii Anisov @ 2018-11-28 21:32 UTC (permalink / raw)
  Cc: xen-devel, Julien Grall, Stefano Stabellini, Andrii Anisov

From: Andrii Anisov <andrii_anisov@epam.com>

Those fucntions are called under IRQs disabled already, so avoid
additional flags saving and restore.

Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
---
 xen/arch/arm/gic-vgic.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/xen/arch/arm/gic-vgic.c b/xen/arch/arm/gic-vgic.c
index 5b73bbd..e511f91 100644
--- a/xen/arch/arm/gic-vgic.c
+++ b/xen/arch/arm/gic-vgic.c
@@ -268,7 +268,6 @@ static void gic_update_one_lr(struct vcpu *v, int i)
 void vgic_sync_from_lrs(struct vcpu *v)
 {
     int i = 0;
-    unsigned long flags;
     unsigned int nr_lrs = gic_get_nr_lrs();
 
     /* The idle domain has no LRs to be cleared. Since gic_restore_state
@@ -279,7 +278,7 @@ void vgic_sync_from_lrs(struct vcpu *v)
 
     gic_hw_ops->update_hcr_status(GICH_HCR_UIE, false);
 
-    spin_lock_irqsave(&v->arch.vgic.lock, flags);
+    spin_lock(&v->arch.vgic.lock);
 
     while ((i = find_next_bit((const unsigned long *) &this_cpu(lr_mask),
                               nr_lrs, i)) < nr_lrs ) {
@@ -287,7 +286,7 @@ void vgic_sync_from_lrs(struct vcpu *v)
         i++;
     }
 
-    spin_unlock_irqrestore(&v->arch.vgic.lock, flags);
+    spin_unlock(&v->arch.vgic.lock);
 }
 
 static void gic_restore_pending_irqs(struct vcpu *v)
@@ -295,11 +294,10 @@ static void gic_restore_pending_irqs(struct vcpu *v)
     int lr = 0;
     struct pending_irq *p, *t, *p_r;
     struct list_head *inflight_r;
-    unsigned long flags;
     unsigned int nr_lrs = gic_get_nr_lrs();
     int lrs = nr_lrs;
 
-    spin_lock_irqsave(&v->arch.vgic.lock, flags);
+    spin_lock(&v->arch.vgic.lock);
 
     if ( list_empty(&v->arch.vgic.lr_pending) )
         goto out;
@@ -343,7 +341,7 @@ found:
     }
 
 out:
-    spin_unlock_irqrestore(&v->arch.vgic.lock, flags);
+    spin_unlock(&v->arch.vgic.lock);
 }
 
 void gic_clear_pending_irqs(struct vcpu *v)
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [RFC 14/16] hack: arm/domain: simplify context restore from idle vcpu
       [not found] <1543440731-21947-1-git-send-email-andrii.anisov@gmail.com>
                   ` (12 preceding siblings ...)
  2018-11-28 21:32 ` [RFC 13/16] gic-vgic: skip irqs locking Andrii Anisov
@ 2018-11-28 21:32 ` Andrii Anisov
  2018-11-29 15:53   ` Julien Grall
  2018-11-29 16:00   ` Wei Liu
  2018-11-28 21:32 ` [RFC 15/16] hack: move gicv2 LRs reads and writes out of spinlocks Andrii Anisov
                   ` (2 subsequent siblings)
  16 siblings, 2 replies; 59+ messages in thread
From: Andrii Anisov @ 2018-11-28 21:32 UTC (permalink / raw)
  Cc: Stefano Stabellini, Andrii Anisov, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, Jan Beulich, xen-devel, Wei Liu

From: Andrii Anisov <andrii_anisov@epam.com>

Simplify context restore from idle vcpu to the one ran before it.
This improves low cpu load but high irq rate use-cases.

Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
---
 xen/arch/arm/domain.c   | 21 +++++++++++----------
 xen/include/xen/sched.h |  1 +
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index 1d926dc..8e886b7 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -187,9 +187,6 @@ static void ctxt_switch_to(struct vcpu *n)
     WRITE_SYSREG32(vpidr, VPIDR_EL2);
     WRITE_SYSREG(n->arch.vmpidr, VMPIDR_EL2);
 
-    /* VGIC */
-    gic_restore_state(n);
-
     /* VFP */
     vfp_restore_state(n);
 
@@ -263,11 +260,6 @@ static void ctxt_switch_to(struct vcpu *n)
     WRITE_SYSREG(n->arch.csselr, CSSELR_EL1);
 
     isb();
-
-    /* This is could trigger an hardware interrupt from the virtual
-     * timer. The interrupt needs to be injected into the guest. */
-    WRITE_SYSREG32(n->arch.cntkctl, CNTKCTL_EL1);
-    virt_timer_restore(n);
 }
 
 /* Update per-VCPU guest runstate shared memory area (if registered). */
@@ -302,8 +294,17 @@ static void update_runstate_area(struct vcpu *v)
 static void schedule_tail(struct vcpu *prev)
 {
     ctxt_switch_from(prev);
-
-    ctxt_switch_to(current);
+    if ( !(is_idle_vcpu(prev) && (prev->prev == current)) )
+        ctxt_switch_to(current);
+    /* VGIC */
+    if ( !is_idle_vcpu(current) )
+    {
+        gic_restore_state(current);
+        WRITE_SYSREG32(current->arch.cntkctl, CNTKCTL_EL1);
+        virt_timer_restore(current);
+    }
+    else
+        current->prev = prev;
 
     local_irq_enable();
 
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 0309c1f..e85108d 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -272,6 +272,7 @@ struct vcpu
     struct vpci_vcpu vpci;
 
     struct arch_vcpu arch;
+    struct vcpu *prev;
 };
 
 /* Per-domain lock can be recursively acquired in fault handlers. */
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [RFC 15/16] hack: move gicv2 LRs reads and writes out of spinlocks
       [not found] <1543440731-21947-1-git-send-email-andrii.anisov@gmail.com>
                   ` (13 preceding siblings ...)
  2018-11-28 21:32 ` [RFC 14/16] hack: arm/domain: simplify context restore from idle vcpu Andrii Anisov
@ 2018-11-28 21:32 ` Andrii Anisov
  2018-11-28 21:58   ` Julien Grall
  2018-11-28 21:32 ` [RFC 16/16] gic: vgic: align frequently accessed data by cache line size Andrii Anisov
  2018-11-29  7:40 ` [RFC 00/16] Old GIC (gic-vgic) optimizations for GICV2 Andrii Anisov
  16 siblings, 1 reply; 59+ messages in thread
From: Andrii Anisov @ 2018-11-28 21:32 UTC (permalink / raw)
  Cc: xen-devel, Julien Grall, Stefano Stabellini, Andrii Anisov

From: Andrii Anisov <andrii_anisov@epam.com>

Use values cached in `struct vcpu` for guest IRQs processing.
This also reduces LR stores and restores on VCPU switch.

Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
---
 xen/arch/arm/gic-v2.c     | 56 +++++++++++++++++++++++++++++++----------------
 xen/arch/arm/gic-vgic.c   |  8 +++++--
 xen/include/asm-arm/gic.h |  5 ++++-
 3 files changed, 47 insertions(+), 22 deletions(-)

diff --git a/xen/arch/arm/gic-v2.c b/xen/arch/arm/gic-v2.c
index 25147bd..a2eb0a7 100644
--- a/xen/arch/arm/gic-v2.c
+++ b/xen/arch/arm/gic-v2.c
@@ -176,31 +176,18 @@ static unsigned int gicv2_cpu_mask(const cpumask_t *cpumask)
 
 static void gicv2_save_state(struct vcpu *v)
 {
-    int i;
-
-    /* No need for spinlocks here because interrupts are disabled around
-     * this call and it only accesses struct vcpu fields that cannot be
-     * accessed simultaneously by another pCPU.
-     */
-    for ( i = 0; i < gicv2_info.nr_lrs; i++ )
-        v->arch.gic.v2.lr[i] = readl_gich(GICH_LR + i * 4);
-
     v->arch.gic.v2.apr = readl_gich(GICH_APR);
     v->arch.gic.v2.vmcr = readl_gich(GICH_VMCR);
     /* Disable until next VCPU scheduled */
     writel_gich(0, GICH_HCR);
 }
 
-static void gicv2_restore_state(const struct vcpu *v)
+static void gicv2_restore_state(struct vcpu *v)
 {
-    int i;
-
-    for ( i = 0; i < gicv2_info.nr_lrs; i++ )
-        writel_gich(v->arch.gic.v2.lr[i], GICH_LR + i * 4);
-
     writel_gich(v->arch.gic.v2.apr, GICH_APR);
     writel_gich(v->arch.gic.v2.vmcr, GICH_VMCR);
     writel_gich(GICH_HCR_EN, GICH_HCR);
+    v->arch.gic.v2.lr_update_mask = (1 << gic_hw_ops->info->nr_lrs) - 1;
 }
 
 static void gicv2_dump_state(const struct vcpu *v)
@@ -493,6 +480,7 @@ static void gicv2_disable_interface(void)
 static void gicv2_update_lr(int lr, unsigned int virq, uint8_t priority,
                             unsigned int hw_irq, unsigned int state)
 {
+    struct vcpu *v = current;
     uint32_t lr_reg;
 
     BUG_ON(lr >= gicv2_info.nr_lrs);
@@ -507,19 +495,23 @@ static void gicv2_update_lr(int lr, unsigned int virq, uint8_t priority,
         lr_reg |= GICH_V2_LR_HW | ((hw_irq & GICH_V2_LR_PHYSICAL_MASK )
                                    << GICH_V2_LR_PHYSICAL_SHIFT);
 
-    writel_gich(lr_reg, GICH_LR + lr * 4);
+    v->arch.gic.v2.lr[lr] = lr_reg;
+    v->arch.gic.v2.lr_update_mask |= 1<<lr;
 }
 
 static void gicv2_clear_lr(int lr)
 {
-    writel_gich(0, GICH_LR + lr * 4);
+    struct vcpu *v = current;
+
+    v->arch.gic.v2.lr[lr] = 0;
+    v->arch.gic.v2.lr_update_mask |= 1<<lr;
 }
 
 static void gicv2_read_lr(int lr, struct gic_lr *lr_reg)
 {
     uint32_t lrv;
 
-    lrv          = readl_gich(GICH_LR + lr * 4);
+    lrv = current->arch.gic.v2.lr[lr];
     lr_reg->virq = (lrv >> GICH_V2_LR_VIRTUAL_SHIFT) & GICH_V2_LR_VIRTUAL_MASK;
     lr_reg->priority = (lrv >> GICH_V2_LR_PRIORITY_SHIFT) & GICH_V2_LR_PRIORITY_MASK;
     lr_reg->pending = lrv & GICH_V2_LR_PENDING;
@@ -546,6 +538,7 @@ static void gicv2_read_lr(int lr, struct gic_lr *lr_reg)
 static void gicv2_write_lr(int lr, const struct gic_lr *lr_reg)
 {
     uint32_t lrv = 0;
+    struct vcpu *v = current;
 
     lrv = (((lr_reg->virq & GICH_V2_LR_VIRTUAL_MASK) << GICH_V2_LR_VIRTUAL_SHIFT)   |
           ((uint32_t)(lr_reg->priority & GICH_V2_LR_PRIORITY_MASK)
@@ -574,7 +567,30 @@ static void gicv2_write_lr(int lr, const struct gic_lr *lr_reg)
         lrv |= (uint32_t)lr_reg->virt.source << GICH_V2_LR_CPUID_SHIFT;
     }
 
-    writel_gich(lrv, GICH_LR + lr * 4);
+    v->arch.gic.v2.lr[lr] = lrv;
+    v->arch.gic.v2.lr_update_mask |= 1<<lr;
+}
+
+static void gicv2_fetch_lrs(struct vcpu *v)
+{
+    int i;
+
+    for ( i = 0; i < gicv2_info.nr_lrs; i++ )
+        if ( this_cpu(lr_mask) & 1<<i )
+            v->arch.gic.v2.lr[i] = readl_gich(GICH_LR + i * 4);
+        else
+            v->arch.gic.v2.lr[i] = 0;
+}
+
+static void gicv2_push_lrs(struct vcpu *v)
+{
+    int i;
+    uint64_t mask = v->arch.gic.v2.lr_update_mask;
+
+    for ( i = 0; i < gicv2_info.nr_lrs; i++ )
+        if ( mask & 1<<i )
+            writel_gich(v->arch.gic.v2.lr[i], GICH_LR + i * 4);
+    v->arch.gic.v2.lr_update_mask = 0;
 }
 
 static void gicv2_hcr_status(uint32_t flag, bool status)
@@ -1360,6 +1376,8 @@ const static struct gic_hw_operations gicv2_ops = {
     .map_hwdom_extra_mappings = gicv2_map_hwdown_extra_mappings,
     .iomem_deny_access   = gicv2_iomem_deny_access,
     .do_LPI              = gicv2_do_LPI,
+    .fetch_lrs           = gicv2_fetch_lrs,
+    .push_lrs            = gicv2_push_lrs,
 };
 
 /* Set up the GIC */
diff --git a/xen/arch/arm/gic-vgic.c b/xen/arch/arm/gic-vgic.c
index e511f91..86c8ae2 100644
--- a/xen/arch/arm/gic-vgic.c
+++ b/xen/arch/arm/gic-vgic.c
@@ -277,6 +277,7 @@ void vgic_sync_from_lrs(struct vcpu *v)
         return;
 
     gic_hw_ops->update_hcr_status(GICH_HCR_UIE, false);
+    gic_hw_ops->fetch_lrs(v);
 
     spin_lock(&v->arch.vgic.lock);
 
@@ -403,11 +404,14 @@ int vgic_vcpu_pending_irq(struct vcpu *v)
 
 void vgic_sync_to_lrs(void)
 {
+    struct vcpu *v = current;
     ASSERT(!local_irq_is_enabled());
 
-    gic_restore_pending_irqs(current);
+    gic_restore_pending_irqs(v);
 
-    if ( !list_empty(&current->arch.vgic.lr_pending) && lr_all_full() )
+    gic_hw_ops->push_lrs(v);
+
+    if ( !list_empty(&v->arch.vgic.lr_pending) && lr_all_full() )
         gic_hw_ops->update_hcr_status(GICH_HCR_UIE, true);
 }
 
diff --git a/xen/include/asm-arm/gic.h b/xen/include/asm-arm/gic.h
index 3d7394d..add2566 100644
--- a/xen/include/asm-arm/gic.h
+++ b/xen/include/asm-arm/gic.h
@@ -187,6 +187,7 @@ struct gic_v2 {
     uint32_t vmcr;
     uint32_t apr;
     uint32_t lr[64];
+    uint64_t lr_update_mask;
 };
 
 /*
@@ -323,7 +324,7 @@ struct gic_hw_operations {
     /* Save GIC registers */
     void (*save_state)(struct vcpu *);
     /* Restore GIC registers */
-    void (*restore_state)(const struct vcpu *);
+    void (*restore_state)(struct vcpu *);
     /* Dump GIC LR register information */
     void (*dump_state)(const struct vcpu *);
 
@@ -384,6 +385,8 @@ struct gic_hw_operations {
     int (*iomem_deny_access)(const struct domain *d);
     /* Handle LPIs, which require special handling */
     void (*do_LPI)(unsigned int lpi);
+    void (*fetch_lrs) (struct vcpu *v);
+    void (*push_lrs) (struct vcpu *v);
 };
 
 extern const struct gic_hw_operations *gic_hw_ops;
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [RFC 16/16] gic: vgic: align frequently accessed data by cache line size
       [not found] <1543440731-21947-1-git-send-email-andrii.anisov@gmail.com>
                   ` (14 preceding siblings ...)
  2018-11-28 21:32 ` [RFC 15/16] hack: move gicv2 LRs reads and writes out of spinlocks Andrii Anisov
@ 2018-11-28 21:32 ` Andrii Anisov
  2018-11-29 16:24   ` Julien Grall
  2018-11-29  7:40 ` [RFC 00/16] Old GIC (gic-vgic) optimizations for GICV2 Andrii Anisov
  16 siblings, 1 reply; 59+ messages in thread
From: Andrii Anisov @ 2018-11-28 21:32 UTC (permalink / raw)
  Cc: xen-devel, Julien Grall, Stefano Stabellini, Andrii Anisov

From: Andrii Anisov <andrii_anisov@epam.com>

Cache line size assumed 64 byte as for H3. Align the `struct
pending_irq` and allocate lrs shadow aligned to cache line size.

Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
---
 xen/arch/arm/domain.c        | 4 ++++
 xen/arch/arm/vgic.c          | 9 +++++++++
 xen/include/asm-arm/config.h | 2 +-
 xen/include/asm-arm/gic.h    | 2 +-
 xen/include/asm-arm/vgic.h   | 2 +-
 5 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index 8e886b7..8bcb667 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -558,6 +558,10 @@ int arch_vcpu_create(struct vcpu *v)
     v->arch.saved_context.sp = (register_t)v->arch.cpu_info;
     v->arch.saved_context.pc = (register_t)continue_new_vcpu;
 
+    v->arch.gic.v2.lr = xzalloc_bytes(sizeof(uint32_t) * gic_number_lines());
+    if ( v->arch.gic.v2.lr == NULL )
+        return -ENOMEM;
+
     /* Idle VCPUs don't need the rest of this setup */
     if ( is_idle_vcpu(v) )
         return rc;
diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
index 7691310..bedfb99 100644
--- a/xen/arch/arm/vgic.c
+++ b/xen/arch/arm/vgic.c
@@ -166,6 +166,15 @@ int domain_vgic_init(struct domain *d, unsigned int nr_spis)
 
     d->arch.vgic.pending_irqs =
         xzalloc_array(struct pending_irq, d->arch.vgic.nr_spis);
+
+    if ( sizeof(struct pending_irq) != dcache_line_bytes )
+    {
+        printk ("sizeof(struct pending_irq) = %lu  is not equal to cacheline"
+                "size %lu. Is it expected?\n", sizeof(struct pending_irq),
+                dcache_line_bytes);
+        WARN();
+    }
+
     if ( d->arch.vgic.pending_irqs == NULL )
         return -ENOMEM;
 
diff --git a/xen/include/asm-arm/config.h b/xen/include/asm-arm/config.h
index bc89e84..4f3669f 100644
--- a/xen/include/asm-arm/config.h
+++ b/xen/include/asm-arm/config.h
@@ -28,7 +28,7 @@
 
 #define CONFIG_ARM 1
 
-#define CONFIG_ARM_L1_CACHE_SHIFT 7 /* XXX */
+#define CONFIG_ARM_L1_CACHE_SHIFT 6 /* XXX */
 
 #define CONFIG_SMP 1
 
diff --git a/xen/include/asm-arm/gic.h b/xen/include/asm-arm/gic.h
index add2566..fe44d3a 100644
--- a/xen/include/asm-arm/gic.h
+++ b/xen/include/asm-arm/gic.h
@@ -186,7 +186,7 @@ struct gic_v2 {
     uint32_t hcr;
     uint32_t vmcr;
     uint32_t apr;
-    uint32_t lr[64];
+    uint32_t *lr;
     uint64_t lr_update_mask;
 };
 
diff --git a/xen/include/asm-arm/vgic.h b/xen/include/asm-arm/vgic.h
index a27a1a9..d4ec96f 100644
--- a/xen/include/asm-arm/vgic.h
+++ b/xen/include/asm-arm/vgic.h
@@ -99,7 +99,7 @@ struct pending_irq
      * TODO: when implementing irq migration, taking only the current
      * vgic lock is not going to be enough. */
     struct list_head lr_queue;
-};
+}__cacheline_aligned;
 
 #define NR_INTERRUPT_PER_RANK   32
 #define INTERRUPT_RANK_MASK (NR_INTERRUPT_PER_RANK - 1)
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* Re: [RFC 15/16] hack: move gicv2 LRs reads and writes out of spinlocks
  2018-11-28 21:32 ` [RFC 15/16] hack: move gicv2 LRs reads and writes out of spinlocks Andrii Anisov
@ 2018-11-28 21:58   ` Julien Grall
  0 siblings, 0 replies; 59+ messages in thread
From: Julien Grall @ 2018-11-28 21:58 UTC (permalink / raw)
  To: Andrii Anisov
  Cc: xen-devel, nd, Andrii Anisov, Stefano Stabellini, Andre Przywara

Hi Andrii,

I can't comment on the cover letter because it is in-existent...
This series needs a lot of explanation why you are doing this way
and the performance improvement over multiple use case (not only
graphics benchmark). But...

On 28/11/2018 21:32, Andrii Anisov wrote:
> From: Andrii Anisov <andrii_anisov@epam.com>
> 
> Use values cached in `struct vcpu` for guest IRQs processing.
> This also reduces LR stores and restores on VCPU switch.

... I am trying to understand what is the benefits to improve the old 
vGIC when ideally we would like to get the new vGIC used in more place
as it is more compliant with the GIC spec. The more that this patch 
looks very similar to what the new vGIC does...

Cheers,

> 
> Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
> ---
>   xen/arch/arm/gic-v2.c     | 56 +++++++++++++++++++++++++++++++----------------
>   xen/arch/arm/gic-vgic.c   |  8 +++++--
>   xen/include/asm-arm/gic.h |  5 ++++-
>   3 files changed, 47 insertions(+), 22 deletions(-)
> 
> diff --git a/xen/arch/arm/gic-v2.c b/xen/arch/arm/gic-v2.c
> index 25147bd..a2eb0a7 100644
> --- a/xen/arch/arm/gic-v2.c
> +++ b/xen/arch/arm/gic-v2.c
> @@ -176,31 +176,18 @@ static unsigned int gicv2_cpu_mask(const cpumask_t *cpumask)
>   
>   static void gicv2_save_state(struct vcpu *v)
>   {
> -    int i;
> -
> -    /* No need for spinlocks here because interrupts are disabled around
> -     * this call and it only accesses struct vcpu fields that cannot be
> -     * accessed simultaneously by another pCPU.
> -     */
> -    for ( i = 0; i < gicv2_info.nr_lrs; i++ )
> -        v->arch.gic.v2.lr[i] = readl_gich(GICH_LR + i * 4);
> -
>       v->arch.gic.v2.apr = readl_gich(GICH_APR);
>       v->arch.gic.v2.vmcr = readl_gich(GICH_VMCR);
>       /* Disable until next VCPU scheduled */
>       writel_gich(0, GICH_HCR);
>   }
>   
> -static void gicv2_restore_state(const struct vcpu *v)
> +static void gicv2_restore_state(struct vcpu *v)
>   {
> -    int i;
> -
> -    for ( i = 0; i < gicv2_info.nr_lrs; i++ )
> -        writel_gich(v->arch.gic.v2.lr[i], GICH_LR + i * 4);
> -
>       writel_gich(v->arch.gic.v2.apr, GICH_APR);
>       writel_gich(v->arch.gic.v2.vmcr, GICH_VMCR);
>       writel_gich(GICH_HCR_EN, GICH_HCR);
> +    v->arch.gic.v2.lr_update_mask = (1 << gic_hw_ops->info->nr_lrs) - 1;
>   }
>   
>   static void gicv2_dump_state(const struct vcpu *v)
> @@ -493,6 +480,7 @@ static void gicv2_disable_interface(void)
>   static void gicv2_update_lr(int lr, unsigned int virq, uint8_t priority,
>                               unsigned int hw_irq, unsigned int state)
>   {
> +    struct vcpu *v = current;
>       uint32_t lr_reg;
>   
>       BUG_ON(lr >= gicv2_info.nr_lrs);
> @@ -507,19 +495,23 @@ static void gicv2_update_lr(int lr, unsigned int virq, uint8_t priority,
>           lr_reg |= GICH_V2_LR_HW | ((hw_irq & GICH_V2_LR_PHYSICAL_MASK )
>                                      << GICH_V2_LR_PHYSICAL_SHIFT);
>   
> -    writel_gich(lr_reg, GICH_LR + lr * 4);
> +    v->arch.gic.v2.lr[lr] = lr_reg;
> +    v->arch.gic.v2.lr_update_mask |= 1<<lr;
>   }
>   
>   static void gicv2_clear_lr(int lr)
>   {
> -    writel_gich(0, GICH_LR + lr * 4);
> +    struct vcpu *v = current;
> +
> +    v->arch.gic.v2.lr[lr] = 0;
> +    v->arch.gic.v2.lr_update_mask |= 1<<lr;
>   }
>   
>   static void gicv2_read_lr(int lr, struct gic_lr *lr_reg)
>   {
>       uint32_t lrv;
>   
> -    lrv          = readl_gich(GICH_LR + lr * 4);
> +    lrv = current->arch.gic.v2.lr[lr];
>       lr_reg->virq = (lrv >> GICH_V2_LR_VIRTUAL_SHIFT) & GICH_V2_LR_VIRTUAL_MASK;
>       lr_reg->priority = (lrv >> GICH_V2_LR_PRIORITY_SHIFT) & GICH_V2_LR_PRIORITY_MASK;
>       lr_reg->pending = lrv & GICH_V2_LR_PENDING;
> @@ -546,6 +538,7 @@ static void gicv2_read_lr(int lr, struct gic_lr *lr_reg)
>   static void gicv2_write_lr(int lr, const struct gic_lr *lr_reg)
>   {
>       uint32_t lrv = 0;
> +    struct vcpu *v = current;
>   
>       lrv = (((lr_reg->virq & GICH_V2_LR_VIRTUAL_MASK) << GICH_V2_LR_VIRTUAL_SHIFT)   |
>             ((uint32_t)(lr_reg->priority & GICH_V2_LR_PRIORITY_MASK)
> @@ -574,7 +567,30 @@ static void gicv2_write_lr(int lr, const struct gic_lr *lr_reg)
>           lrv |= (uint32_t)lr_reg->virt.source << GICH_V2_LR_CPUID_SHIFT;
>       }
>   
> -    writel_gich(lrv, GICH_LR + lr * 4);
> +    v->arch.gic.v2.lr[lr] = lrv;
> +    v->arch.gic.v2.lr_update_mask |= 1<<lr;
> +}
> +
> +static void gicv2_fetch_lrs(struct vcpu *v)
> +{
> +    int i;
> +
> +    for ( i = 0; i < gicv2_info.nr_lrs; i++ )
> +        if ( this_cpu(lr_mask) & 1<<i )
> +            v->arch.gic.v2.lr[i] = readl_gich(GICH_LR + i * 4);
> +        else
> +            v->arch.gic.v2.lr[i] = 0;
> +}
> +
> +static void gicv2_push_lrs(struct vcpu *v)
> +{
> +    int i;
> +    uint64_t mask = v->arch.gic.v2.lr_update_mask;
> +
> +    for ( i = 0; i < gicv2_info.nr_lrs; i++ )
> +        if ( mask & 1<<i )
> +            writel_gich(v->arch.gic.v2.lr[i], GICH_LR + i * 4);
> +    v->arch.gic.v2.lr_update_mask = 0;
>   }
>   
>   static void gicv2_hcr_status(uint32_t flag, bool status)
> @@ -1360,6 +1376,8 @@ const static struct gic_hw_operations gicv2_ops = {
>       .map_hwdom_extra_mappings = gicv2_map_hwdown_extra_mappings,
>       .iomem_deny_access   = gicv2_iomem_deny_access,
>       .do_LPI              = gicv2_do_LPI,
> +    .fetch_lrs           = gicv2_fetch_lrs,
> +    .push_lrs            = gicv2_push_lrs,
>   };
>   
>   /* Set up the GIC */
> diff --git a/xen/arch/arm/gic-vgic.c b/xen/arch/arm/gic-vgic.c
> index e511f91..86c8ae2 100644
> --- a/xen/arch/arm/gic-vgic.c
> +++ b/xen/arch/arm/gic-vgic.c
> @@ -277,6 +277,7 @@ void vgic_sync_from_lrs(struct vcpu *v)
>           return;
>   
>       gic_hw_ops->update_hcr_status(GICH_HCR_UIE, false);
> +    gic_hw_ops->fetch_lrs(v);
>   
>       spin_lock(&v->arch.vgic.lock);
>   
> @@ -403,11 +404,14 @@ int vgic_vcpu_pending_irq(struct vcpu *v)
>   
>   void vgic_sync_to_lrs(void)
>   {
> +    struct vcpu *v = current;
>       ASSERT(!local_irq_is_enabled());
>   
> -    gic_restore_pending_irqs(current);
> +    gic_restore_pending_irqs(v);
>   
> -    if ( !list_empty(&current->arch.vgic.lr_pending) && lr_all_full() )
> +    gic_hw_ops->push_lrs(v);
> +
> +    if ( !list_empty(&v->arch.vgic.lr_pending) && lr_all_full() )
>           gic_hw_ops->update_hcr_status(GICH_HCR_UIE, true);
>   }
>   
> diff --git a/xen/include/asm-arm/gic.h b/xen/include/asm-arm/gic.h
> index 3d7394d..add2566 100644
> --- a/xen/include/asm-arm/gic.h
> +++ b/xen/include/asm-arm/gic.h
> @@ -187,6 +187,7 @@ struct gic_v2 {
>       uint32_t vmcr;
>       uint32_t apr;
>       uint32_t lr[64];
> +    uint64_t lr_update_mask;
>   };
>   
>   /*
> @@ -323,7 +324,7 @@ struct gic_hw_operations {
>       /* Save GIC registers */
>       void (*save_state)(struct vcpu *);
>       /* Restore GIC registers */
> -    void (*restore_state)(const struct vcpu *);
> +    void (*restore_state)(struct vcpu *);
>       /* Dump GIC LR register information */
>       void (*dump_state)(const struct vcpu *);
>   
> @@ -384,6 +385,8 @@ struct gic_hw_operations {
>       int (*iomem_deny_access)(const struct domain *d);
>       /* Handle LPIs, which require special handling */
>       void (*do_LPI)(unsigned int lpi);
> +    void (*fetch_lrs) (struct vcpu *v);
> +    void (*push_lrs) (struct vcpu *v);
>   };
>   
>   extern const struct gic_hw_operations *gic_hw_ops;
> 

-- 
Julien Grall
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 01/16] xen/arm: Re-enable interrupt later in the trap path
  2018-11-28 21:31 ` [RFC 01/16] xen/arm: Re-enable interrupt later in the trap path Andrii Anisov
@ 2018-11-28 22:00   ` Julien Grall
  2019-07-30 17:34     ` [Xen-devel] " Andrii Anisov
  0 siblings, 1 reply; 59+ messages in thread
From: Julien Grall @ 2018-11-28 22:00 UTC (permalink / raw)
  To: Andrii Anisov; +Cc: xen-devel, nd, Andrii Anisov, Stefano Stabellini



On 28/11/2018 21:31, Andrii Anisov wrote:
> From: Julien Grall <julien.grall@arm.com>
> 
> This makes function enter_hypervisor_head() being executed with
> irqs locked. This also give a fine side effect - it assures that
> LRs are cleared prior to any IRQs processing, which leads to a
> better (faster) IRQs processing.

Again, this need some rationale why is it faster. And how this is going 
to impact the other vGIC...

Cheers,

-- 
Julien Grall
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 03/16] vgic: move pause_flags check out of vgic spinlock
  2018-11-28 21:31 ` [RFC 03/16] vgic: move pause_flags check out of vgic spinlock Andrii Anisov
@ 2018-11-28 22:02   ` Julien Grall
  0 siblings, 0 replies; 59+ messages in thread
From: Julien Grall @ 2018-11-28 22:02 UTC (permalink / raw)
  To: Andrii Anisov; +Cc: xen-devel, nd, Andrii Anisov, Stefano Stabellini



On 28/11/2018 21:31, Andrii Anisov wrote:
> From: Andrii Anisov <andrii_anisov@epam.com>
> 
> Pause_flags is not related to vgic spinlock, so reduce code
> under lock.

Well technically this code is plain wrong... If you receive an interrupt 
routed to a vCPU down, then you will end up losing the interrupt forever.

So rather than moving it, we need to make the vGIC handling properly 
interrupt routed to an offline/inexisting vCPU. If I am not mistaken, 
the specs says that they should be queued until they are routed to 
another vCPU or the vCPU comes up.

Cheers,


-- 
Julien Grall
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 06/16] gic: drop interrupts enabling on interrupts processing
  2018-11-28 21:32 ` [RFC 06/16] gic: drop interrupts enabling on interrupts processing Andrii Anisov
@ 2018-11-28 22:06   ` Julien Grall
  2018-12-12 12:10     ` Julien Grall
  0 siblings, 1 reply; 59+ messages in thread
From: Julien Grall @ 2018-11-28 22:06 UTC (permalink / raw)
  To: Andrii Anisov; +Cc: xen-devel, nd, Andrii Anisov, Stefano Stabellini



On 28/11/2018 21:32, Andrii Anisov wrote:
> From: Andrii Anisov <andrii_anisov@epam.com>
> 
> This reduces the number of context switches in case we have an
> interrupt storm. We will read out all of those interrupt in the
> loop anyway.

This needs a better explanation. You might want to have a look at the 
details I provided in another discussion.

In general, I would like any changes in the vGIC to be very detailed. As 
they are usually subbtle implication on the rest of the code base.

> 
> Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
> ---
>   xen/arch/arm/gic.c | 2 --
>   1 file changed, 2 deletions(-)
> 
> diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
> index 77fc06f..b10783b 100644
> --- a/xen/arch/arm/gic.c
> +++ b/xen/arch/arm/gic.c
> @@ -390,10 +390,8 @@ void gic_interrupt(struct cpu_user_regs *regs, int is_fiq)
>   
>           if ( likely(irq >= 16 && irq < 1020) )
>           {
> -            local_irq_enable();
>               isb();
>               do_IRQ(regs, irq, is_fiq);
> -            local_irq_disable();
>           }
>   #ifdef CONFIG_GICV3
>           else if ( is_lpi(irq) )
> 

-- 
Julien Grall
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 00/16] Old GIC (gic-vgic) optimizations for GICV2
       [not found] <1543440731-21947-1-git-send-email-andrii.anisov@gmail.com>
                   ` (15 preceding siblings ...)
  2018-11-28 21:32 ` [RFC 16/16] gic: vgic: align frequently accessed data by cache line size Andrii Anisov
@ 2018-11-29  7:40 ` Andrii Anisov
  2018-11-29 11:00   ` Julien Grall
  2018-11-29 12:07   ` Andre Przywara
  16 siblings, 2 replies; 59+ messages in thread
From: Andrii Anisov @ 2018-11-29  7:40 UTC (permalink / raw)
  To: xen-devel, julien.grall, Stefano Stabellini; +Cc: Andrii Anisov


Hello,

Again, I sent this cover letter only to myself. So, here it is, hope it does not break the thread. Sorry for the mess.


From: Andrii Anisov <andrii.anisov@gmail.com>
Sent: Wednesday, November 28, 2018 11:31 PM
Cc: Andrii Anisov
Subject: [RFC 00/16] Old GIC (gic-vgic) optimizations for GICV2
  

From: Andrii Anisov <andrii_anisov@epam.com>

This patch series is an attempt to reduce IRQ latency with the
old GIC implementation (gic-vgic). These patches originally based
on XEN 4.10 release. The motivation was to improve benchmark
results of a system given to a customer for evaluation.
This patch series is tailored for GICv2 on RCAR H3. Several
of the most controversial patches (i.e. LRs shadowing) were
not shared to the customer, and here are for comments and discussion.
I hope several patches from here could be upstreamed. Some as is,
others with modifications.

There are several simple ideas behind these changes:
    - reduce an excessive code (condition checks)
    - drop an excessive peripheral register accesses
    - if not reduce, then move an excessive code out of spinlocks
    - if not drop, then move an excessive register
      accesses out of spinlocks

With porting existing patches, I've got another idea that accessing
PER_CPU variables like `current` and `lr_mask` is not really cheap.
So it should produce faster code keeping `lr_mask` solely in
`struct vcpu` and pass `current` as a function parameter instead
of calculation it each time in called functions.

Andrii Anisov (15):
  hack: drop GIC v3 support
  vgic: move pause_flags check out of vgic spinlock
  vgic: move irq_to_pending out of lock
  gic-vgic: Drop an excessive clear_lrs
  gic: drop interrupts enabling on interrupts processing
  gic-vgic:vgic: do not keep disabled IRQs in any of queues
  gic: separate ppi processing
  gic-vgic:vgic: avoid excessive conversions
  gic:vgic:gic-vgic: introduce non-atomic bitops
  irq: skip action avalability check for guest's IRQ
  gic-v2: Write HCR only on change
  gic-vgic: skip irqs locking
  hack: arm/domain: simplify context restore from idle vcpu
  hack: move gicv2 LRs reads and writes out of spinlocks
  gic: vgic: align frequently accessed data by cache line size

Julien Grall (1):
  xen/arm: Re-enable interrupt later in the trap path

 xen/arch/arm/arm64/entry.S           | 11 ++---
 xen/arch/arm/configs/arm64_defconfig |  1 +
 xen/arch/arm/domain.c                | 25 ++++++----
 xen/arch/arm/gic-v2.c                | 63 ++++++++++++++++--------
 xen/arch/arm/gic-v3-lpi.c            |  2 +
 xen/arch/arm/gic-v3.c                |  2 +
 xen/arch/arm/gic-vgic.c              | 84 +++++++++++++++++++-------------
 xen/arch/arm/gic.c                   | 32 +++++++++++--
 xen/arch/arm/irq.c                   | 46 +++++++++++++++---
 xen/arch/arm/traps.c                 |  6 +++
 xen/arch/arm/vgic-v3-its.c           |  3 +-
 xen/arch/arm/vgic.c                  | 93 ++++++++++++++++++++++++++++++------
 xen/arch/arm/vgic/vgic.c             |  2 +
 xen/include/asm-arm/config.h         |  2 +-
 xen/include/asm-arm/gic.h            | 10 ++--
 xen/include/asm-arm/irq.h            |  3 ++
 xen/include/asm-arm/vgic.h           | 24 ++++++----
 xen/include/xen/sched.h              |  1 +
 18 files changed, 299 insertions(+), 111 deletions(-)

-- 
2.7.4

    
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 09/16] gic-vgic:vgic: avoid excessive conversions
  2018-11-28 21:32 ` [RFC 09/16] gic-vgic:vgic: avoid excessive conversions Andrii Anisov
@ 2018-11-29 10:23   ` Julien Grall
  0 siblings, 0 replies; 59+ messages in thread
From: Julien Grall @ 2018-11-29 10:23 UTC (permalink / raw)
  To: Andrii Anisov; +Cc: xen-devel, Stefano Stabellini, Andrii Anisov

Hi Andrii,

On 28/11/2018 21:32, Andrii Anisov wrote:
> From: Andrii Anisov <andrii_anisov@epam.com>
> 
> Avoid excessive conversions between pending_irq and irq number/priority.
> This simlifies functions interface and reduces under locks code size.

I was expecting the commit message to be updated based on the discussion we had 
when you sent this patch separately.

Cheers,

> 
> Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
> ---
>   xen/arch/arm/gic-vgic.c    | 10 +++-------
>   xen/arch/arm/vgic-v3-its.c |  2 +-
>   xen/arch/arm/vgic.c        |  6 +++---
>   xen/include/asm-arm/gic.h  |  3 ---
>   xen/include/asm-arm/vgic.h |  2 ++
>   5 files changed, 9 insertions(+), 14 deletions(-)
> 
> diff --git a/xen/arch/arm/gic-vgic.c b/xen/arch/arm/gic-vgic.c
> index 9bbd0c5..f0e6c6f 100644
> --- a/xen/arch/arm/gic-vgic.c
> +++ b/xen/arch/arm/gic-vgic.c
> @@ -75,10 +75,8 @@ void gic_remove_from_lr_pending(struct vcpu *v, struct pending_irq *p)
>       list_del_init(&p->lr_queue);
>   }
>   
> -void gic_raise_inflight_irq(struct vcpu *v, unsigned int virtual_irq)
> +void gic_raise_inflight_irq(struct vcpu *v, struct pending_irq *n)
>   {
> -    struct pending_irq *n = irq_to_pending(v, virtual_irq);
> -
>   #ifdef CONFIG_GICV3
>       /* If an LPI has been removed meanwhile, there is nothing left to raise. */
>       if ( unlikely(!n) )
> @@ -135,12 +133,10 @@ static unsigned int gic_find_unused_lr(struct vcpu *v,
>       return lr;
>   }
>   
> -void gic_raise_guest_irq(struct vcpu *v, unsigned int virtual_irq,
> -        unsigned int priority)
> +void gic_raise_guest_irq(struct vcpu *v, struct pending_irq *p)
>   {
>       int i;
>       unsigned int nr_lrs = gic_get_nr_lrs();
> -    struct pending_irq *p = irq_to_pending(v, virtual_irq);
>   
>       ASSERT(spin_is_locked(&v->arch.vgic.lock));
>   
> @@ -233,7 +229,7 @@ static void gic_update_one_lr(struct vcpu *v, int i)
>           if ( test_bit(GIC_IRQ_GUEST_ENABLED, &p->status) &&
>                test_bit(GIC_IRQ_GUEST_QUEUED, &p->status) &&
>                !test_bit(GIC_IRQ_GUEST_MIGRATING, &p->status) )
> -            gic_raise_guest_irq(v, irq, p->priority);
> +            gic_raise_guest_irq(v, p);
>           else {
>               list_del_init(&p->inflight);
>               /*
> diff --git a/xen/arch/arm/vgic-v3-its.c b/xen/arch/arm/vgic-v3-its.c
> index 3c6693d..f9bd27f 100644
> --- a/xen/arch/arm/vgic-v3-its.c
> +++ b/xen/arch/arm/vgic-v3-its.c
> @@ -448,7 +448,7 @@ static void update_lpi_vgic_status(struct vcpu *v, struct pending_irq *p)
>       {
>           if ( !list_empty(&p->inflight) &&
>                !test_bit(GIC_IRQ_GUEST_VISIBLE, &p->status) )
> -            gic_raise_guest_irq(v, p->irq, p->lpi_priority);
> +            gic_raise_guest_irq(v, p);
>       }
>       else
>           gic_remove_from_lr_pending(v, p);
> diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
> index e30a518..c142476 100644
> --- a/xen/arch/arm/vgic.c
> +++ b/xen/arch/arm/vgic.c
> @@ -434,7 +434,7 @@ void vgic_enable_irqs(struct vcpu *v, uint32_t r, int n)
>           }
>   out:
>           if ( !list_empty(&p->inflight) && !test_bit(GIC_IRQ_GUEST_VISIBLE, &p->status) )
> -            gic_raise_guest_irq(v_target, irq, p->priority);
> +            gic_raise_guest_irq(v_target, p);
>           spin_unlock_irqrestore(&v_target->arch.vgic.lock, flags);
>           if ( p->desc != NULL )
>           {
> @@ -608,14 +608,14 @@ void vgic_inject_irq(struct domain *d, struct vcpu *v, unsigned int virq,
>   
>       if ( !list_empty(&n->inflight) )
>       {
> -        gic_raise_inflight_irq(v, virq);
> +        gic_raise_inflight_irq(v, n);
>           goto out;
>       }
>   
>       priority = vgic_get_virq_priority(v, virq);
>       n->priority = priority;
>   
> -    gic_raise_guest_irq(v, virq, priority);
> +    gic_raise_guest_irq(v, n);
>   
>       list_for_each_entry ( iter, &v->arch.vgic.inflight_irqs, inflight )
>       {
> diff --git a/xen/include/asm-arm/gic.h b/xen/include/asm-arm/gic.h
> index fab02f1..3d7394d 100644
> --- a/xen/include/asm-arm/gic.h
> +++ b/xen/include/asm-arm/gic.h
> @@ -252,9 +252,6 @@ int gic_remove_irq_from_guest(struct domain *d, unsigned int virq,
>   extern void gic_clear_pending_irqs(struct vcpu *v);
>   
>   extern void init_maintenance_interrupt(void);
> -extern void gic_raise_guest_irq(struct vcpu *v, unsigned int irq,
> -        unsigned int priority);
> -extern void gic_raise_inflight_irq(struct vcpu *v, unsigned int virtual_irq);
>   
>   /* Accept an interrupt from the GIC and dispatch its handler */
>   extern void gic_interrupt(struct cpu_user_regs *regs, int is_fiq);
> diff --git a/xen/include/asm-arm/vgic.h b/xen/include/asm-arm/vgic.h
> index 7507162..a27a1a9 100644
> --- a/xen/include/asm-arm/vgic.h
> +++ b/xen/include/asm-arm/vgic.h
> @@ -287,6 +287,8 @@ enum gic_sgi_mode;
>   extern struct vcpu *vgic_get_target_vcpu(struct vcpu *v, unsigned int virq);
>   extern void vgic_remove_irq_from_queues(struct vcpu *v, struct pending_irq *p);
>   extern void gic_remove_from_lr_pending(struct vcpu *v, struct pending_irq *p);
> +extern void gic_raise_guest_irq(struct vcpu *v, struct pending_irq *p);
> +extern void gic_raise_inflight_irq(struct vcpu *v, struct pending_irq *n);
>   extern void vgic_init_pending_irq(struct pending_irq *p, unsigned int virq);
>   extern struct pending_irq *irq_to_pending(struct vcpu *v, unsigned int irq);
>   extern struct pending_irq *spi_to_pending(struct domain *d, unsigned int irq);
> 

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 00/16] Old GIC (gic-vgic) optimizations for GICV2
  2018-11-29  7:40 ` [RFC 00/16] Old GIC (gic-vgic) optimizations for GICV2 Andrii Anisov
@ 2018-11-29 11:00   ` Julien Grall
  2018-11-29 12:07   ` Andre Przywara
  1 sibling, 0 replies; 59+ messages in thread
From: Julien Grall @ 2018-11-29 11:00 UTC (permalink / raw)
  To: Andrii Anisov, xen-devel, Stefano Stabellini
  Cc: Andre Przywara, Andrii Anisov

Hi,

On 29/11/2018 07:40, Andrii Anisov wrote:
> 
> Hello,
> 
> Again, I sent this cover letter only to myself. So, here it is, hope it does not break the thread. Sorry for the mess.

It is seems to be part of the thread. Thank you!

> 
> 
> From: Andrii Anisov <andrii.anisov@gmail.com>
> Sent: Wednesday, November 28, 2018 11:31 PM
> Cc: Andrii Anisov
> Subject: [RFC 00/16] Old GIC (gic-vgic) optimizations for GICV2
>    
> 
> From: Andrii Anisov <andrii_anisov@epam.com>
> 
> This patch series is an attempt to reduce IRQ latency with the
> old GIC implementation (gic-vgic). These patches originally based
> on XEN 4.10 release. The motivation was to improve benchmark
> results of a system given to a customer for evaluation.

I will repeat what I said on patch #15 and expanding it a bit. This series
is touching the old vGIC we plan to decommissioned soon. Amongst may issues
with the old vGIC the majors one are the locking is fragile
and edge/level interrupts are not handled correctly.

Furthermore, a lot of those patches seem to borrow the ideas from the new vGIC.
So I think it would be more beneficial to focus on the new vGIC that will be
the only vGIC supported soon.


> This patch series is tailored for GICv2 on RCAR H3. > Several
> of the most controversial patches (i.e. LRs shadowing) were
> not shared to the customer, and here are for comments and discussion.

We need benchmark and detailed explanation to understand where the latency
comes from and how every patches reduce the latency. Ideally I would like to
see the performance improvement done patch by patch so we can decide
which one are worth it.

> I hope several patches from here could be upstreamed. Some as is,
> others with modifications.
> 
> There are several simple ideas behind these changes:
>      - reduce an excessive code (condition checks)

This makes sense.

>      - drop an excessive peripheral register accesses

This needs discussion. I will have a look at the patches.

>      - if not reduce, then move an excessive code out of spinlocks
>      - if not drop, then move an excessive register
>        accesses out of spinlocks
In general spinlocks should not be contended otherwise you have other
issues. Moving code of spinlock is only worth it if that code may
return in most of the cases.

> 
> With porting existing patches, I've got another idea that accessing
> PER_CPU variables like `current` and `lr_mask` is not really cheap.
How come? The resulting code for lr_mask is pretty trivial.

C function:

uint64_t foo(void)
{
     return this_cpu(lr_mask);
}

Assembly code:

44:   90000000        adrp    x0, 0 <maintenance_interrupt>
48:   91000000        add     x0, x0, #0x0
4c:   d53cd041        mrs     x1, tpidr_el2
50:   f8616800        ldr     x0, [x0, x1]
54:   d65f03c0        ret

So this is one memory load. Leaving aside nested virt, TPDIR_EL2
should be fast to access.

> So it should produce faster code keeping `lr_mask` solely in
> `struct vcpu` and pass `current` as a function parameter instead
> of calculation it each time in called functions.

So you are going to save 3 instructions... But that's just a drop in the bucket 
compare to the rest of code. Actually, I would be surprised if you actually 
notice it on your benchmark.

Overall, I think that there are other places to look at where
the benefits will be much bigger. We can discuss about smaller one if the 
performance are still low.

Cheers,

> 
> Andrii Anisov (15):
>    hack: drop GIC v3 support
>    vgic: move pause_flags check out of vgic spinlock
>    vgic: move irq_to_pending out of lock
>    gic-vgic: Drop an excessive clear_lrs
>    gic: drop interrupts enabling on interrupts processing
>    gic-vgic:vgic: do not keep disabled IRQs in any of queues
>    gic: separate ppi processing
>    gic-vgic:vgic: avoid excessive conversions
>    gic:vgic:gic-vgic: introduce non-atomic bitops
>    irq: skip action avalability check for guest's IRQ
>    gic-v2: Write HCR only on change
>    gic-vgic: skip irqs locking
>    hack: arm/domain: simplify context restore from idle vcpu
>    hack: move gicv2 LRs reads and writes out of spinlocks
>    gic: vgic: align frequently accessed data by cache line size
> 
> Julien Grall (1):
>    xen/arm: Re-enable interrupt later in the trap path
> 
>   xen/arch/arm/arm64/entry.S           | 11 ++---
>   xen/arch/arm/configs/arm64_defconfig |  1 +
>   xen/arch/arm/domain.c                | 25 ++++++----
>   xen/arch/arm/gic-v2.c                | 63 ++++++++++++++++--------
>   xen/arch/arm/gic-v3-lpi.c            |  2 +
>   xen/arch/arm/gic-v3.c                |  2 +
>   xen/arch/arm/gic-vgic.c              | 84 +++++++++++++++++++-------------
>   xen/arch/arm/gic.c                   | 32 +++++++++++--
>   xen/arch/arm/irq.c                   | 46 +++++++++++++++---
>   xen/arch/arm/traps.c                 |  6 +++
>   xen/arch/arm/vgic-v3-its.c           |  3 +-
>   xen/arch/arm/vgic.c                  | 93 ++++++++++++++++++++++++++++++------
>   xen/arch/arm/vgic/vgic.c             |  2 +
>   xen/include/asm-arm/config.h         |  2 +-
>   xen/include/asm-arm/gic.h            | 10 ++--
>   xen/include/asm-arm/irq.h            |  3 ++
>   xen/include/asm-arm/vgic.h           | 24 ++++++----
>   xen/include/xen/sched.h              |  1 +
>   18 files changed, 299 insertions(+), 111 deletions(-)
> 

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 04/16] vgic: move irq_to_pending out of lock
  2018-11-28 21:31 ` [RFC 04/16] vgic: move irq_to_pending out of lock Andrii Anisov
@ 2018-11-29 11:07   ` Julien Grall
  0 siblings, 0 replies; 59+ messages in thread
From: Julien Grall @ 2018-11-29 11:07 UTC (permalink / raw)
  To: Andrii Anisov
  Cc: xen-devel, Stefano Stabellini, Andrii Anisov, Andre Przywara



On 28/11/2018 21:31, Andrii Anisov wrote:
> From: Andrii Anisov <andrii_anisov@epam.com>
> 
> For GICV2 pending_irq allocation is not concurrent, so reduce
> some code under lock.

So this is reverting part of 5f66da659060563df8481a86c017f07455095045
"ARM: vGIC: move irq_to_pending() calls under the VGIC VCPU lock".

The vGIC is meant to be common, so I don't think the patch would be correct for 
GICv3.

However, this function can only be called with SPI (PPI/SGI can't migrate). All 
but one SPI are associated to a physical interrupts. So I am not entirely 
convinced this will have any benefits on GICv2. Do you see any contention on the 
lock?

> 
> Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
> ---
>   xen/arch/arm/vgic.c | 9 ++++-----
>   1 file changed, 4 insertions(+), 5 deletions(-)
> 
> diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
> index a64633f..37857b1 100644
> --- a/xen/arch/arm/vgic.c
> +++ b/xen/arch/arm/vgic.c
> @@ -267,17 +267,16 @@ bool vgic_migrate_irq(struct vcpu *old, struct vcpu *new, unsigned int irq)
>       ASSERT(!is_lpi(irq));
>   #endif
>   
> -    spin_lock_irqsave(&old->arch.vgic.lock, flags);
> -
>       p = irq_to_pending(old, irq);
>   
>       /* nothing to do for virtual interrupts */
>       if ( p->desc == NULL )
>       {
> -        spin_unlock_irqrestore(&old->arch.vgic.lock, flags);
>           return true;
>       }
>   
> +    spin_lock_irqsave(&old->arch.vgic.lock, flags);
> +
>       /* migration already in progress, no need to do anything */
>       if ( test_bit(GIC_IRQ_GUEST_MIGRATING, &p->status) )
>       {
> @@ -569,8 +568,6 @@ void vgic_inject_irq(struct domain *d, struct vcpu *v, unsigned int virq,
>           return;
>       }
>   
> -    spin_lock_irqsave(&v->arch.vgic.lock, flags);
> -
>       n = irq_to_pending(v, virq);
>   #ifdef CONFIG_GICV3
>       /* If an LPI has been removed, there is nothing to inject here. */
> @@ -581,6 +578,8 @@ void vgic_inject_irq(struct domain *d, struct vcpu *v, unsigned int virq,
>       }
>   #endif
>   
> +    spin_lock_irqsave(&v->arch.vgic.lock, flags);
> +
>       set_bit(GIC_IRQ_GUEST_QUEUED, &n->status);
>   
>       if ( !list_empty(&n->inflight) )
> 

Cheers,

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 00/16] Old GIC (gic-vgic) optimizations for GICV2
  2018-11-29  7:40 ` [RFC 00/16] Old GIC (gic-vgic) optimizations for GICV2 Andrii Anisov
  2018-11-29 11:00   ` Julien Grall
@ 2018-11-29 12:07   ` Andre Przywara
  1 sibling, 0 replies; 59+ messages in thread
From: Andre Przywara @ 2018-11-29 12:07 UTC (permalink / raw)
  To: Andrii Anisov; +Cc: Andrii Anisov, julien.grall, Stefano Stabellini, xen-devel

On Thu, 29 Nov 2018 07:40:00 +0000
Andrii Anisov <Andrii_Anisov@epam.com> wrote:

> Hello,
> 
> Again, I sent this cover letter only to myself. So, here it is, hope
> it does not break the thread. Sorry for the mess.
> 
> 
> From: Andrii Anisov <andrii.anisov@gmail.com>
> Sent: Wednesday, November 28, 2018 11:31 PM
> Cc: Andrii Anisov
> Subject: [RFC 00/16] Old GIC (gic-vgic) optimizations for GICV2
>   
> 
> From: Andrii Anisov <andrii_anisov@epam.com>
> 
> This patch series is an attempt to reduce IRQ latency with the
> old GIC implementation (gic-vgic). These patches originally based
> on XEN 4.10 release. The motivation was to improve benchmark
> results of a system given to a customer for evaluation.

Have you tried the new VGIC? I am curious if you see a performance drop
compared to the old VGIC. It *should* be faster in the common case of
no (or maybe one) pending interrupts, because it doesn't bother to
touch LRs at all.

> This patch series is tailored for GICv2 on RCAR H3. Several
> of the most controversial patches (i.e. LRs shadowing) were
> not shared to the customer, and here are for comments and discussion.
> I hope several patches from here could be upstreamed. Some as is,
> others with modifications.
> 
> There are several simple ideas behind these changes:
>     - reduce an excessive code (condition checks)
>     - drop an excessive peripheral register accesses
>     - if not reduce, then move an excessive code out of spinlocks
>     - if not drop, then move an excessive register
>       accesses out of spinlocks

So some of the patches contain some good ideas, but as Julien said: we
would need some numbers to prove that they are worth it.

Cheers,
Andre.
 
> With porting existing patches, I've got another idea that accessing
> PER_CPU variables like `current` and `lr_mask` is not really cheap.
> So it should produce faster code keeping `lr_mask` solely in
> `struct vcpu` and pass `current` as a function parameter instead
> of calculation it each time in called functions.
> 
> Andrii Anisov (15):
>   hack: drop GIC v3 support
>   vgic: move pause_flags check out of vgic spinlock
>   vgic: move irq_to_pending out of lock
>   gic-vgic: Drop an excessive clear_lrs
>   gic: drop interrupts enabling on interrupts processing
>   gic-vgic:vgic: do not keep disabled IRQs in any of queues
>   gic: separate ppi processing
>   gic-vgic:vgic: avoid excessive conversions
>   gic:vgic:gic-vgic: introduce non-atomic bitops
>   irq: skip action avalability check for guest's IRQ
>   gic-v2: Write HCR only on change
>   gic-vgic: skip irqs locking
>   hack: arm/domain: simplify context restore from idle vcpu
>   hack: move gicv2 LRs reads and writes out of spinlocks
>   gic: vgic: align frequently accessed data by cache line size
> 
> Julien Grall (1):
>   xen/arm: Re-enable interrupt later in the trap path
> 
>  xen/arch/arm/arm64/entry.S           | 11 ++---
>  xen/arch/arm/configs/arm64_defconfig |  1 +
>  xen/arch/arm/domain.c                | 25 ++++++----
>  xen/arch/arm/gic-v2.c                | 63 ++++++++++++++++--------
>  xen/arch/arm/gic-v3-lpi.c            |  2 +
>  xen/arch/arm/gic-v3.c                |  2 +
>  xen/arch/arm/gic-vgic.c              | 84
> +++++++++++++++++++------------- xen/arch/arm/gic.c
> | 32 +++++++++++-- xen/arch/arm/irq.c                   | 46
> +++++++++++++++--- xen/arch/arm/traps.c                 |  6 +++
>  xen/arch/arm/vgic-v3-its.c           |  3 +-
>  xen/arch/arm/vgic.c                  | 93
> ++++++++++++++++++++++++++++++------
> xen/arch/arm/vgic/vgic.c             |  2 +
> xen/include/asm-arm/config.h         |  2 +-
> xen/include/asm-arm/gic.h            | 10 ++--
> xen/include/asm-arm/irq.h            |  3 ++
> xen/include/asm-arm/vgic.h           | 24 ++++++----
> xen/include/xen/sched.h              |  1 + 18 files changed, 299
> insertions(+), 111 deletions(-)
> 


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 02/16] hack: drop GIC v3 support
  2018-11-28 21:31 ` [RFC 02/16] hack: drop GIC v3 support Andrii Anisov
@ 2018-11-29 12:08   ` Andre Przywara
  0 siblings, 0 replies; 59+ messages in thread
From: Andre Przywara @ 2018-11-29 12:08 UTC (permalink / raw)
  To: Andrii Anisov; +Cc: xen-devel, Julien Grall, Stefano Stabellini, Andrii Anisov

On Wed, 28 Nov 2018 23:31:57 +0200
Andrii Anisov <andrii.anisov@gmail.com> wrote:

Hi,

> From: Andrii Anisov <andrii_anisov@epam.com>
> 
> This reduces some code and conditions in an IRQ processing path,
> and opens way to further code reduction.

While I understand that this is some sort of a hack, I am commenting
just on this patch to demonstrate some issues I see all over the series.
So please apply those ideas to the other patches as well if applicable.

> Also insert compilation errors into gicv3 code, because it would
> not compile or work with following patches.
> 
> Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
> ---
>  xen/arch/arm/configs/arm64_defconfig |  1 +
>  xen/arch/arm/gic-v3-lpi.c            |  2 ++
>  xen/arch/arm/gic-v3.c                |  2 ++
>  xen/arch/arm/gic-vgic.c              | 13 ++++++++++++-
>  xen/arch/arm/gic.c                   |  6 ++++++
>  xen/arch/arm/vgic-v3-its.c           |  1 +
>  xen/arch/arm/vgic.c                  | 20 ++++++++++++++++++++
>  xen/arch/arm/vgic/vgic.c             |  2 ++
>  xen/include/asm-arm/irq.h            |  2 ++
>  xen/include/asm-arm/vgic.h           | 17 +++++++++++------
>  10 files changed, 59 insertions(+), 7 deletions(-)
> 
> diff --git a/xen/arch/arm/configs/arm64_defconfig
> b/xen/arch/arm/configs/arm64_defconfig index e69de29..452c7ac 100644
> --- a/xen/arch/arm/configs/arm64_defconfig
> +++ b/xen/arch/arm/configs/arm64_defconfig
> @@ -0,0 +1 @@
> +# CONFIG_GICV3 is not set
> diff --git a/xen/arch/arm/gic-v3-lpi.c b/xen/arch/arm/gic-v3-lpi.c
> index e8c6e15..be64e17 100644
> --- a/xen/arch/arm/gic-v3-lpi.c
> +++ b/xen/arch/arm/gic-v3-lpi.c
> @@ -32,6 +32,8 @@
>  #include <asm/page.h>
>  #include <asm/sysregs.h>
>  
> +#error "The current gic/vgic/domain code does not support GICv3"
> +

I didn't check too thoroughly, but why do you need this? Your code
changes below seem to be guarded by #ifdef CONFIG_GICV3, so this file
(and the next) wouldn't even be build.

>  /*
>   * There could be a lot of LPIs on the host side, and they always go
> to
>   * a guest. So having a struct irq_desc for each of them would be
> wasteful diff --git a/xen/arch/arm/gic-v3.c b/xen/arch/arm/gic-v3.c
> index 6fbc106..8e835b5 100644
> --- a/xen/arch/arm/gic-v3.c
> +++ b/xen/arch/arm/gic-v3.c
> @@ -44,6 +44,8 @@
>  #include <asm/io.h>
>  #include <asm/sysregs.h>
>  
> +#error "The current gic/vgic/domain code does not support GICv3"
> +
>  /* Global state */
>  static struct {
>      void __iomem *map_dbase;  /* Mapped address of distributor
> registers */ diff --git a/xen/arch/arm/gic-vgic.c
> b/xen/arch/arm/gic-vgic.c index 990399c..869987a 100644
> --- a/xen/arch/arm/gic-vgic.c
> +++ b/xen/arch/arm/gic-vgic.c
> @@ -36,7 +36,9 @@ static inline void gic_set_lr(int lr, struct
> pending_irq *p, {
>      ASSERT(!local_irq_is_enabled());
>  
> +#ifdef CONFIG_GICV3
>      clear_bit(GIC_IRQ_GUEST_PRISTINE_LPI, &p->status);
> +#endif
>  
>      gic_hw_ops->update_lr(lr, p->irq, p->priority,
>                            p->desc ? p->desc->irq : INVALID_IRQ,
> state); @@ -77,9 +79,11 @@ void gic_raise_inflight_irq(struct vcpu
> *v, unsigned int virtual_irq) {
>      struct pending_irq *n = irq_to_pending(v, virtual_irq);
>  
> +#ifdef CONFIG_GICV3
>      /* If an LPI has been removed meanwhile, there is nothing left
> to raise. */ if ( unlikely(!n) )
>          return;
> +#endif

So are you sure that this really saves you something? This check boils
down to a:
	cbz     x0, 670 <gic_raise_inflight_irq+0x88>
in assembly, which is basically free on modern CPUs.
Surprisingly removing this removes some more instructions from the
function, it would be interesting to know why.

In general I get the impression you optimise things just by looking at
the C code. Does saving a few instructions here and there really make a
difference? For instance I'd say that any non-memory instructions are
not worth to think about, and any stack accesses are probably caught in
the L1 cache.

In general I believe your performance drop is due to *exits* from the
guests due to the h/w interrupts, not necessarily because of the VGIC
code. So to reduce latency I think it would be more worthwhile to see
if we can reduce the world switch time, by avoiding unnessary
save/restores.
At least that saved a lot in KVM (with VHE), although the conditions
there are quite different.

>      ASSERT(spin_is_locked(&v->arch.vgic.lock));
>  
> @@ -112,13 +116,14 @@ static unsigned int gic_find_unused_lr(struct
> vcpu *v, {
>      unsigned int nr_lrs = gic_get_nr_lrs();
>      unsigned long *lr_mask = (unsigned long *) &this_cpu(lr_mask);
> -    struct gic_lr lr_val;
>  
>      ASSERT(spin_is_locked(&v->arch.vgic.lock));
>  
> +#ifdef CONFIG_GICV3

I am not against removing GICv3 code from the code path if that is
not configured, but obviously those #ifdef's directly in the code are
hideous.
If you can prove that this saves you something, you could think
factoring out those functions, and define them as empty if CONFIG_GICV3
is not defined.
In this case you could do (outside of this function):

#ifdef CONFIG_GICV3
#define lpi_pristine_bit_set(p)	\
	test_bit(GIC_IRQ_GUEST_PRISTINE_LPI, &(p)->status)
#else
#define lpi_pristine_bit_set(p) false
#endif

That should make a fairly recent GCC remove the whole loop. Btw: Make
sure you use at *least* GCC 6, but preferably a newer version. They
improved quite a lot when it comes to dead code elimination.

>      if ( unlikely(test_bit(GIC_IRQ_GUEST_PRISTINE_LPI, &p->status)) )
>      {
>          unsigned int used_lr;
> +        struct gic_lr lr_val;
>  
>          for_each_set_bit(used_lr, lr_mask, nr_lrs)
>          {
> @@ -127,6 +132,7 @@ static unsigned int gic_find_unused_lr(struct
> vcpu *v, return used_lr;
>          }
>      }
> +#endif
>  
>      lr = find_next_zero_bit(lr_mask, nr_lrs, lr);
>  
> @@ -142,9 +148,11 @@ void gic_raise_guest_irq(struct vcpu *v,
> unsigned int virtual_irq, 
>      ASSERT(spin_is_locked(&v->arch.vgic.lock));
>  
> +#ifdef CONFIG_GICV3
>      if ( unlikely(!p) )
>          /* An unmapped LPI does not need to be raised. */
>          return;
> +#endif
>  
>      if ( v == current && list_empty(&v->arch.vgic.lr_pending) )
>      {
> @@ -172,6 +180,8 @@ static void gic_update_one_lr(struct vcpu *v, int
> i) gic_hw_ops->read_lr(i, &lr_val);
>      irq = lr_val.virq;
>      p = irq_to_pending(v, irq);
> +
> +#ifdef CONFIG_GICV3
>      /*
>       * An LPI might have been unmapped, in which case we just clean
> up here.
>       * If that LPI is marked as PRISTINE, the information in the LR
> is bogus, @@ -188,6 +198,7 @@ static void gic_update_one_lr(struct
> vcpu *v, int i) 
>          return;
>      }
> +#endif
>  
>      if ( lr_val.active )
>      {
> diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
> index 6cc7dec..77fc06f 100644
> --- a/xen/arch/arm/gic.c
> +++ b/xen/arch/arm/gic.c
> @@ -136,7 +136,9 @@ int gic_route_irq_to_guest(struct domain *d,
> unsigned int virq, /* Caller has already checked that the IRQ is an
> SPI */ ASSERT(virq >= 32);
>      ASSERT(virq < vgic_num_irqs(d));
> +#ifdef CONFIG_GICV3
>      ASSERT(!is_lpi(virq));
> +#endif

Wait, did you actually benchmark a debug build?
ASSERTs define to nothing unless you have CONFIG_DEBUG=y. We insert
them excessively under this premise: Have seemingly obvious checks to
help catching bugs, they won't cost anything normally in production
builds.
  
>      /*
>       * When routing an IRQ to guest, the virtual state is not synced
> @@ -168,7 +170,9 @@ int gic_remove_irq_from_guest(struct domain *d,
> unsigned int virq, 
>      ASSERT(spin_is_locked(&desc->lock));
>      ASSERT(test_bit(_IRQ_GUEST, &desc->status));
> +#ifdef CONFIG_GICV3
>      ASSERT(!is_lpi(virq));
> +#endif
>  
>      /*
>       * Removing an interrupt while the domain is running may have
> @@ -391,6 +395,7 @@ void gic_interrupt(struct cpu_user_regs *regs,
> int is_fiq) do_IRQ(regs, irq, is_fiq);
>              local_irq_disable();
>          }
> +#ifdef CONFIG_GICV3
>          else if ( is_lpi(irq) )

Similarly to what I wrote above, you could make sure that is_lpi()
always return false if GICv3 is not configured. GCC will then remove
any code using this function.
This would save you most of the chunks below and keep the code readable.

>          {
>              local_irq_enable();
> @@ -398,6 +403,7 @@ void gic_interrupt(struct cpu_user_regs *regs,
> int is_fiq) gic_hw_ops->do_LPI(irq);
>              local_irq_disable();
>          }
> +#endif
>          else if ( unlikely(irq < 16) )
>          {
>              do_sgi(regs, irq);
> diff --git a/xen/arch/arm/vgic-v3-its.c b/xen/arch/arm/vgic-v3-its.c
> index 5b73c4e..3c6693d 100644
> --- a/xen/arch/arm/vgic-v3-its.c
> +++ b/xen/arch/arm/vgic-v3-its.c
> @@ -47,6 +47,7 @@
>  #include <asm/vgic-emul.h>
>  #include <asm/vreg.h>
>  
> +#error "The current gic/vgic/domain code does not support GICv3"
>  /*
>   * Data structure to describe a virtual ITS.
>   * If both the vcmd_lock and the its_lock are required, the
> vcmd_lock must diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
> index f2608b0..a2419d0 100644
> --- a/xen/arch/arm/vgic.c
> +++ b/xen/arch/arm/vgic.c
> @@ -64,14 +64,18 @@ struct vgic_irq_rank *vgic_rank_irq(struct vcpu
> *v, unsigned int irq) 
>  void vgic_init_pending_irq(struct pending_irq *p, unsigned int virq)
>  {
> +#ifdef CONFIG_GICV3
>      /* The lpi_vcpu_id field must be big enough to hold a VCPU ID. */
>      BUILD_BUG_ON(BIT(sizeof(p->lpi_vcpu_id) * 8) < MAX_VIRT_CPUS);
> +#endif
>  
>      memset(p, 0, sizeof(*p));
>      INIT_LIST_HEAD(&p->inflight);
>      INIT_LIST_HEAD(&p->lr_queue);
>      p->irq = virq;
> +#ifdef CONFIG_GICV3
>      p->lpi_vcpu_id = INVALID_VCPU_ID;
> +#endif

IIUC, vgic_init_pending_irq() is only called on domain/VGIC init (and on
mapping an LPI). So how does optimising this function help you with
your interrupt latency?

>  }
>  
>  static void vgic_rank_init(struct vgic_irq_rank *rank, uint8_t index,
> @@ -243,9 +247,11 @@ static int vgic_get_virq_priority(struct vcpu
> *v, unsigned int virq) {
>      struct vgic_irq_rank *rank;
>  
> +#ifdef CONFIG_GICV3
>      /* LPIs don't have a rank, also store their priority separately.
> */ if ( is_lpi(virq) )
>          return
> v->domain->arch.vgic.handler->lpi_get_priority(v->domain, virq);
> +#endif 
>      rank = vgic_rank_irq(v, virq);
>      return ACCESS_ONCE(rank->priority[virq & INTERRUPT_RANK_MASK]);
> @@ -256,8 +262,10 @@ bool vgic_migrate_irq(struct vcpu *old, struct
> vcpu *new, unsigned int irq) unsigned long flags;
>      struct pending_irq *p;
>  
> +#ifdef CONFIG_GICV3
>      /* This will never be called for an LPI, as we don't migrate
> them. */ ASSERT(!is_lpi(irq));
> +#endif
>  
>      spin_lock_irqsave(&old->arch.vgic.lock, flags);
>  
> @@ -312,6 +320,7 @@ void arch_move_irqs(struct vcpu *v)
>      struct vcpu *v_target;
>      int i;
>  
> +#ifdef CONFIG_GICV3
>      /*
>       * We don't migrate LPIs at the moment.
>       * If we ever do, we must make sure that the struct pending_irq
> does @@ -322,6 +331,7 @@ void arch_move_irqs(struct vcpu *v)
>       * don't participate.
>       */
>      ASSERT(!is_lpi(vgic_num_irqs(d) - 1));
> +#endif
>  
>      for ( i = 32; i < vgic_num_irqs(d); i++ )
>      {
> @@ -343,8 +353,10 @@ void vgic_disable_irqs(struct vcpu *v, uint32_t
> r, int n) int i = 0;
>      struct vcpu *v_target;
>  
> +#ifdef CONFIG_GICV3
>      /* LPIs will never be disabled via this function. */
>      ASSERT(!is_lpi(32 * n + 31));
> +#endif
>  
>      while ( (i = find_next_bit(&mask, 32, i)) < 32 ) {
>          irq = i + (32 * n);
> @@ -393,8 +405,10 @@ void vgic_enable_irqs(struct vcpu *v, uint32_t
> r, int n) struct vcpu *v_target;
>      struct domain *d = v->domain;
>  
> +#ifdef CONFIG_GICV3
>      /* LPIs will never be enabled via this function. */
>      ASSERT(!is_lpi(32 * n + 31));
> +#endif
>  
>      while ( (i = find_next_bit(&mask, 32, i)) < 32 ) {
>          irq = i + (32 * n);
> @@ -490,8 +504,10 @@ struct pending_irq *irq_to_pending(struct vcpu
> *v, unsigned int irq)
>       * are used for SPIs; the rests are used for per cpu irqs */
>      if ( irq < 32 )
>          n = &v->arch.vgic.pending_irqs[irq];
> +#ifdef CONFIG_GICV3
>      else if ( is_lpi(irq) )
>          n = v->domain->arch.vgic.handler->lpi_to_pending(v->domain,
> irq); +#endif
>      else
>          n = &v->domain->arch.vgic.pending_irqs[irq - 32];
>      return n;
> @@ -550,12 +566,14 @@ void vgic_inject_irq(struct domain *d, struct
> vcpu *v, unsigned int virq, spin_lock_irqsave(&v->arch.vgic.lock,
> flags); 
>      n = irq_to_pending(v, virq);
> +#ifdef CONFIG_GICV3
>      /* If an LPI has been removed, there is nothing to inject here.
> */ if ( unlikely(!n) )
>      {
>          spin_unlock_irqrestore(&v->arch.vgic.lock, flags);
>          return;
>      }
> +#endif
>  
>      /* vcpu offline */
>      if ( test_bit(_VPF_down, &v->pause_flags) )
> @@ -607,8 +625,10 @@ bool vgic_evtchn_irq_pending(struct vcpu *v)
>      struct pending_irq *p;
>  
>      p = irq_to_pending(v, v->domain->arch.evtchn_irq);
> +#ifdef CONFIG_GICV3
>      /* Does not work for LPIs. */
>      ASSERT(!is_lpi(v->domain->arch.evtchn_irq));
> +#endif
>  
>      return list_empty(&p->inflight);
>  }
> diff --git a/xen/arch/arm/vgic/vgic.c b/xen/arch/arm/vgic/vgic.c
> index e2844dc..b8dbdaf 100644
> --- a/xen/arch/arm/vgic/vgic.c
> +++ b/xen/arch/arm/vgic/vgic.c
> @@ -703,8 +703,10 @@ bool vgic_evtchn_irq_pending(struct vcpu *v)
>      unsigned long flags;
>      bool pending;
>  
> +#ifdef CONFIG_GICV3
>      /* Does not work for LPIs. */
>      ASSERT(!is_lpi(v->domain->arch.evtchn_irq));
> +#endif
>  
>      irq = vgic_get_irq(v->domain, v, v->domain->arch.evtchn_irq);
>      spin_lock_irqsave(&irq->irq_lock, flags);
> diff --git a/xen/include/asm-arm/irq.h b/xen/include/asm-arm/irq.h
> index e45d574..4f1ef3c 100644
> --- a/xen/include/asm-arm/irq.h
> +++ b/xen/include/asm-arm/irq.h
> @@ -63,10 +63,12 @@ struct irq_desc *__irq_to_desc(int irq);
>  
>  void do_IRQ(struct cpu_user_regs *regs, unsigned int irq, int
> is_fiq); 
> +#ifdef CONFIG_GICV3
>  static inline bool is_lpi(unsigned int irq)
>  {
>      return irq >= LPI_OFFSET;
>  }
> +#endif
>  
>  #define domain_pirq_to_irq(d, pirq) (pirq)
>  
> diff --git a/xen/include/asm-arm/vgic.h b/xen/include/asm-arm/vgic.h
> index 447d24e..c5cb63f 100644
> --- a/xen/include/asm-arm/vgic.h
> +++ b/xen/include/asm-arm/vgic.h
> @@ -63,27 +63,32 @@ struct pending_irq
>       * GIC_IRQ_GUEST_MIGRATING: the irq is being migrated to a
> different
>       * vcpu while it is still inflight and on an GICH_LR register on
> the
>       * old vcpu.
> -     *
> -     * GIC_IRQ_GUEST_PRISTINE_LPI: the IRQ is a newly mapped LPI,
> which
> -     * has never been in an LR before. This means that any trace of
> an
> -     * LPI with the same number in an LR must be from an older LPI,
> which
> -     * has been unmapped before.
> -     *
>       */
>  #define GIC_IRQ_GUEST_QUEUED   0
>  #define GIC_IRQ_GUEST_ACTIVE   1
>  #define GIC_IRQ_GUEST_VISIBLE  2
>  #define GIC_IRQ_GUEST_ENABLED  3
>  #define GIC_IRQ_GUEST_MIGRATING   4
> +#ifdef CONFIG_GICV3
> +    /*
> +     * GIC_IRQ_GUEST_PRISTINE_LPI: the IRQ is a newly mapped LPI,
> which
> +     * has never been in an LR before. This means that any trace of
> an
> +     * LPI with the same number in an LR must be from an older LPI,
> which
> +     * has been unmapped before.
> +     * Valid for GICV3 only.
> +     */
>  #define GIC_IRQ_GUEST_PRISTINE_LPI  5
> +#endif
>      unsigned long status;
>      struct irq_desc *desc; /* only set it the irq corresponds to a
> physical irq */ unsigned int irq;
>  #define GIC_INVALID_LR         (uint8_t)~0
>      uint8_t lr;
>      uint8_t priority;
> +#ifdef CONFIG_GICV3
>      uint8_t lpi_priority;       /* Caches the priority if this is an
> LPI. */ uint8_t lpi_vcpu_id;        /* The VCPU for an LPI. */
> +#endif

Did you see any issues with the data structure being too big? I think we
spent some effort to make sure the size is reasonable.

Cheers,
Andre.


>      /* inflight is used to append instances of pending_irq to
>       * vgic.inflight_irqs */
>      struct list_head inflight;


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 10/16] gic:vgic:gic-vgic: introduce non-atomic bitops
  2018-11-28 21:32 ` [RFC 10/16] gic:vgic:gic-vgic: introduce non-atomic bitops Andrii Anisov
@ 2018-11-29 12:14   ` Andre Przywara
  2018-11-29 16:07     ` Julien Grall
  2018-12-03 12:33     ` Andrii Anisov
  0 siblings, 2 replies; 59+ messages in thread
From: Andre Przywara @ 2018-11-29 12:14 UTC (permalink / raw)
  To: Andrii Anisov; +Cc: xen-devel, Julien Grall, Stefano Stabellini, Andrii Anisov

On Wed, 28 Nov 2018 23:32:05 +0200
Andrii Anisov <andrii.anisov@gmail.com> wrote:

Hi,

> From: Andrii Anisov <andrii_anisov@epam.com>
> 
> All bit operations for gic, vgic and gic-vgic are performed under
> spinlocks, so there is no need for atomic bit ops here, they only
> introduce excessive call to functions used more expensive exclusive
> ARM instructions.
> 
> Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
> ---
>  xen/arch/arm/gic-vgic.c | 16 ++++++++++++++++
>  xen/arch/arm/gic.c      | 16 ++++++++++++++++
>  xen/arch/arm/vgic.c     | 16 ++++++++++++++++
>  3 files changed, 48 insertions(+)
> 
> diff --git a/xen/arch/arm/gic-vgic.c b/xen/arch/arm/gic-vgic.c
> index f0e6c6f..5b73bbd 100644
> --- a/xen/arch/arm/gic-vgic.c
> +++ b/xen/arch/arm/gic-vgic.c
> @@ -25,6 +25,22 @@
>  #include <asm/gic.h>
>  #include <asm/vgic.h>
>  
> +#undef set_bit
> +#define set_bit(nr, addr) (*(addr) |= (1<<nr))
> +
> +#undef clear_bit
> +#define clear_bit(nr, addr) (*(addr) &= ~(1<<nr))
> +
> +#undef test_bit
> +#define test_bit(nr,addr) (*(addr) & (1<<nr))
> +
> +#undef test_and_clear_bit
> +#define test_and_clear_bit(nr,addr) ({                    \
> +    bool _x; \
> +    _x = (*(addr) & (1<<nr));                        \
> +    (*(addr) &= ~(1<<nr));                                \
> +    (_x);})
> +
>  #define lr_all_full() (this_cpu(lr_mask) == ((1 << gic_get_nr_lrs())
> - 1)) 
>  #undef GIC_DEBUG
> diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
> index 52e4270..d558059 100644
> --- a/xen/arch/arm/gic.c
> +++ b/xen/arch/arm/gic.c
> @@ -40,6 +40,22 @@
>  
>  DEFINE_PER_CPU(uint64_t, lr_mask);
>  
> +#undef set_bit
> +#define set_bit(nr, addr) (*(addr) |= (1<<nr))
> +
> +#undef clear_bit
> +#define clear_bit(nr, addr) (*(addr) &= ~(1<<nr))
> +
> +#undef test_bit
> +#define test_bit(nr,addr) (*(addr) & (1<<nr))
> +
> +#undef test_and_clear_bit
> +#define test_and_clear_bit(nr,addr) ({                    \
> +    bool _x; \
> +    _x = (*(addr) & (1<<nr));                        \
> +    (*(addr) &= ~(1<<nr));                                \
> +    (_x);})
> +
>  #undef GIC_DEBUG
>  
>  const struct gic_hw_operations *gic_hw_ops;
> diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
> index c142476..7691310 100644
> --- a/xen/arch/arm/vgic.c
> +++ b/xen/arch/arm/vgic.c
> @@ -33,6 +33,22 @@
>  #include <asm/gic.h>
>  #include <asm/vgic.h>
>  
> +#undef set_bit

Nah, please don't do this. Can you show that atomic bit ops are a
problem? They shouldn't be expensive unless contended, also pretty
lightweight on small systems (single cluster).

But if you really think this is useful, why not go with the Linux way
of using __set_bit to provide a non-atomic version?
This would have the big advantage that you can replace them on a
case-by-case base, which is much less risky than unconditionally
replacing every (even future!) usage in the whole file.

Cheers,
Andre.

> +#define set_bit(nr, addr) (*(addr) |= (1<<nr))
> +
> +#undef clear_bit
> +#define clear_bit(nr, addr) (*(addr) &= ~(1<<nr))
> +
> +#undef test_bit
> +#define test_bit(nr,addr) (*(addr) & (1<<nr))
> +
> +#undef test_and_clear_bit
> +#define test_and_clear_bit(nr,addr) ({                    \
> +    bool _x = (*(addr) & (1<<nr));                        \
> +    (*(addr) &= ~(1<<nr));                                \
> +    return (_x);                                                 \
> +})
> +
>  static inline struct vgic_irq_rank *vgic_get_rank(struct vcpu *v,
> int rank) {
>      if ( rank == 0 )


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 14/16] hack: arm/domain: simplify context restore from idle vcpu
  2018-11-28 21:32 ` [RFC 14/16] hack: arm/domain: simplify context restore from idle vcpu Andrii Anisov
@ 2018-11-29 15:53   ` Julien Grall
  2018-11-29 16:00   ` Wei Liu
  1 sibling, 0 replies; 59+ messages in thread
From: Julien Grall @ 2018-11-29 15:53 UTC (permalink / raw)
  To: Andrii Anisov
  Cc: Stefano Stabellini, Andrii Anisov, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Jan Beulich, xen-devel, Wei Liu

Hi Andrii,

On 28/11/2018 21:32, Andrii Anisov wrote:
> From: Andrii Anisov <andrii_anisov@epam.com>
> 
> Simplify context restore from idle vcpu to the one ran before it.
> This improves low cpu load but high irq rate use-cases.

While I agree that the context switch today is pretty inefficient, I would be 
interest to know how much you improve it.

Also, it would be nice if you can explain why you improve it that way. Why do 
you only need to restore the timer and gic?

Lastly, there are code coming up that will require p2m_save_store and 
p2m_restore_state to work in pair (see Cortex A76 erratum [1]). So that solution 
may not work very well in all the cases.

Overall, I think you can save much more in the context switch than what you 
currently do. For instance, if you are switch from vCPU A to idle vCPU you don't 
need to save the context.

You would only do it if after the idle vCPU you are scheduling a different vCPU 
or the vCPU is been migrated.

Another place of optimization when switching between 2 non-idle vCPU is 
save/restore of the VFP. You can avoid restoring them and instead trap it when 
the guest is using it.

A last place is the number of isb scattered over the context switch code. A lot 
of them are unnecessary at all because the system registers asssociated to 
EL1/EL0 are out-of-context.

I know that Stefano has been working on some patches. Stefano would it be 
possible to share your work?

Cheers,

[1] https://lists.xenproject.org/archives/html/xen-devel/2018-11/msg03241.html

> 
> Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
> ---
>   xen/arch/arm/domain.c   | 21 +++++++++++----------
>   xen/include/xen/sched.h |  1 +
>   2 files changed, 12 insertions(+), 10 deletions(-)
> 
> diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
> index 1d926dc..8e886b7 100644
> --- a/xen/arch/arm/domain.c
> +++ b/xen/arch/arm/domain.c
> @@ -187,9 +187,6 @@ static void ctxt_switch_to(struct vcpu *n)
>       WRITE_SYSREG32(vpidr, VPIDR_EL2);
>       WRITE_SYSREG(n->arch.vmpidr, VMPIDR_EL2);
>   
> -    /* VGIC */
> -    gic_restore_state(n);
> -
>       /* VFP */
>       vfp_restore_state(n);
>   
> @@ -263,11 +260,6 @@ static void ctxt_switch_to(struct vcpu *n)
>       WRITE_SYSREG(n->arch.csselr, CSSELR_EL1);
>   
>       isb();
> -
> -    /* This is could trigger an hardware interrupt from the virtual
> -     * timer. The interrupt needs to be injected into the guest. */
> -    WRITE_SYSREG32(n->arch.cntkctl, CNTKCTL_EL1);
> -    virt_timer_restore(n);
>   }
>   
>   /* Update per-VCPU guest runstate shared memory area (if registered). */
> @@ -302,8 +294,17 @@ static void update_runstate_area(struct vcpu *v)
>   static void schedule_tail(struct vcpu *prev)
>   {
>       ctxt_switch_from(prev);
> -
> -    ctxt_switch_to(current);
> +    if ( !(is_idle_vcpu(prev) && (prev->prev == current)) )
> +        ctxt_switch_to(current);
> +    /* VGIC */
> +    if ( !is_idle_vcpu(current) )
> +    {
> +        gic_restore_state(current);
> +        WRITE_SYSREG32(current->arch.cntkctl, CNTKCTL_EL1);
> +        virt_timer_restore(current);
> +    }
> +    else
> +        current->prev = prev;
>   
>       local_irq_enable();
>   
> diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
> index 0309c1f..e85108d 100644
> --- a/xen/include/xen/sched.h
> +++ b/xen/include/xen/sched.h
> @@ -272,6 +272,7 @@ struct vcpu
>       struct vpci_vcpu vpci;
>   
>       struct arch_vcpu arch;
> +    struct vcpu *prev;
>   };
>   
>   /* Per-domain lock can be recursively acquired in fault handlers. */
> 

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 14/16] hack: arm/domain: simplify context restore from idle vcpu
  2018-11-28 21:32 ` [RFC 14/16] hack: arm/domain: simplify context restore from idle vcpu Andrii Anisov
  2018-11-29 15:53   ` Julien Grall
@ 2018-11-29 16:00   ` Wei Liu
  1 sibling, 0 replies; 59+ messages in thread
From: Wei Liu @ 2018-11-29 16:00 UTC (permalink / raw)
  To: Andrii Anisov
  Cc: Stefano Stabellini, Andrii Anisov, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, Jan Beulich, xen-devel, Wei Liu

FYI the To: field is empty for your patch.

On Wed, Nov 28, 2018 at 11:32:09PM +0200, Andrii Anisov wrote:
[...]
> --- a/xen/include/xen/sched.h
> +++ b/xen/include/xen/sched.h
> @@ -272,6 +272,7 @@ struct vcpu
>      struct vpci_vcpu vpci;
>  
>      struct arch_vcpu arch;
> +    struct vcpu *prev;

I guess this is why I get CC'ed on this patch. I would suggest you put
it into arch_vcpu if this is going to be ARM specific.

(I have skipped other parts of this email)

Wei.

>  };
>  
>  /* Per-domain lock can be recursively acquired in fault handlers. */
> -- 
> 2.7.4
> 

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 10/16] gic:vgic:gic-vgic: introduce non-atomic bitops
  2018-11-29 12:14   ` Andre Przywara
@ 2018-11-29 16:07     ` Julien Grall
  2018-12-03 12:33     ` Andrii Anisov
  1 sibling, 0 replies; 59+ messages in thread
From: Julien Grall @ 2018-11-29 16:07 UTC (permalink / raw)
  To: Andre Przywara, Andrii Anisov
  Cc: xen-devel, Stefano Stabellini, Andrii Anisov

Hi,

On 29/11/2018 12:14, Andre Przywara wrote:
> On Wed, 28 Nov 2018 23:32:05 +0200
> Andrii Anisov <andrii.anisov@gmail.com> wrote:
> 
> Hi,
> 
>> From: Andrii Anisov <andrii_anisov@epam.com>
>>
>> All bit operations for gic, vgic and gic-vgic are performed under
>> spinlocks, so there is no need for atomic bit ops here, they only
>> introduce excessive call to functions used more expensive exclusive
>> ARM instructions.
>>
>> Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
>> ---
>>   xen/arch/arm/gic-vgic.c | 16 ++++++++++++++++
>>   xen/arch/arm/gic.c      | 16 ++++++++++++++++
>>   xen/arch/arm/vgic.c     | 16 ++++++++++++++++
>>   3 files changed, 48 insertions(+)
>>
>> diff --git a/xen/arch/arm/gic-vgic.c b/xen/arch/arm/gic-vgic.c
>> index f0e6c6f..5b73bbd 100644
>> --- a/xen/arch/arm/gic-vgic.c
>> +++ b/xen/arch/arm/gic-vgic.c
>> @@ -25,6 +25,22 @@
>>   #include <asm/gic.h>
>>   #include <asm/vgic.h>
>>   
>> +#undef set_bit
>> +#define set_bit(nr, addr) (*(addr) |= (1<<nr))
>> +
>> +#undef clear_bit
>> +#define clear_bit(nr, addr) (*(addr) &= ~(1<<nr))
>> +
>> +#undef test_bit
>> +#define test_bit(nr,addr) (*(addr) & (1<<nr))
>> +
>> +#undef test_and_clear_bit
>> +#define test_and_clear_bit(nr,addr) ({                    \
>> +    bool _x; \
>> +    _x = (*(addr) & (1<<nr));                        \
>> +    (*(addr) &= ~(1<<nr));                                \
>> +    (_x);})
>> +
>>   #define lr_all_full() (this_cpu(lr_mask) == ((1 << gic_get_nr_lrs())
>> - 1))
>>   #undef GIC_DEBUG
>> diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
>> index 52e4270..d558059 100644
>> --- a/xen/arch/arm/gic.c
>> +++ b/xen/arch/arm/gic.c
>> @@ -40,6 +40,22 @@
>>   
>>   DEFINE_PER_CPU(uint64_t, lr_mask);
>>   
>> +#undef set_bit
>> +#define set_bit(nr, addr) (*(addr) |= (1<<nr))
>> +
>> +#undef clear_bit
>> +#define clear_bit(nr, addr) (*(addr) &= ~(1<<nr))
>> +
>> +#undef test_bit
>> +#define test_bit(nr,addr) (*(addr) & (1<<nr))
>> +
>> +#undef test_and_clear_bit
>> +#define test_and_clear_bit(nr,addr) ({                    \
>> +    bool _x; \
>> +    _x = (*(addr) & (1<<nr));                        \
>> +    (*(addr) &= ~(1<<nr));                                \
>> +    (_x);})
>> +
>>   #undef GIC_DEBUG
>>   
>>   const struct gic_hw_operations *gic_hw_ops;
>> diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
>> index c142476..7691310 100644
>> --- a/xen/arch/arm/vgic.c
>> +++ b/xen/arch/arm/vgic.c
>> @@ -33,6 +33,22 @@
>>   #include <asm/gic.h>
>>   #include <asm/vgic.h>
>>   
>> +#undef set_bit
> 
> Nah, please don't do this. Can you show that atomic bit ops are a
> problem? They shouldn't be expensive unless contended, also pretty
> lightweight on small systems (single cluster).
> 
> But if you really think this is useful, why not go with the Linux way
> of using __set_bit to provide a non-atomic version?

We already have __set_bit/__clear_bit on Xen. However, it looks like the arm 
version is a wrapper to the atomic version. Rationale from the comment is those 
functions should be interrupt safe.

I don't know where that requirement come from. You already have race if that 
variable is modified simultaneously. So I would just re-use the Linux version.

> This would have the big advantage that you can replace them on a
> case-by-case base, which is much less risky than unconditionally
> replacing every (even future!) usage in the whole file.
> 
> Cheers,
> Andre.
> 
>> +#define set_bit(nr, addr) (*(addr) |= (1<<nr))
>> +
>> +#undef clear_bit
>> +#define clear_bit(nr, addr) (*(addr) &= ~(1<<nr))
>> +
>> +#undef test_bit
>> +#define test_bit(nr,addr) (*(addr) & (1<<nr))
>> +
>> +#undef test_and_clear_bit
>> +#define test_and_clear_bit(nr,addr) ({                    \
>> +    bool _x = (*(addr) & (1<<nr));                        \
>> +    (*(addr) &= ~(1<<nr));                                \
>> +    return (_x);                                                 \
>> +})
>> +
>>   static inline struct vgic_irq_rank *vgic_get_rank(struct vcpu *v,
>> int rank) {
>>       if ( rank == 0 )
> 

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 16/16] gic: vgic: align frequently accessed data by cache line size
  2018-11-28 21:32 ` [RFC 16/16] gic: vgic: align frequently accessed data by cache line size Andrii Anisov
@ 2018-11-29 16:24   ` Julien Grall
  0 siblings, 0 replies; 59+ messages in thread
From: Julien Grall @ 2018-11-29 16:24 UTC (permalink / raw)
  To: Andrii Anisov
  Cc: xen-devel, Stefano Stabellini, Andrii Anisov, Andre Przywara



On 28/11/2018 21:32, Andrii Anisov wrote:
> From: Andrii Anisov <andrii_anisov@epam.com>
> 
> Cache line size assumed 64 byte as for H3. Align the `struct
> pending_irq` and allocate lrs shadow aligned to cache line size.

The arch_vcpu is already aligned to a cache size. So how does it improve the 
performance by making lr also aligned to a cache line?

I can believe that pending_irq would be nice to have it aligned to a cache line 
(or half of it). But you need to explain the trade-off as you are going to use 
more memory.

> 
> Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
> ---
>   xen/arch/arm/domain.c        | 4 ++++
>   xen/arch/arm/vgic.c          | 9 +++++++++
>   xen/include/asm-arm/config.h | 2 +-
>   xen/include/asm-arm/gic.h    | 2 +-
>   xen/include/asm-arm/vgic.h   | 2 +-
>   5 files changed, 16 insertions(+), 3 deletions(-)
> 
> diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
> index 8e886b7..8bcb667 100644
> --- a/xen/arch/arm/domain.c
> +++ b/xen/arch/arm/domain.c
> @@ -558,6 +558,10 @@ int arch_vcpu_create(struct vcpu *v)
>       v->arch.saved_context.sp = (register_t)v->arch.cpu_info;
>       v->arch.saved_context.pc = (register_t)continue_new_vcpu;
>   
> +    v->arch.gic.v2.lr = xzalloc_bytes(sizeof(uint32_t) * gic_number_lines());
> +    if ( v->arch.gic.v2.lr == NULL )
> +        return -ENOMEM;
> +
>       /* Idle VCPUs don't need the rest of this setup */
>       if ( is_idle_vcpu(v) )
>           return rc;
> diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
> index 7691310..bedfb99 100644
> --- a/xen/arch/arm/vgic.c
> +++ b/xen/arch/arm/vgic.c
> @@ -166,6 +166,15 @@ int domain_vgic_init(struct domain *d, unsigned int nr_spis)
>   
>       d->arch.vgic.pending_irqs =
>           xzalloc_array(struct pending_irq, d->arch.vgic.nr_spis);
> +
> +    if ( sizeof(struct pending_irq) != dcache_line_bytes )
> +    {
> +        printk ("sizeof(struct pending_irq) = %lu  is not equal to cacheline"
> +                "size %lu. Is it expected?\n", sizeof(struct pending_irq),
> +                dcache_line_bytes);
> +        WARN();
> +    }
What could happen if pending_irq is not aligned to your cacheline? If your 
cacheline is smaller than pending_irq, then you are going to use multiple cache 
line. So nothing to worry.

If your cacheline is bigger, then it is a potential concern as you would share 
the cacheline with something else. This may be an issue, but I would like to see 
numbers first.

> +
>       if ( d->arch.vgic.pending_irqs == NULL )
>           return -ENOMEM;
>   
> diff --git a/xen/include/asm-arm/config.h b/xen/include/asm-arm/config.h
> index bc89e84..4f3669f 100644
> --- a/xen/include/asm-arm/config.h
> +++ b/xen/include/asm-arm/config.h
> @@ -28,7 +28,7 @@
>   
>   #define CONFIG_ARM 1
>   
> -#define CONFIG_ARM_L1_CACHE_SHIFT 7 /* XXX */
> +#define CONFIG_ARM_L1_CACHE_SHIFT 6 /* XXX */

That's not acceptable, this value is based on the biggest cache line used by Arm 
processors. If you want to use 64 bytes cache line, then you either make sure 
the structure fits in 64-bytes by adding padding.

>   
>   #define CONFIG_SMP 1
>   
> diff --git a/xen/include/asm-arm/gic.h b/xen/include/asm-arm/gic.h
> index add2566..fe44d3a 100644
> --- a/xen/include/asm-arm/gic.h
> +++ b/xen/include/asm-arm/gic.h
> @@ -186,7 +186,7 @@ struct gic_v2 {
>       uint32_t hcr;
>       uint32_t vmcr;
>       uint32_t apr;
> -    uint32_t lr[64];
> +    uint32_t *lr;
>       uint64_t lr_update_mask;
>   };
>   
> diff --git a/xen/include/asm-arm/vgic.h b/xen/include/asm-arm/vgic.h
> index a27a1a9..d4ec96f 100644
> --- a/xen/include/asm-arm/vgic.h
> +++ b/xen/include/asm-arm/vgic.h
> @@ -99,7 +99,7 @@ struct pending_irq
>        * TODO: when implementing irq migration, taking only the current
>        * vgic lock is not going to be enough. */
>       struct list_head lr_queue;
> -};
> +}__cacheline_aligned;
>   
>   #define NR_INTERRUPT_PER_RANK   32
>   #define INTERRUPT_RANK_MASK (NR_INTERRUPT_PER_RANK - 1)
> 

Cheers,

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 12/16] gic-v2: Write HCR only on change
  2018-11-28 21:32 ` [RFC 12/16] gic-v2: Write HCR only on change Andrii Anisov
@ 2018-11-29 16:36   ` Julien Grall
  0 siblings, 0 replies; 59+ messages in thread
From: Julien Grall @ 2018-11-29 16:36 UTC (permalink / raw)
  To: Andrii Anisov
  Cc: xen-devel, Stefano Stabellini, Andrii Anisov, Andre Przywara



On 28/11/2018 21:32, Andrii Anisov wrote:
> From: Andrii Anisov <andrii_anisov@epam.com>
> 
> This saves one write to peripheral HCR register per hypervisor entry for
> most cases.
> 
> Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
> ---
>   xen/arch/arm/gic-v2.c | 7 +++++--
>   1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/xen/arch/arm/gic-v2.c b/xen/arch/arm/gic-v2.c
> index 1a744c5..25147bd 100644
> --- a/xen/arch/arm/gic-v2.c
> +++ b/xen/arch/arm/gic-v2.c
> @@ -579,14 +579,17 @@ static void gicv2_write_lr(int lr, const struct gic_lr *lr_reg)
>   
>   static void gicv2_hcr_status(uint32_t flag, bool status)
>   {
> -    uint32_t hcr = readl_gich(GICH_HCR);
> +    uint32_t hcr, ohcr;
> +
> +    ohcr = hcr = readl_gich(GICH_HCR);
>   
>       if ( status )
>           hcr |= flag;
>       else
>           hcr &= (~flag);
>   
> -    writel_gich(hcr, GICH_HCR);
> +    if ( hcr != ohcr )
> +        writel_gich(hcr, GICH_HCR);

While I understand that theoretically this is an issue. Is it actually a real 
performance benefits?

I would actually expect the read to be more expensive than the write because we 
implement writel_gich using writel_relaxed. So there are no barrier afterwards 
to "block" the processor to go forward.

>   }
>   
>   static unsigned int gicv2_read_vmcr_priority(void)
> 

Cheers,

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 10/16] gic:vgic:gic-vgic: introduce non-atomic bitops
  2018-11-29 12:14   ` Andre Przywara
  2018-11-29 16:07     ` Julien Grall
@ 2018-12-03 12:33     ` Andrii Anisov
  2018-12-03 12:58       ` Julien Grall
  2018-12-03 16:37       ` Andre Przywara
  1 sibling, 2 replies; 59+ messages in thread
From: Andrii Anisov @ 2018-12-03 12:33 UTC (permalink / raw)
  To: Andre Przywara; +Cc: xen-devel, Julien Grall, Stefano Stabellini, Andrii Anisov

Hello Andre,

On 29.11.18 14:14, Andre Przywara wrote:
> Nah, please don't do this.
Sorry for making you crying looking at this code.
It's terrible, I know. It's rather an idea.

> Can you show that atomic bit ops are a
> problem? They shouldn't be expensive unless contended, also pretty
> lightweight on small systems (single cluster).
Yep, but still it is a call to a function of 10 operations instead of 
one `orr` for set_bit(). Taking in account a heavy usage of bitops in 
the old vgic code, this should benefit latency.

> But if you really think this is useful, why not go with the Linux way
> of using __set_bit to provide a non-atomic version?
> This would have the big advantage that you can replace them on a
> case-by-case base, which is much less risky than unconditionally
> replacing every (even future!) usage in the whole file.
Whatever you prefer :)

-- 
Sincerely,
Andrii Anisov.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 10/16] gic:vgic:gic-vgic: introduce non-atomic bitops
  2018-12-03 12:33     ` Andrii Anisov
@ 2018-12-03 12:58       ` Julien Grall
  2018-12-03 13:05         ` Andrii Anisov
  2018-12-03 16:37       ` Andre Przywara
  1 sibling, 1 reply; 59+ messages in thread
From: Julien Grall @ 2018-12-03 12:58 UTC (permalink / raw)
  To: Andrii Anisov, Andre Przywara
  Cc: xen-devel, Stefano Stabellini, Andrii Anisov

Hi Andrii,

On 03/12/2018 12:33, Andrii Anisov wrote:
> On 29.11.18 14:14, Andre Przywara wrote:
>> Nah, please don't do this.
> Sorry for making you crying looking at this code.
> It's terrible, I know. It's rather an idea.
> 
>> Can you show that atomic bit ops are a
>> problem? They shouldn't be expensive unless contended, also pretty
>> lightweight on small systems (single cluster).
> Yep, but still it is a call to a function of 10 operations instead of one `orr` 
> for set_bit(). Taking in account a heavy usage of bitops in the old vgic code, 
> this should benefit latency.

That's micro optimizing Xen... there are better (and less risky) place to look 
for optimization.

Knowing how fragile the locking is on the old vGIC, the risk of micro-optimizing 
is not worth it. If you can provide number that shows you can improve 
performance by more than 10% in common case with this patch only, then I will 
reconsider my position.

Cheers,

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 10/16] gic:vgic:gic-vgic: introduce non-atomic bitops
  2018-12-03 12:58       ` Julien Grall
@ 2018-12-03 13:05         ` Andrii Anisov
  2018-12-03 13:08           ` Julien Grall
  0 siblings, 1 reply; 59+ messages in thread
From: Andrii Anisov @ 2018-12-03 13:05 UTC (permalink / raw)
  To: Julien Grall, Andre Przywara; +Cc: xen-devel, Stefano Stabellini, Andrii Anisov

Hello Julien,

On 03.12.18 14:58, Julien Grall wrote:
> That's micro optimizing Xen... there are better (and less risky) place 
> to look for optimization.
I would appreciate you point me those places.

> Knowing how fragile the locking is on the old vGIC, the risk of 
> micro-optimizing is not worth it. If you can provide number that shows 
> you can improve performance by more than 10% in common case with this 
> patch only, then I will reconsider my position.
I got it.

-- 
Sincerely,
Andrii Anisov.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 10/16] gic:vgic:gic-vgic: introduce non-atomic bitops
  2018-12-03 13:05         ` Andrii Anisov
@ 2018-12-03 13:08           ` Julien Grall
  0 siblings, 0 replies; 59+ messages in thread
From: Julien Grall @ 2018-12-03 13:08 UTC (permalink / raw)
  To: Andrii Anisov, Andre Przywara
  Cc: xen-devel, Stefano Stabellini, Andrii Anisov



On 03/12/2018 13:05, Andrii Anisov wrote:
> Hello Julien,
> 
> On 03.12.18 14:58, Julien Grall wrote:
>> That's micro optimizing Xen... there are better (and less risky) place to look 
>> for optimization.
> I would appreciate you point me those places.

I already pointed them in various threads with you. In any case, I will not 
consider any patch without benchmark unless the patches are for correctness.

Cheers,

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 10/16] gic:vgic:gic-vgic: introduce non-atomic bitops
  2018-12-03 12:33     ` Andrii Anisov
  2018-12-03 12:58       ` Julien Grall
@ 2018-12-03 16:37       ` Andre Przywara
  1 sibling, 0 replies; 59+ messages in thread
From: Andre Przywara @ 2018-12-03 16:37 UTC (permalink / raw)
  To: Andrii Anisov; +Cc: xen-devel, Julien Grall, Stefano Stabellini, Andrii Anisov

On Mon, 3 Dec 2018 14:33:08 +0200
Andrii Anisov <andrii.anisov@gmail.com> wrote:

Hi Andrii,

> On 29.11.18 14:14, Andre Przywara wrote:
> > Nah, please don't do this.  
> Sorry for making you crying looking at this code.
> It's terrible, I know. It's rather an idea.
> 
> > Can you show that atomic bit ops are a
> > problem? They shouldn't be expensive unless contended, also pretty
> > lightweight on small systems (single cluster).  
> Yep, but still it is a call to a function of 10 operations instead of 
> one `orr` for set_bit(). Taking in account a heavy usage of bitops in 
> the old vgic code, this should benefit latency.

Please be careful with any estimate on "cost" when just looking at the
number of instructions. The costly part in both operations is the
memory access, if that misses any caches, it can easily cost multiple
hundred cycles, then doing ten more register-only instructions before
or after doesn't really matter anymore.
Plus __set_bit would be a "ldr-orr-str" sequence, not just an orr, as
this would need to be a read-modify-write sequence. So the difference
is not too much.

Cheers,
Andre.

> > But if you really think this is useful, why not go with the Linux
> > way of using __set_bit to provide a non-atomic version?
> > This would have the big advantage that you can replace them on a
> > case-by-case base, which is much less risky than unconditionally
> > replacing every (even future!) usage in the whole file.  
> Whatever you prefer :)
> 


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 05/16] gic-vgic: Drop an excessive clear_lrs
  2018-11-28 21:32 ` [RFC 05/16] gic-vgic: Drop an excessive clear_lrs Andrii Anisov
@ 2018-12-11 14:33   ` Julien Grall
  2018-12-12 11:01     ` Andrii Anisov
  0 siblings, 1 reply; 59+ messages in thread
From: Julien Grall @ 2018-12-11 14:33 UTC (permalink / raw)
  To: Andrii Anisov; +Cc: xen-devel, Stefano Stabellini, Andrii Anisov

Hi Andrii,

On 28/11/2018 21:32, Andrii Anisov wrote:
> From: Andrii Anisov <andrii_anisov@epam.com>
> 
> This action is excessive because for an invalid LR there is no need
> to write another invalid value to a register. So we can skip it here,
> saving a peripheral register access.
> 
> Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
> ---
>   xen/arch/arm/gic-vgic.c | 1 -
>   1 file changed, 1 deletion(-)
> 
> diff --git a/xen/arch/arm/gic-vgic.c b/xen/arch/arm/gic-vgic.c
> index 869987a..4e18169 100644
> --- a/xen/arch/arm/gic-vgic.c
> +++ b/xen/arch/arm/gic-vgic.c
> @@ -227,7 +227,6 @@ static void gic_update_one_lr(struct vcpu *v, int i)
>       }
>       else
>       {
> -        gic_hw_ops->clear_lr(i);

I would actually prefer to keep this code for debug build. This makes easier to 
read the dumped LRs as you don't have to worry about invalid one.

With #ifndef NDEBUG and the appropriate comment:

Reviewed-by: Julien Grall <julien.grall@arm.com>

Feel free to resent it alone so it can be merged to Xen 4.12.

Cheers,

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 11/16] irq: skip action avalability check for guest's IRQ
  2018-11-28 21:32 ` [RFC 11/16] irq: skip action avalability check for guest's IRQ Andrii Anisov
@ 2018-12-11 14:48   ` Julien Grall
  2018-12-12 11:30     ` Andrii Anisov
  0 siblings, 1 reply; 59+ messages in thread
From: Julien Grall @ 2018-12-11 14:48 UTC (permalink / raw)
  To: Andrii Anisov; +Cc: xen-devel, Stefano Stabellini, Andrii Anisov

Hi Andrii,

On 28/11/2018 21:32, Andrii Anisov wrote:
> From: Andrii Anisov <andrii_anisov@epam.com>
> 
> An IRQ assigned to guest always has an action. This removes
> another odd check on guest IRQ path.

And you can't see any potential race in that code happening in the future?

> Also getting an unknown
> interrupt is very unlikely on a non-debug platform.

I am tempted to keep the code at the same place but protect with an #ifndef 
NDEBUG. What do you think?

Like patch #5, feel free to resend the patch alone for Xen 4.12.

Cheers,

> 
> Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
> ---
>   xen/arch/arm/irq.c | 14 +++++++-------
>   1 file changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/xen/arch/arm/irq.c b/xen/arch/arm/irq.c
> index 596a8b8..5debfc5 100644
> --- a/xen/arch/arm/irq.c
> +++ b/xen/arch/arm/irq.c
> @@ -205,13 +205,6 @@ void do_IRQ(struct cpu_user_regs *regs, unsigned int irq, int is_fiq)
>       spin_lock(&desc->lock);
>       desc->handler->ack(desc);
>   
> -    if ( !desc->action )
> -    {
> -        printk("Unknown %s %#3.3x\n",
> -               is_fiq ? "FIQ" : "IRQ", irq);
> -        goto out;
> -    }
> -
>       if ( test_bit(_IRQ_GUEST, &desc->status) )
>       {
>           struct irq_guest *info = irq_get_guest_info(desc);
> @@ -229,6 +222,13 @@ void do_IRQ(struct cpu_user_regs *regs, unsigned int irq, int is_fiq)
>           goto out_no_end;
>       }
>   
> +    if ( unlikely(!desc->action) )
> +    {
> +        printk("Unknown %s %#3.3x\n",
> +               is_fiq ? "FIQ" : "IRQ", irq);
> +        goto out;
> +    }
> +
>       set_bit(_IRQ_PENDING, &desc->status);
>   
>       /*
> 

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 05/16] gic-vgic: Drop an excessive clear_lrs
  2018-12-11 14:33   ` Julien Grall
@ 2018-12-12 11:01     ` Andrii Anisov
  2018-12-12 11:55       ` Julien Grall
  0 siblings, 1 reply; 59+ messages in thread
From: Andrii Anisov @ 2018-12-12 11:01 UTC (permalink / raw)
  To: Julien Grall; +Cc: xen-devel, Stefano Stabellini, Andrii Anisov

Hello Julien,

On 11.12.18 16:33, Julien Grall wrote:
> 
> With #ifndef NDEBUG and the appropriate comment:
Will do.
> 
> Reviewed-by: Julien Grall <julien.grall@arm.com>
Thank you.

> Feel free to resent it alone so it can be merged to Xen 4.12.
What about getting it together with "[RFC 11/16] irq: skip action avalability check for guest's IRQ"?

-- 
Sincerely,
Andrii Anisov.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 11/16] irq: skip action avalability check for guest's IRQ
  2018-12-11 14:48   ` Julien Grall
@ 2018-12-12 11:30     ` Andrii Anisov
  2018-12-12 11:59       ` Julien Grall
  0 siblings, 1 reply; 59+ messages in thread
From: Andrii Anisov @ 2018-12-12 11:30 UTC (permalink / raw)
  To: Julien Grall; +Cc: xen-devel, Stefano Stabellini, Andrii Anisov



On 11.12.18 16:48, Julien Grall wrote:
> And you can't see any potential race in that code happening in the future?
It is protected with `desc->lock` so far. If one decided to get it from under the lock, the race is possible with `release_irq()`.

>> Also getting an unknown
>> interrupt is very unlikely on a non-debug platform.
> 
> I am tempted to keep the code at the same place but protect with an #ifndef NDEBUG. What do you think?
Well, I think about a correspondent ASSERT for the guest IRQ. Like following:

>>       if ( test_bit(_IRQ_GUEST, &desc->status) )
>>       {>>           struct irq_guest *info = irq_get_guest_info(desc);
+            ASSERT( desc->action != NULL );

What would you prefer?

-- 
Sincerely,
Andrii Anisov.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 05/16] gic-vgic: Drop an excessive clear_lrs
  2018-12-12 11:01     ` Andrii Anisov
@ 2018-12-12 11:55       ` Julien Grall
  0 siblings, 0 replies; 59+ messages in thread
From: Julien Grall @ 2018-12-12 11:55 UTC (permalink / raw)
  To: Andrii Anisov; +Cc: xen-devel, Stefano Stabellini, Andrii Anisov



On 12/12/2018 11:01, Andrii Anisov wrote:
> Hello Julien,
> 
> On 11.12.18 16:33, Julien Grall wrote:
>>
>> With #ifndef NDEBUG and the appropriate comment:
> Will do.
>>
>> Reviewed-by: Julien Grall <julien.grall@arm.com>
> Thank you.
> 
>> Feel free to resent it alone so it can be merged to Xen 4.12.
> What about getting it together with "[RFC 11/16] irq: skip action avalability 
> check for guest's IRQ"?

I am fine with that :).

Cheers,

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 11/16] irq: skip action avalability check for guest's IRQ
  2018-12-12 11:30     ` Andrii Anisov
@ 2018-12-12 11:59       ` Julien Grall
  2018-12-12 13:51         ` Andrii Anisov
  0 siblings, 1 reply; 59+ messages in thread
From: Julien Grall @ 2018-12-12 11:59 UTC (permalink / raw)
  To: Andrii Anisov; +Cc: xen-devel, Stefano Stabellini, Andrii Anisov



On 12/12/2018 11:30, Andrii Anisov wrote:
> 
> 
> On 11.12.18 16:48, Julien Grall wrote:
>> And you can't see any potential race in that code happening in the future?
> It is protected with `desc->lock` so far. If one decided to get it from under 
> the lock, the race is possible with `release_irq()`.
> 
>>> Also getting an unknown
>>> interrupt is very unlikely on a non-debug platform.
>>
>> I am tempted to keep the code at the same place but protect with an #ifndef 
>> NDEBUG. What do you think?
> Well, I think about a correspondent ASSERT for the guest IRQ. Like following:
> 
>>>       if ( test_bit(_IRQ_GUEST, &desc->status) )
>>>       {>>           struct irq_guest *info = irq_get_guest_info(desc);
> +            ASSERT( desc->action != NULL );
> 
> What would you prefer?

An ASSERT(...) is already embed in irq_get_guest_info(desc).

I thought about the ASSERT(...) over the current printk yesterday. But I 
discarded it because it does not give you more information if something went 
really wrong as the stack trace would not really be helpful in that context.

So I would prefer the #ifdef NDEBUG solution.

Cheers,

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 13/16] gic-vgic: skip irqs locking
  2018-11-28 21:32 ` [RFC 13/16] gic-vgic: skip irqs locking Andrii Anisov
@ 2018-12-12 12:07   ` Julien Grall
  2018-12-12 12:35     ` Andrii Anisov
  0 siblings, 1 reply; 59+ messages in thread
From: Julien Grall @ 2018-12-12 12:07 UTC (permalink / raw)
  To: Andrii Anisov; +Cc: xen-devel, Stefano Stabellini, Andrii Anisov

Hi,

On 28/11/2018 21:32, Andrii Anisov wrote:
> From: Andrii Anisov <andrii_anisov@epam.com>
> 
> Those fucntions are called under IRQs disabled already, so avoid

s/fucntions/

> additional flags saving and restore.
> 
> Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
> ---
>   xen/arch/arm/gic-vgic.c | 10 ++++------
>   1 file changed, 4 insertions(+), 6 deletions(-)
> 
> diff --git a/xen/arch/arm/gic-vgic.c b/xen/arch/arm/gic-vgic.c
> index 5b73bbd..e511f91 100644
> --- a/xen/arch/arm/gic-vgic.c
> +++ b/xen/arch/arm/gic-vgic.c
> @@ -268,7 +268,6 @@ static void gic_update_one_lr(struct vcpu *v, int i)
>   void vgic_sync_from_lrs(struct vcpu *v)
>   {
>       int i = 0;
> -    unsigned long flags;
>       unsigned int nr_lrs = gic_get_nr_lrs();
>   
>       /* The idle domain has no LRs to be cleared. Since gic_restore_state
> @@ -279,7 +278,7 @@ void vgic_sync_from_lrs(struct vcpu *v)
>   
>       gic_hw_ops->update_hcr_status(GICH_HCR_UIE, false);
>   
> -    spin_lock_irqsave(&v->arch.vgic.lock, flags);
> +    spin_lock(&v->arch.vgic.lock);

This chunk relies on patch #1, am I correct? If so, this should be written in 
the commit message that this was introduced recently. This helps to figure out 
whether the patch can be merged before the rest.

>   
>       while ((i = find_next_bit((const unsigned long *) &this_cpu(lr_mask),
>                                 nr_lrs, i)) < nr_lrs ) {
> @@ -287,7 +286,7 @@ void vgic_sync_from_lrs(struct vcpu *v)
>           i++;
>       }
>   
> -    spin_unlock_irqrestore(&v->arch.vgic.lock, flags);
> +    spin_unlock(&v->arch.vgic.lock);
>   }
>   
>   static void gic_restore_pending_irqs(struct vcpu *v)
> @@ -295,11 +294,10 @@ static void gic_restore_pending_irqs(struct vcpu *v)
>       int lr = 0;
>       struct pending_irq *p, *t, *p_r;
>       struct list_head *inflight_r;
> -    unsigned long flags;
>       unsigned int nr_lrs = gic_get_nr_lrs();
>       int lrs = nr_lrs;
>   
> -    spin_lock_irqsave(&v->arch.vgic.lock, flags);
> +    spin_lock(&v->arch.vgic.lock);

I would add an ASSERT(!local_is_irq_enabled()) on top to show that this should 
be called with interrupt disabled.

>   
>       if ( list_empty(&v->arch.vgic.lr_pending) )
>           goto out;
> @@ -343,7 +341,7 @@ found:
>       }
>   
>   out:
> -    spin_unlock_irqrestore(&v->arch.vgic.lock, flags);
> +    spin_unlock(&v->arch.vgic.lock);
>   }
>   
>   void gic_clear_pending_irqs(struct vcpu *v)
> 

Cheers,

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 06/16] gic: drop interrupts enabling on interrupts processing
  2018-11-28 22:06   ` Julien Grall
@ 2018-12-12 12:10     ` Julien Grall
  0 siblings, 0 replies; 59+ messages in thread
From: Julien Grall @ 2018-12-12 12:10 UTC (permalink / raw)
  To: Andrii Anisov; +Cc: xen-devel, nd, Andrii Anisov, Stefano Stabellini

Hi,

On 28/11/2018 22:06, Julien Grall wrote:
> On 28/11/2018 21:32, Andrii Anisov wrote:
>> From: Andrii Anisov <andrii_anisov@epam.com>
>>
>> This reduces the number of context switches in case we have an
>> interrupt storm. We will read out all of those interrupt in the
>> loop anyway.
> 
> This needs a better explanation. You might want to have a look at the details I 
> provided in another discussion.
> 
> In general, I would like any changes in the vGIC to be very detailed. As they 
> are usually subbtle implication on the rest of the code base.

This patch is suitable for Xen 4.12 with more comments in the commit message.

Cheers,

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 13/16] gic-vgic: skip irqs locking
  2018-12-12 12:07   ` Julien Grall
@ 2018-12-12 12:35     ` Andrii Anisov
  2018-12-12 12:44       ` Julien Grall
  0 siblings, 1 reply; 59+ messages in thread
From: Andrii Anisov @ 2018-12-12 12:35 UTC (permalink / raw)
  To: Julien Grall; +Cc: xen-devel, Stefano Stabellini, Andrii Anisov



On 12.12.18 14:07, Julien Grall wrote:
> This chunk relies on patch #1, am I correct?
For sure, it is.

>  If so, this should be written in the commit message that this was introduced recently.
>  This helps to figure out whether the patch can be merged before the rest.
Do you mean I can prepare it for 4.12 together with #1?

I plan to send separately patches you have reviewed first. Then evaluate the rest with TBM. Does it fit your vision?

> I would add an ASSERT(!local_is_irq_enabled()) on top to show that this should be called with interrupt disabled.
OK.

-- 
Sincerely,
Andrii Anisov.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 08/16] gic: separate ppi processing
  2018-11-28 21:32 ` [RFC 08/16] gic: separate ppi processing Andrii Anisov
@ 2018-12-12 12:37   ` Andrii Anisov
  2018-12-12 12:46     ` Julien Grall
  0 siblings, 1 reply; 59+ messages in thread
From: Andrii Anisov @ 2018-12-12 12:37 UTC (permalink / raw)
  To: Julien Grall; +Cc: xen-devel, Stefano Stabellini, Andrii Anisov

Julien,

Sorry for bothering you too much.
What about this patch?

-- 
Sincerely,
Andrii Anisov.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 13/16] gic-vgic: skip irqs locking
  2018-12-12 12:35     ` Andrii Anisov
@ 2018-12-12 12:44       ` Julien Grall
  2018-12-12 12:47         ` Andrii Anisov
  0 siblings, 1 reply; 59+ messages in thread
From: Julien Grall @ 2018-12-12 12:44 UTC (permalink / raw)
  To: Andrii Anisov; +Cc: xen-devel, Stefano Stabellini, Andrii Anisov

Hi Andrii,

On 12/12/2018 12:35, Andrii Anisov wrote:
> 
> 
> On 12.12.18 14:07, Julien Grall wrote:
>> This chunk relies on patch #1, am I correct?
> For sure, it is.
> 
>>  If so, this should be written in the commit message that this was introduced 
>> recently.
>>  This helps to figure out whether the patch can be merged before the rest.
> Do you mean I can prepare it for 4.12 together with #1?

At the moment, I am happy with the second chunk of this patch to go. I am still 
unconvinced #1 is the right thing to go.

> 
> I plan to send separately patches you have reviewed first. Then evaluate the 
> rest with TBM. Does it fit your vision?
Can you send separately patches I said I am happy with for Xen 4.12? We can then 
continue to discuss the others with numbers and see if we can merge them in time.

Cheers,


-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 08/16] gic: separate ppi processing
  2018-12-12 12:37   ` Andrii Anisov
@ 2018-12-12 12:46     ` Julien Grall
  2018-12-12 12:52       ` Andrii Anisov
  0 siblings, 1 reply; 59+ messages in thread
From: Julien Grall @ 2018-12-12 12:46 UTC (permalink / raw)
  To: Andrii Anisov; +Cc: xen-devel, Stefano Stabellini, Andrii Anisov

Hi,

On 12/12/2018 12:37, Andrii Anisov wrote:
> Julien,
> 
> Sorry for bothering you too much.
> What about this patch?

I haven't answered because I am waiting the numbers.

Cheers,

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 13/16] gic-vgic: skip irqs locking
  2018-12-12 12:44       ` Julien Grall
@ 2018-12-12 12:47         ` Andrii Anisov
  0 siblings, 0 replies; 59+ messages in thread
From: Andrii Anisov @ 2018-12-12 12:47 UTC (permalink / raw)
  To: Julien Grall; +Cc: xen-devel, Stefano Stabellini, Andrii Anisov



On 12.12.18 14:44, Julien Grall wrote:
> At the moment, I am happy with the second chunk of this patch to go. I am still unconvinced #1 is the right thing to go.
I got it. So keep the patch still for now.

>> I plan to send separately patches you have reviewed first. Then evaluate the rest with TBM. Does it fit your vision?
> Can you send separately patches I said I am happy with for Xen 4.12? We can then continue to discuss the others with numbers and see if we can merge them in time.
Yep.

-- 
Sincerely,
Andrii Anisov.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 08/16] gic: separate ppi processing
  2018-12-12 12:46     ` Julien Grall
@ 2018-12-12 12:52       ` Andrii Anisov
  2018-12-12 13:46         ` Julien Grall
  0 siblings, 1 reply; 59+ messages in thread
From: Andrii Anisov @ 2018-12-12 12:52 UTC (permalink / raw)
  To: Julien Grall; +Cc: xen-devel, Stefano Stabellini, Andrii Anisov



On 12.12.18 14:46, Julien Grall wrote:
> I haven't answered because I am waiting the numbers.
I got it.
But TBM will not show effect of this patch. Even the patch breaks TBM's functionality because TBM relies on a phys timer interrupt to be assigned to the domain (go through the spi path).

-- 
Sincerely,
Andrii Anisov.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 08/16] gic: separate ppi processing
  2018-12-12 12:52       ` Andrii Anisov
@ 2018-12-12 13:46         ` Julien Grall
  0 siblings, 0 replies; 59+ messages in thread
From: Julien Grall @ 2018-12-12 13:46 UTC (permalink / raw)
  To: Andrii Anisov; +Cc: xen-devel, Stefano Stabellini, Andrii Anisov

Hi,

On 12/12/2018 12:52, Andrii Anisov wrote:
> 
> 
> On 12.12.18 14:46, Julien Grall wrote:
>> I haven't answered because I am waiting the numbers.
> I got it.
> But TBM will not show effect of this patch.

If TBM does not show effect, then you need a different benchmark. Probably by 
looking at how the PPI latency in Xen.

> Even the patch breaks TBM's 
> functionality because TBM relies on a phys timer interrupt to be assigned to the 
> domain (go through the spi path).

Well, the problem is your patch assumes PPI cannot be assigned to the guest. 
While this is the case today, there are plan for handing over PPI (such as 
timer) to guest. This will help to remove the hack we have for the timer in both 
Xen code base and Guest code base.

I would actually prefer if we try to optimize do_IRQ. Actually, this patch has 
some good ideas that can be re-used in the current code.

For instance, I think you can drop the logic that use _IRQ_PENDING because the 
interrupt can never be received to another PE while active (this will be cleared 
by desc->handler->end()).

Similarly the test on _IRQ_INPROGRESS could be dropped. Although, I would still 
like to keep set/clear _IRQ_INPROGRESS.

With those changes, do_IRQ becomes a lot more like the function do_ppi. The only 
difference is the spinlock. But I don't believe this will bring that much 
difference in performance impact. After all the lock should unlikely be contended.

Cheers,

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 11/16] irq: skip action avalability check for guest's IRQ
  2018-12-12 11:59       ` Julien Grall
@ 2018-12-12 13:51         ` Andrii Anisov
  2018-12-12 14:49           ` Julien Grall
  0 siblings, 1 reply; 59+ messages in thread
From: Andrii Anisov @ 2018-12-12 13:51 UTC (permalink / raw)
  To: Julien Grall; +Cc: xen-devel, Stefano Stabellini, Andrii Anisov

Julien,


On 12.12.18 13:59, Julien Grall wrote:
> An ASSERT(...) is already embed in irq_get_guest_info(desc).
Yep.

> I thought about the ASSERT(...) over the current printk yesterday. But I discarded it because it does not give you more information if something went really wrong as the stack trace would not really be helpful in that context.
But ASSERT also prints a failed condition text.

> So I would prefer the #ifdef NDEBUG solution.
I'm a bit confused about your suggestion.
Do you want to see two identical pieces of code before and after the `if ( test_bit(_IRQ_GUEST, &desc->status) )`? One with `#ifndef NDEBUG`, other without.
Is my understanding correct?


-- 
Sincerely,
Andrii Anisov.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 11/16] irq: skip action avalability check for guest's IRQ
  2018-12-12 13:51         ` Andrii Anisov
@ 2018-12-12 14:49           ` Julien Grall
  2018-12-12 14:58             ` Andrii Anisov
  0 siblings, 1 reply; 59+ messages in thread
From: Julien Grall @ 2018-12-12 14:49 UTC (permalink / raw)
  To: Andrii Anisov; +Cc: xen-devel, Stefano Stabellini, Andrii Anisov

Hi,

On 12/12/2018 13:51, Andrii Anisov wrote:
> Julien,
> 
> 
> On 12.12.18 13:59, Julien Grall wrote:
>> An ASSERT(...) is already embed in irq_get_guest_info(desc).
> Yep.
> 
>> I thought about the ASSERT(...) over the current printk yesterday. But I 
>> discarded it because it does not give you more information if something went 
>> really wrong as the stack trace would not really be helpful in that context.
> But ASSERT also prints a failed condition text.

ASSERT only tells you that desc->action was NULL. It does not tell you which IRQ 
has the desc->action == NULL. It is a bit a shame we don't have way to provide 
another message with ASSERT to help you debugging.

> 
>> So I would prefer the #ifdef NDEBUG solution.
> I'm a bit confused about your suggestion.
> Do you want to see two identical pieces of code before and after the `if ( 
> test_bit(_IRQ_GUEST, &desc->status) )`? One with `#ifndef NDEBUG`, other without.
> Is my understanding correct?

No, I am suggesting to have the current if ( test_bit(...) ) protecred with 
#ifndef NDEBUG. No code duplication.

Cheers,

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 11/16] irq: skip action avalability check for guest's IRQ
  2018-12-12 14:49           ` Julien Grall
@ 2018-12-12 14:58             ` Andrii Anisov
  2018-12-12 15:08               ` Julien Grall
  0 siblings, 1 reply; 59+ messages in thread
From: Andrii Anisov @ 2018-12-12 14:58 UTC (permalink / raw)
  To: Julien Grall; +Cc: xen-devel, Stefano Stabellini, Andrii Anisov



On 12.12.18 16:49, Julien Grall wrote:
> ASSERT only tells you that desc->action was NULL. It does not tell you which IRQ has the desc->action == NULL.
Ah, yes.

> It is a bit a shame we don't have way to provide another message with ASSERT to help you debugging.
We might have implemented an assert with a message.
And guys on stackoverflow suggest for workaround `ASSERT(condition && "error message")`.

> No, I am suggesting to have the current if ( test_bit(...) ) protecred with #ifndef NDEBUG. No code duplication.
I would not like to leave even non-debug build without the check.

-- 
Sincerely,
Andrii Anisov.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 11/16] irq: skip action avalability check for guest's IRQ
  2018-12-12 14:58             ` Andrii Anisov
@ 2018-12-12 15:08               ` Julien Grall
  2018-12-12 15:57                 ` Andrii Anisov
  0 siblings, 1 reply; 59+ messages in thread
From: Julien Grall @ 2018-12-12 15:08 UTC (permalink / raw)
  To: Andrii Anisov; +Cc: xen-devel, Stefano Stabellini, Andrii Anisov



On 12/12/2018 14:58, Andrii Anisov wrote:
> 
> 
> On 12.12.18 16:49, Julien Grall wrote:
>> ASSERT only tells you that desc->action was NULL. It does not tell you which 
>> IRQ has the desc->action == NULL.
> Ah, yes.
> 
>> It is a bit a shame we don't have way to provide another message with ASSERT 
>> to help you debugging.
> We might have implemented an assert with a message.
> And guys on stackoverflow suggest for workaround `ASSERT(condition && "error 
> message")`.

That's not going to help for showing the IRQ number.

> 
>> No, I am suggesting to have the current if ( test_bit(...) ) protecred with 
>> #ifndef NDEBUG. No code duplication.
> I would not like to leave even non-debug build without the check.

The check is mainly there to catch error in debug build. So I am quite confused 
what is your goal by moving the check. Is it just because it adds a couple more 
instruction in the guest case?

Cheers,

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [RFC 11/16] irq: skip action avalability check for guest's IRQ
  2018-12-12 15:08               ` Julien Grall
@ 2018-12-12 15:57                 ` Andrii Anisov
  0 siblings, 0 replies; 59+ messages in thread
From: Andrii Anisov @ 2018-12-12 15:57 UTC (permalink / raw)
  To: Julien Grall; +Cc: xen-devel, Stefano Stabellini, Andrii Anisov


On 12.12.18 17:08, Julien Grall wrote:
> Is it just because it adds a couple more instruction in the guest case?
And add unlikely for XEN IRQ case. That was the idea.

> The check is mainly there to catch error in debug build.
I supposed a race with `release_irq()`, but found out that we are safe with `_IRQ_DISABLED` flag when `action` is not set.
So I'll keep the code in its current place (as of mainline) and wrap it with `#ifndef NDEBUG`.

-- 
Sincerely,
Andrii Anisov.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [Xen-devel] [RFC 01/16] xen/arm: Re-enable interrupt later in the trap path
  2018-11-28 22:00   ` Julien Grall
@ 2019-07-30 17:34     ` Andrii Anisov
  2019-07-30 20:46       ` Julien Grall
  0 siblings, 1 reply; 59+ messages in thread
From: Andrii Anisov @ 2019-07-30 17:34 UTC (permalink / raw)
  To: Julien Grall; +Cc: xen-devel, nd, Andrii Anisov, Stefano Stabellini

Hello Julien,

It looks I missed answering this email. So do this now:

On 29.11.18 00:00, Julien Grall wrote:
> On 28/11/2018 21:31, Andrii Anisov wrote:
>> From: Julien Grall <julien.grall@arm.com>
>>
>> This makes function enter_hypervisor_head() being executed with
>> irqs locked. This also give a fine side effect - it assures that
>> LRs are cleared prior to any IRQs processing, which leads to a
>> better (faster) IRQs processing.
> 
> Again, this need some rationale why is it faster. And how this is going
> to impact the other vGIC...

With the current code, there is a chance that hypervisor performs do_IRQ() before fetching LRs with the information about IRQs being processed by a guest. This could happen in guest trap paths where interrupts are enabled before `enter_hypervisor_head()` call.

Performing `do_IRQ()` prior to `vgic_sync_from_lrs()` is assumed as less efficient than doing it vice versa for high IRQ rate use-cases with the number of different interrupts (specific for multimedia scenarios).

  - For the old vgic implementation, `vgic_sync_from_lrs()` release LRs from processed interrupts also shortens `inflight_irqs` sorted list. So `do_IRQ()` has better chances to write IRQ directly to LR instead of saving it into `lr_pending` list and possibly faster insert the new IRQ into `inflight_irqs` list.

  - For the new vgic implementation, `vgic_sync_from_lrs()` fetches about processed interrupts from LRs and shortens `ap_list`. So `do_IRQ()` could potentially faster insert the new interrupt into `ap_list`.


-- 
Sincerely,
Andrii Anisov.
.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [Xen-devel] [RFC 01/16] xen/arm: Re-enable interrupt later in the trap path
  2019-07-30 17:34     ` [Xen-devel] " Andrii Anisov
@ 2019-07-30 20:46       ` Julien Grall
  0 siblings, 0 replies; 59+ messages in thread
From: Julien Grall @ 2019-07-30 20:46 UTC (permalink / raw)
  To: Andrii Anisov
  Cc: xen-devel, nd, Andrii Anisov, Stefano Stabellini, Andre Przywara

(+ Andre)

On 7/30/19 6:34 PM, Andrii Anisov wrote:
> Hello Julien,
> 
> It looks I missed answering this email. So do this now:
> 
> On 29.11.18 00:00, Julien Grall wrote:
>> On 28/11/2018 21:31, Andrii Anisov wrote:
>>> From: Julien Grall <julien.grall@arm.com>
>>>
>>> This makes function enter_hypervisor_head() being executed with
>>> irqs locked. This also give a fine side effect - it assures that
>>> LRs are cleared prior to any IRQs processing, which leads to a
>>> better (faster) IRQs processing.
>>
>> Again, this need some rationale why is it faster. And how this is going
>> to impact the other vGIC...
> 
> With the current code, there is a chance that hypervisor performs 
> do_IRQ() before fetching LRs with the information about IRQs being 
> processed by a guest. This could happen in guest trap paths where 
> interrupts are enabled before `enter_hypervisor_head()` call.
> 
> Performing `do_IRQ()` prior to `vgic_sync_from_lrs()` is assumed as less 
> efficient than doing it vice versa for high IRQ rate use-cases with the 
> number of different interrupts (specific for multimedia scenarios).

What you suggest here is really specific to your use case. However, 
think about the Real-Time case where you may want to receive your 
interrupt as soon as possible.

In general, keeping the interrupt disabled for a long time is a pretty 
bad idea because you increase latency. Interrupts should only be 
disabled when it is strictly necessary.

At the moment I don't view this change as necessary. But I am happy to 
be proven otherwise.

>   - For the old vgic implementation, `vgic_sync_from_lrs()` release LRs 
> from processed interrupts also shortens `inflight_irqs` sorted list. So 
> `do_IRQ()` has better chances to write IRQ directly to LR instead of 
> saving it into `lr_pending` list and possibly faster insert the new IRQ 
> into `inflight_irqs` list.
Saving to lr_pending means that you have all the LRs full.

I vaguely recall we spoke about it before but I can't find anything in 
my inbox. So can you explain again your use-case, the number of LRs...?

Note this is were writing down everything in the commit message (or 
after ---) helps people to understand where you are coming from.

Cheers,


-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 59+ messages in thread

end of thread, other threads:[~2019-07-30 20:47 UTC | newest]

Thread overview: 59+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <1543440731-21947-1-git-send-email-andrii.anisov@gmail.com>
2018-11-28 21:31 ` [RFC 01/16] xen/arm: Re-enable interrupt later in the trap path Andrii Anisov
2018-11-28 22:00   ` Julien Grall
2019-07-30 17:34     ` [Xen-devel] " Andrii Anisov
2019-07-30 20:46       ` Julien Grall
2018-11-28 21:31 ` [RFC 02/16] hack: drop GIC v3 support Andrii Anisov
2018-11-29 12:08   ` Andre Przywara
2018-11-28 21:31 ` [RFC 03/16] vgic: move pause_flags check out of vgic spinlock Andrii Anisov
2018-11-28 22:02   ` Julien Grall
2018-11-28 21:31 ` [RFC 04/16] vgic: move irq_to_pending out of lock Andrii Anisov
2018-11-29 11:07   ` Julien Grall
2018-11-28 21:32 ` [RFC 05/16] gic-vgic: Drop an excessive clear_lrs Andrii Anisov
2018-12-11 14:33   ` Julien Grall
2018-12-12 11:01     ` Andrii Anisov
2018-12-12 11:55       ` Julien Grall
2018-11-28 21:32 ` [RFC 06/16] gic: drop interrupts enabling on interrupts processing Andrii Anisov
2018-11-28 22:06   ` Julien Grall
2018-12-12 12:10     ` Julien Grall
2018-11-28 21:32 ` [RFC 07/16] gic-vgic:vgic: do not keep disabled IRQs in any of queues Andrii Anisov
2018-11-28 21:32 ` [RFC 08/16] gic: separate ppi processing Andrii Anisov
2018-12-12 12:37   ` Andrii Anisov
2018-12-12 12:46     ` Julien Grall
2018-12-12 12:52       ` Andrii Anisov
2018-12-12 13:46         ` Julien Grall
2018-11-28 21:32 ` [RFC 09/16] gic-vgic:vgic: avoid excessive conversions Andrii Anisov
2018-11-29 10:23   ` Julien Grall
2018-11-28 21:32 ` [RFC 10/16] gic:vgic:gic-vgic: introduce non-atomic bitops Andrii Anisov
2018-11-29 12:14   ` Andre Przywara
2018-11-29 16:07     ` Julien Grall
2018-12-03 12:33     ` Andrii Anisov
2018-12-03 12:58       ` Julien Grall
2018-12-03 13:05         ` Andrii Anisov
2018-12-03 13:08           ` Julien Grall
2018-12-03 16:37       ` Andre Przywara
2018-11-28 21:32 ` [RFC 11/16] irq: skip action avalability check for guest's IRQ Andrii Anisov
2018-12-11 14:48   ` Julien Grall
2018-12-12 11:30     ` Andrii Anisov
2018-12-12 11:59       ` Julien Grall
2018-12-12 13:51         ` Andrii Anisov
2018-12-12 14:49           ` Julien Grall
2018-12-12 14:58             ` Andrii Anisov
2018-12-12 15:08               ` Julien Grall
2018-12-12 15:57                 ` Andrii Anisov
2018-11-28 21:32 ` [RFC 12/16] gic-v2: Write HCR only on change Andrii Anisov
2018-11-29 16:36   ` Julien Grall
2018-11-28 21:32 ` [RFC 13/16] gic-vgic: skip irqs locking Andrii Anisov
2018-12-12 12:07   ` Julien Grall
2018-12-12 12:35     ` Andrii Anisov
2018-12-12 12:44       ` Julien Grall
2018-12-12 12:47         ` Andrii Anisov
2018-11-28 21:32 ` [RFC 14/16] hack: arm/domain: simplify context restore from idle vcpu Andrii Anisov
2018-11-29 15:53   ` Julien Grall
2018-11-29 16:00   ` Wei Liu
2018-11-28 21:32 ` [RFC 15/16] hack: move gicv2 LRs reads and writes out of spinlocks Andrii Anisov
2018-11-28 21:58   ` Julien Grall
2018-11-28 21:32 ` [RFC 16/16] gic: vgic: align frequently accessed data by cache line size Andrii Anisov
2018-11-29 16:24   ` Julien Grall
2018-11-29  7:40 ` [RFC 00/16] Old GIC (gic-vgic) optimizations for GICV2 Andrii Anisov
2018-11-29 11:00   ` Julien Grall
2018-11-29 12:07   ` Andre Przywara

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.