All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4] x86/hvmloader: Fixes/improvements
@ 2022-08-24 10:59 Andrew Cooper
  2022-08-24 10:59 ` [PATCH 1/4] x86/hvmloader: SMP improvements Andrew Cooper
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Andrew Cooper @ 2022-08-24 10:59 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper, Jan Beulich, Roger Pau Monné, Wei Liu

All encountered while debugging a regression in Xen 4.17.

They're all very trivial, and addressing low hanging fruit.

Andrew Cooper (4):
  x86/hvmloader: SMP improvements
  x86/hvmloader: Don't build as PIC/PIE
  x86/hvmloader: Don't override stddef.h
  x86/hvmloader: Move various helpers to being static inlines

 tools/firmware/hvmloader/Makefile    |  3 +-
 tools/firmware/hvmloader/config.h    |  2 +-
 tools/firmware/hvmloader/hvmloader.c |  1 -
 tools/firmware/hvmloader/mp_tables.c |  2 +-
 tools/firmware/hvmloader/smp.c       | 46 ++++++++++++------
 tools/firmware/hvmloader/util.c      | 78 +-----------------------------
 tools/firmware/hvmloader/util.h      | 94 +++++++++++++++++++++++++++++-------
 7 files changed, 112 insertions(+), 114 deletions(-)

-- 
2.11.0



^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/4] x86/hvmloader: SMP improvements
  2022-08-24 10:59 [PATCH 0/4] x86/hvmloader: Fixes/improvements Andrew Cooper
@ 2022-08-24 10:59 ` Andrew Cooper
  2022-08-24 14:21   ` Jan Beulich
  2022-08-24 10:59 ` [PATCH 2/4] x86/hvmloader: Don't build as PIC/PIE Andrew Cooper
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 11+ messages in thread
From: Andrew Cooper @ 2022-08-24 10:59 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper, Jan Beulich, Roger Pau Monné, Wei Liu

 * Use MOV CR instead of LMSW.  LMSW has no decode assist at all on AMD CPUs,
   forcing us to fully emulate the instruction.
 * Use __attribute__((used)) to fix the comment about ap_start().
 * Have ap_start() perform a self-INIT for APs, rather than having boot_cpu()
   do it.  This is marginally more parallel, and reduces the amount of remote
   vCPU management that Xen has to do on behalf of the guest.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>
---
 tools/firmware/hvmloader/smp.c | 46 ++++++++++++++++++++++++++++--------------
 1 file changed, 31 insertions(+), 15 deletions(-)

diff --git a/tools/firmware/hvmloader/smp.c b/tools/firmware/hvmloader/smp.c
index 082b17f13818..80154950ac32 100644
--- a/tools/firmware/hvmloader/smp.c
+++ b/tools/firmware/hvmloader/smp.c
@@ -35,9 +35,9 @@ asm (
     "    mov   %cs,%ax               \n"
     "    mov   %ax,%ds               \n"
     "    lgdt  gdt_desr-ap_boot_start\n"
-    "    xor   %ax, %ax              \n"
-    "    inc   %ax                   \n"
-    "    lmsw  %ax                   \n"
+    "    mov   %cr0, %eax            \n"
+    "    or    $1, %al               \n"
+    "    mov   %eax, %cr0            \n"
     "    ljmpl $0x08,$1f             \n"
     "gdt_desr:                       \n"
     "    .word gdt_end - gdt - 1     \n"
@@ -50,8 +50,6 @@ asm (
     "    movl  $stack_top,%esp       \n"
     "    movl  %esp,%ebp             \n"
     "    call  ap_start              \n"
-    "1:  hlt                         \n"
-    "    jmp  1b                     \n"
     "                                \n"
     "    .align 8                    \n"
     "gdt:                            \n"
@@ -68,14 +66,37 @@ asm (
     "    .text                       \n"
     );
 
-void ap_start(void); /* non-static avoids unused-function compiler warning */
-/*static*/ void ap_start(void)
+static void __attribute__((used)) ap_start(void)
 {
-    printf(" - CPU%d ... ", ap_cpuid);
+    unsigned int cpu = ap_cpuid;
+
+    printf(" - CPU%d ... ", cpu);
     cacheattr_init();
     printf("done.\n");
-    wmb();
-    ap_callin = 1;
+
+    /*
+     * Call in to the BSP.  For APs, take ourselves offline.
+     *
+     * We must not use the stack after calling in to the BSP.
+     */
+    asm volatile (
+        "    movb $1, ap_callin          \n"
+
+        "    test %[icr2], %[icr2]       \n"
+        "    jz   .Lbsp                  \n"
+
+        "    movl %[icr2], %[ICR2]       \n"
+        "    movl %[init], %[ICR1]       \n"
+        "1:  hlt                         \n"
+        "    jmp  1b                     \n"
+
+        ".Lbsp:                          \n"
+        :
+        : [icr2] "r" (SET_APIC_DEST_FIELD(LAPIC_ID(cpu))),
+          [init] "i" (APIC_DM_INIT),
+          [ICR1] "m" (*(uint32_t *)(LAPIC_BASE_ADDRESS + APIC_ICR)),
+          [ICR2] "m" (*(uint32_t *)(LAPIC_BASE_ADDRESS + APIC_ICR2))
+        : "memory" );
 }
 
 static void lapic_wait_ready(void)
@@ -111,11 +132,6 @@ static void boot_cpu(unsigned int cpu)
      */
     while ( !ap_callin )
         cpu_relax();
-
-    /* Take the secondary processor offline. */
-    lapic_write(APIC_ICR2, icr2);
-    lapic_write(APIC_ICR, APIC_DM_INIT);
-    lapic_wait_ready();    
 }
 
 void smp_initialise(void)
-- 
2.11.0



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/4] x86/hvmloader: Don't build as PIC/PIE
  2022-08-24 10:59 [PATCH 0/4] x86/hvmloader: Fixes/improvements Andrew Cooper
  2022-08-24 10:59 ` [PATCH 1/4] x86/hvmloader: SMP improvements Andrew Cooper
@ 2022-08-24 10:59 ` Andrew Cooper
  2022-08-25  7:20   ` Jan Beulich
  2022-08-24 10:59 ` [PATCH 3/4] x86/hvmloader: Don't override stddef.h Andrew Cooper
  2022-08-24 10:59 ` [PATCH 4/4] x86/hvmloader: Move various helpers to being static inlines Andrew Cooper
  3 siblings, 1 reply; 11+ messages in thread
From: Andrew Cooper @ 2022-08-24 10:59 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper, Jan Beulich, Roger Pau Monné, Wei Liu

HVMLoader is not relocatable in memory, and 32bit PIC code has a large
overhead.  Build it as non-relocatable.

Bloat-o-meter reports a net:
  add/remove: 0/0 grow/shrink: 3/107 up/down: 14/-3370 (-3356)

No functional change.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>
---
 tools/firmware/hvmloader/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/firmware/hvmloader/Makefile b/tools/firmware/hvmloader/Makefile
index 4f31c881613c..eb757819274b 100644
--- a/tools/firmware/hvmloader/Makefile
+++ b/tools/firmware/hvmloader/Makefile
@@ -23,7 +23,8 @@ include $(XEN_ROOT)/tools/firmware/Rules.mk
 # SMBIOS spec requires format mm/dd/yyyy
 SMBIOS_REL_DATE ?= $(shell date +%m/%d/%Y)
 
-CFLAGS += $(CFLAGS_xeninclude)
+CFLAGS += $(CFLAGS_xeninclude) -fno-pic
+$(call cc-option-add,CFLAGS,-no-pie)
 
 # We mustn't use tools-only public interfaces.
 CFLAGS += -D__XEN_INTERFACE_VERSION__=__XEN_LATEST_INTERFACE_VERSION__
-- 
2.11.0



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 3/4] x86/hvmloader: Don't override stddef.h
  2022-08-24 10:59 [PATCH 0/4] x86/hvmloader: Fixes/improvements Andrew Cooper
  2022-08-24 10:59 ` [PATCH 1/4] x86/hvmloader: SMP improvements Andrew Cooper
  2022-08-24 10:59 ` [PATCH 2/4] x86/hvmloader: Don't build as PIC/PIE Andrew Cooper
@ 2022-08-24 10:59 ` Andrew Cooper
  2022-08-25  8:03   ` Jan Beulich
  2022-08-24 10:59 ` [PATCH 4/4] x86/hvmloader: Move various helpers to being static inlines Andrew Cooper
  3 siblings, 1 reply; 11+ messages in thread
From: Andrew Cooper @ 2022-08-24 10:59 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper, Jan Beulich, Roger Pau Monné, Wei Liu

Since c/s 73b13705af7c ("firmware: provide a stand alone set of headers"),
we've had an implementation of offsetof() which isn't undefined behaviour.
Actually use it.

No functional change.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>
---
 tools/firmware/hvmloader/util.h | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/tools/firmware/hvmloader/util.h b/tools/firmware/hvmloader/util.h
index 8d95eab28a65..ac7ff264e247 100644
--- a/tools/firmware/hvmloader/util.h
+++ b/tools/firmware/hvmloader/util.h
@@ -28,12 +28,6 @@ enum {
 #define SEL_DATA32          0x0020
 #define SEL_CODE64          0x0028
 
-#undef offsetof
-#define offsetof(t, m) ((unsigned long)&((t *)0)->m)
-
-#undef NULL
-#define NULL ((void*)0)
-
 void __assert_failed(const char *assertion, const char *file, int line)
     __attribute__((noreturn));
 #define ASSERT(p) \
-- 
2.11.0



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 4/4] x86/hvmloader: Move various helpers to being static inlines
  2022-08-24 10:59 [PATCH 0/4] x86/hvmloader: Fixes/improvements Andrew Cooper
                   ` (2 preceding siblings ...)
  2022-08-24 10:59 ` [PATCH 3/4] x86/hvmloader: Don't override stddef.h Andrew Cooper
@ 2022-08-24 10:59 ` Andrew Cooper
  2022-08-25  8:14   ` Jan Beulich
  3 siblings, 1 reply; 11+ messages in thread
From: Andrew Cooper @ 2022-08-24 10:59 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper, Jan Beulich, Roger Pau Monné, Wei Liu

The IO port, MSR, IO-APIC and LAPIC accessors compile typically to single or
pairs of instructions, which is less overhead than even the stack manipulation
to call the helpers.

Move the implementations from util.c to being static inlines in util.h

In addition, turn ioapic_base_address into a constant as it is never modified
from 0xfec00000 (substantially shrinks the IO-APIC logic), and make use of the
"A" constraint for WRMSR/RDMSR like we already do for RDSTC.

Bloat-o-meter reports a net:
  add/remove: 0/13 grow/shrink: 1/19 up/down: 6/-743 (-737)

No functional change.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>
---
 tools/firmware/hvmloader/config.h    |  2 +-
 tools/firmware/hvmloader/hvmloader.c |  1 -
 tools/firmware/hvmloader/mp_tables.c |  2 +-
 tools/firmware/hvmloader/util.c      | 78 +-------------------------------
 tools/firmware/hvmloader/util.h      | 88 +++++++++++++++++++++++++++++++-----
 5 files changed, 79 insertions(+), 92 deletions(-)

diff --git a/tools/firmware/hvmloader/config.h b/tools/firmware/hvmloader/config.h
index c82adf6dc508..b16fad300fbc 100644
--- a/tools/firmware/hvmloader/config.h
+++ b/tools/firmware/hvmloader/config.h
@@ -44,7 +44,7 @@ extern struct bios_config ovmf_config;
 #define PAGE_SHIFT 12
 #define PAGE_SIZE  (1ul << PAGE_SHIFT)
 
-extern uint32_t ioapic_base_address;
+#define IOAPIC_BASE_ADDRESS 0xfec00000
 extern uint8_t ioapic_version;
 
 #define IOAPIC_ID           0x01
diff --git a/tools/firmware/hvmloader/hvmloader.c b/tools/firmware/hvmloader/hvmloader.c
index c58841e5b556..f8af88fabf24 100644
--- a/tools/firmware/hvmloader/hvmloader.c
+++ b/tools/firmware/hvmloader/hvmloader.c
@@ -113,7 +113,6 @@ asm (
 
 unsigned long scratch_start = SCRATCH_PHYSICAL_ADDRESS;
 
-uint32_t ioapic_base_address = 0xfec00000;
 uint8_t ioapic_version;
 
 bool acpi_enabled;
diff --git a/tools/firmware/hvmloader/mp_tables.c b/tools/firmware/hvmloader/mp_tables.c
index d207ecbf00c9..77d3010406d0 100644
--- a/tools/firmware/hvmloader/mp_tables.c
+++ b/tools/firmware/hvmloader/mp_tables.c
@@ -229,7 +229,7 @@ static void fill_mp_ioapic_entry(struct mp_ioapic_entry *mpie)
     mpie->ioapic_id = IOAPIC_ID;
     mpie->ioapic_version = ioapic_version;
     mpie->ioapic_flags = 1; /* enabled */
-    mpie->ioapic_addr = ioapic_base_address;
+    mpie->ioapic_addr = IOAPIC_BASE_ADDRESS;
 }
 
 
diff --git a/tools/firmware/hvmloader/util.c b/tools/firmware/hvmloader/util.c
index 581b35e5cfb5..d1dcc2844a43 100644
--- a/tools/firmware/hvmloader/util.c
+++ b/tools/firmware/hvmloader/util.c
@@ -42,60 +42,6 @@ bool check_overlap(uint64_t start, uint64_t size,
             (start < reserved_start + reserved_size);
 }
 
-void wrmsr(uint32_t idx, uint64_t v)
-{
-    asm volatile (
-        "wrmsr"
-        : : "c" (idx), "a" ((uint32_t)v), "d" ((uint32_t)(v>>32)) );
-}
-
-uint64_t rdmsr(uint32_t idx)
-{
-    uint32_t lo, hi;
-
-    asm volatile (
-        "rdmsr"
-        : "=a" (lo), "=d" (hi) : "c" (idx) );
-
-    return (lo | ((uint64_t)hi << 32));
-}
-
-void outb(uint16_t addr, uint8_t val)
-{
-    asm volatile ( "outb %%al, %%dx" : : "d" (addr), "a" (val) );
-}
-
-void outw(uint16_t addr, uint16_t val)
-{
-    asm volatile ( "outw %%ax, %%dx" : : "d" (addr), "a" (val) );
-}
-
-void outl(uint16_t addr, uint32_t val)
-{
-    asm volatile ( "outl %%eax, %%dx" : : "d" (addr), "a" (val) );
-}
-
-uint8_t inb(uint16_t addr)
-{
-    uint8_t val;
-    asm volatile ( "inb %%dx,%%al" : "=a" (val) : "d" (addr) );
-    return val;
-}
-
-uint16_t inw(uint16_t addr)
-{
-    uint16_t val;
-    asm volatile ( "inw %%dx,%%ax" : "=a" (val) : "d" (addr) );
-    return val;
-}
-
-uint32_t inl(uint16_t addr)
-{
-    uint32_t val;
-    asm volatile ( "inl %%dx,%%eax" : "=a" (val) : "d" (addr) );
-    return val;
-}
-
 uint8_t cmos_inb(uint8_t idx)
 {
     outb(0x70, idx);
@@ -493,28 +439,6 @@ void *scratch_alloc(uint32_t size, uint32_t align)
     return (void *)(unsigned long)s;
 }
 
-uint32_t ioapic_read(uint32_t reg)
-{
-    *(volatile uint32_t *)(ioapic_base_address + 0x00) = reg;
-    return *(volatile uint32_t *)(ioapic_base_address + 0x10);
-}
-
-void ioapic_write(uint32_t reg, uint32_t val)
-{
-    *(volatile uint32_t *)(ioapic_base_address + 0x00) = reg;
-    *(volatile uint32_t *)(ioapic_base_address + 0x10) = val;
-}
-
-uint32_t lapic_read(uint32_t reg)
-{
-    return *(volatile uint32_t *)(LAPIC_BASE_ADDRESS + reg);
-}
-
-void lapic_write(uint32_t reg, uint32_t val)
-{
-    *(volatile uint32_t *)(LAPIC_BASE_ADDRESS + reg) = val;
-}
-
 #define PCI_CONF1_ADDRESS(bus, devfn, reg) \
     (0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3))
 
@@ -943,7 +867,7 @@ void hvmloader_acpi_build_tables(struct acpi_config *config,
 
     config->lapic_base_address = LAPIC_BASE_ADDRESS;
     config->lapic_id = acpi_lapic_id;
-    config->ioapic_base_address = ioapic_base_address;
+    config->ioapic_base_address = IOAPIC_BASE_ADDRESS;
     config->ioapic_id = IOAPIC_ID;
     config->pci_isa_irq_mask = PCI_ISA_IRQ_MASK; 
 
diff --git a/tools/firmware/hvmloader/util.h b/tools/firmware/hvmloader/util.h
index ac7ff264e247..b6108a705eab 100644
--- a/tools/firmware/hvmloader/util.h
+++ b/tools/firmware/hvmloader/util.h
@@ -7,6 +7,7 @@
 #include <stdbool.h>
 #include <xen/xen.h>
 #include <xen/hvm/hvm_info_table.h>
+#include "config.h"
 #include "e820.h"
 
 /* Request un-prefixed values from errno.h. */
@@ -61,28 +62,91 @@ static inline int test_and_clear_bit(int nr, volatile void *addr)
 }
 
 /* MSR access */
-void wrmsr(uint32_t idx, uint64_t v);
-uint64_t rdmsr(uint32_t idx);
+static inline void wrmsr(uint32_t idx, uint64_t v)
+{
+    asm volatile ( "wrmsr" :: "c" (idx), "A" (v) : "memory" );
+}
+
+static inline uint64_t rdmsr(uint32_t idx)
+{
+    uint64_t res;
+
+    asm volatile ( "rdmsr" : "=A" (res) : "c" (idx) );
+
+    return res;
+}
 
 /* I/O output */
-void outb(uint16_t addr, uint8_t  val);
-void outw(uint16_t addr, uint16_t val);
-void outl(uint16_t addr, uint32_t val);
+static inline void outb(uint16_t addr, uint8_t val)
+{
+    asm volatile ( "outb %%al, %%dx" :: "d" (addr), "a" (val) );
+}
+
+static inline void outw(uint16_t addr, uint16_t val)
+{
+    asm volatile ( "outw %%ax, %%dx" :: "d" (addr), "a" (val) );
+}
+
+static inline void outl(uint16_t addr, uint32_t val)
+{
+    asm volatile ( "outl %%eax, %%dx" :: "d" (addr), "a" (val) );
+}
 
 /* I/O input */
-uint8_t  inb(uint16_t addr);
-uint16_t inw(uint16_t addr);
-uint32_t inl(uint16_t addr);
+static inline uint8_t inb(uint16_t addr)
+{
+    uint8_t val;
+
+    asm volatile ( "inb %%dx,%%al" : "=a" (val) : "d" (addr) );
+
+    return val;
+}
+
+static inline uint16_t inw(uint16_t addr)
+{
+    uint16_t val;
+
+    asm volatile ( "inw %%dx,%%ax" : "=a" (val) : "d" (addr) );
+
+    return val;
+}
+
+static inline uint32_t inl(uint16_t addr)
+{
+    uint32_t val;
+
+    asm volatile ( "inl %%dx,%%eax" : "=a" (val) : "d" (addr) );
+
+    return val;
+}
 
 /* CMOS access */
 uint8_t cmos_inb(uint8_t idx);
 void cmos_outb(uint8_t idx, uint8_t val);
 
 /* APIC access */
-uint32_t ioapic_read(uint32_t reg);
-void ioapic_write(uint32_t reg, uint32_t val);
-uint32_t lapic_read(uint32_t reg);
-void lapic_write(uint32_t reg, uint32_t val);
+static inline uint32_t ioapic_read(uint32_t reg)
+{
+    *(volatile uint32_t *)(IOAPIC_BASE_ADDRESS + 0x00) = reg;
+    return *(volatile uint32_t *)(IOAPIC_BASE_ADDRESS + 0x10);
+}
+
+static inline void ioapic_write(uint32_t reg, uint32_t val)
+{
+    *(volatile uint32_t *)(IOAPIC_BASE_ADDRESS + 0x00) = reg;
+    *(volatile uint32_t *)(IOAPIC_BASE_ADDRESS + 0x10) = val;
+}
+
+#define LAPIC_BASE_ADDRESS  0xfee00000
+static inline uint32_t lapic_read(uint32_t reg)
+{
+    return *(volatile uint32_t *)(LAPIC_BASE_ADDRESS + reg);
+}
+
+static inline void lapic_write(uint32_t reg, uint32_t val)
+{
+    *(volatile uint32_t *)(LAPIC_BASE_ADDRESS + reg) = val;
+}
 
 /* PCI access */
 uint32_t pci_read(uint32_t devfn, uint32_t reg, uint32_t len);
-- 
2.11.0



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/4] x86/hvmloader: SMP improvements
  2022-08-24 10:59 ` [PATCH 1/4] x86/hvmloader: SMP improvements Andrew Cooper
@ 2022-08-24 14:21   ` Jan Beulich
  2023-03-24 11:46     ` Andrew Cooper
  0 siblings, 1 reply; 11+ messages in thread
From: Jan Beulich @ 2022-08-24 14:21 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Roger Pau Monné, Wei Liu, Xen-devel

On 24.08.2022 12:59, Andrew Cooper wrote:
> --- a/tools/firmware/hvmloader/smp.c
> +++ b/tools/firmware/hvmloader/smp.c
> @@ -35,9 +35,9 @@ asm (
>      "    mov   %cs,%ax               \n"
>      "    mov   %ax,%ds               \n"
>      "    lgdt  gdt_desr-ap_boot_start\n"
> -    "    xor   %ax, %ax              \n"
> -    "    inc   %ax                   \n"
> -    "    lmsw  %ax                   \n"
> +    "    mov   %cr0, %eax            \n"
> +    "    or    $1, %al               \n"
> +    "    mov   %eax, %cr0            \n"

Hmm, yes, read-modify-write should probably have been used from
the beginning, irrespective of using 286 or 386 insns.

> @@ -68,14 +66,37 @@ asm (
>      "    .text                       \n"
>      );
>  
> -void ap_start(void); /* non-static avoids unused-function compiler warning */
> -/*static*/ void ap_start(void)
> +static void __attribute__((used)) ap_start(void)
>  {
> -    printf(" - CPU%d ... ", ap_cpuid);
> +    unsigned int cpu = ap_cpuid;
> +
> +    printf(" - CPU%d ... ", cpu);
>      cacheattr_init();
>      printf("done.\n");
> -    wmb();

Is there a reason you remove this barrier but not the one in boot_cpu()?

> -    ap_callin = 1;
> +
> +    /*
> +     * Call in to the BSP.  For APs, take ourselves offline.
> +     *
> +     * We must not use the stack after calling in to the BSP.
> +     */
> +    asm volatile (
> +        "    movb $1, ap_callin          \n"
> +
> +        "    test %[icr2], %[icr2]       \n"
> +        "    jz   .Lbsp                  \n"

Are we intending to guarantee going forward that the BSP always has
APIC ID zero?

> +        "    movl %[icr2], %[ICR2]       \n"
> +        "    movl %[init], %[ICR1]       \n"
> +        "1:  hlt                         \n"
> +        "    jmp  1b                     \n"

The use of the function for the BSP is questionable anyway. What is
really needed is the call to cacheattr_init(). I'm inclined to
suggest to move to something like

void smp_initialise(void)
{
    unsigned int i, nr_cpus = hvm_info->nr_vcpus;

    cacheattr_init();

    if ( nr_cpus <= 1 )
        return;

    memcpy((void *)AP_BOOT_EIP, ap_boot_start, ap_boot_end - ap_boot_start);

    printf("Multiprocessor initialisation:\n");
    for ( i = 1; i < nr_cpus; i++ )
        boot_cpu(i);
}

thus eliminating bogus output when there's just one vCPU. 

Then the function here can become noreturn (which I was about to suggest
until spotting that for the BSP the function actually does return).

> +        ".Lbsp:                          \n"
> +        :
> +        : [icr2] "r" (SET_APIC_DEST_FIELD(LAPIC_ID(cpu))),
> +          [init] "i" (APIC_DM_INIT),
> +          [ICR1] "m" (*(uint32_t *)(LAPIC_BASE_ADDRESS + APIC_ICR)),
> +          [ICR2] "m" (*(uint32_t *)(LAPIC_BASE_ADDRESS + APIC_ICR2))
> +        : "memory" );

Can't you use APIC_DEST_SELF now, avoiding the need to fiddle
with ICR2?

Jan


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/4] x86/hvmloader: Don't build as PIC/PIE
  2022-08-24 10:59 ` [PATCH 2/4] x86/hvmloader: Don't build as PIC/PIE Andrew Cooper
@ 2022-08-25  7:20   ` Jan Beulich
  2023-03-24 11:18     ` Andrew Cooper
  0 siblings, 1 reply; 11+ messages in thread
From: Jan Beulich @ 2022-08-25  7:20 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Roger Pau Monné, Wei Liu, Xen-devel

On 24.08.2022 12:59, Andrew Cooper wrote:
> HVMLoader is not relocatable in memory, and 32bit PIC code has a large
> overhead.  Build it as non-relocatable.
> 
> Bloat-o-meter reports a net:
>   add/remove: 0/0 grow/shrink: 3/107 up/down: 14/-3370 (-3356)
> 
> No functional change.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> ---
> CC: Jan Beulich <JBeulich@suse.com>
> CC: Roger Pau Monné <roger.pau@citrix.com>
> CC: Wei Liu <wl@xen.org>
> ---
>  tools/firmware/hvmloader/Makefile | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/firmware/hvmloader/Makefile b/tools/firmware/hvmloader/Makefile
> index 4f31c881613c..eb757819274b 100644
> --- a/tools/firmware/hvmloader/Makefile
> +++ b/tools/firmware/hvmloader/Makefile
> @@ -23,7 +23,8 @@ include $(XEN_ROOT)/tools/firmware/Rules.mk
>  # SMBIOS spec requires format mm/dd/yyyy
>  SMBIOS_REL_DATE ?= $(shell date +%m/%d/%Y)
>  
> -CFLAGS += $(CFLAGS_xeninclude)
> +CFLAGS += $(CFLAGS_xeninclude) -fno-pic
> +$(call cc-option-add,CFLAGS,-no-pie)

This is supposed to be coming from EMBEDDED_EXTRA_CFLAGS, if only
it was spelled correctly there. See the patch just sent. This line
(see that other patch) is meaningless anyway, as we don't use
$(CFLAGS) for linking here. So with it dropped
Reviewed-by: Jan Beulich <jbeulich@suse.com>

I do think though that the description could do with some expanding,
as I don't think -fpic or -fPIC is the default normally. I suppose
it's only specific distros which make this the default.

Jan


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/4] x86/hvmloader: Don't override stddef.h
  2022-08-24 10:59 ` [PATCH 3/4] x86/hvmloader: Don't override stddef.h Andrew Cooper
@ 2022-08-25  8:03   ` Jan Beulich
  0 siblings, 0 replies; 11+ messages in thread
From: Jan Beulich @ 2022-08-25  8:03 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Roger Pau Monné, Wei Liu, Xen-devel

On 24.08.2022 12:59, Andrew Cooper wrote:
> Since c/s 73b13705af7c ("firmware: provide a stand alone set of headers"),
> we've had an implementation of offsetof() which isn't undefined behaviour.
> Actually use it.
> 
> No functional change.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

Acked-by: Jan Beulich <jbeulich@suse.com>



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 4/4] x86/hvmloader: Move various helpers to being static inlines
  2022-08-24 10:59 ` [PATCH 4/4] x86/hvmloader: Move various helpers to being static inlines Andrew Cooper
@ 2022-08-25  8:14   ` Jan Beulich
  0 siblings, 0 replies; 11+ messages in thread
From: Jan Beulich @ 2022-08-25  8:14 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Roger Pau Monné, Wei Liu, Xen-devel

On 24.08.2022 12:59, Andrew Cooper wrote:
> The IO port, MSR, IO-APIC and LAPIC accessors compile typically to single or
> pairs of instructions, which is less overhead than even the stack manipulation
> to call the helpers.
> 
> Move the implementations from util.c to being static inlines in util.h
> 
> In addition, turn ioapic_base_address into a constant as it is never modified
> from 0xfec00000 (substantially shrinks the IO-APIC logic), and make use of the
> "A" constraint for WRMSR/RDMSR like we already do for RDSTC.

Nit: RDTSC

> Bloat-o-meter reports a net:
>   add/remove: 0/13 grow/shrink: 1/19 up/down: 6/-743 (-737)
> 
> No functional change.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

Reviewed-by: Jan Beulich <jbeulich@suse.com>
albeit with several further nits/suggestions:

> --- a/tools/firmware/hvmloader/util.h
> +++ b/tools/firmware/hvmloader/util.h
> @@ -7,6 +7,7 @@
>  #include <stdbool.h>
>  #include <xen/xen.h>
>  #include <xen/hvm/hvm_info_table.h>
> +#include "config.h"
>  #include "e820.h"
>  
>  /* Request un-prefixed values from errno.h. */
> @@ -61,28 +62,91 @@ static inline int test_and_clear_bit(int nr, volatile void *addr)
>  }
>  
>  /* MSR access */
> -void wrmsr(uint32_t idx, uint64_t v);
> -uint64_t rdmsr(uint32_t idx);
> +static inline void wrmsr(uint32_t idx, uint64_t v)
> +{
> +    asm volatile ( "wrmsr" :: "c" (idx), "A" (v) : "memory" );

The addition of the "memory" clobber imo wants mentioning in the
description, so it's clear that it's intentional (and why).

> +}
> +
> +static inline uint64_t rdmsr(uint32_t idx)
> +{
> +    uint64_t res;
> +
> +    asm volatile ( "rdmsr" : "=A" (res) : "c" (idx) );
> +
> +    return res;
> +}
>  
>  /* I/O output */
> -void outb(uint16_t addr, uint8_t  val);
> -void outw(uint16_t addr, uint16_t val);
> -void outl(uint16_t addr, uint32_t val);
> +static inline void outb(uint16_t addr, uint8_t val)
> +{
> +    asm volatile ( "outb %%al, %%dx" :: "d" (addr), "a" (val) );

I'm inclined to ask to use "outb %1, %0" here (and similarly below).
I also wonder whether at least all the OUTs shouldn't also gain a
"memory" clobber.

> +}
> +
> +static inline void outw(uint16_t addr, uint16_t val)
> +{
> +    asm volatile ( "outw %%ax, %%dx" :: "d" (addr), "a" (val) );
> +}
> +
> +static inline void outl(uint16_t addr, uint32_t val)
> +{
> +    asm volatile ( "outl %%eax, %%dx" :: "d" (addr), "a" (val) );
> +}
>  
>  /* I/O input */
> -uint8_t  inb(uint16_t addr);
> -uint16_t inw(uint16_t addr);
> -uint32_t inl(uint16_t addr);
> +static inline uint8_t inb(uint16_t addr)
> +{
> +    uint8_t val;
> +
> +    asm volatile ( "inb %%dx,%%al" : "=a" (val) : "d" (addr) );

Would you mind adding blanks after the comma here and below?

> +
> +    return val;
> +}
> +
> +static inline uint16_t inw(uint16_t addr)
> +{
> +    uint16_t val;
> +
> +    asm volatile ( "inw %%dx,%%ax" : "=a" (val) : "d" (addr) );
> +
> +    return val;
> +}
> +
> +static inline uint32_t inl(uint16_t addr)
> +{
> +    uint32_t val;
> +
> +    asm volatile ( "inl %%dx,%%eax" : "=a" (val) : "d" (addr) );
> +
> +    return val;
> +}
>  
>  /* CMOS access */
>  uint8_t cmos_inb(uint8_t idx);
>  void cmos_outb(uint8_t idx, uint8_t val);
>  
>  /* APIC access */
> -uint32_t ioapic_read(uint32_t reg);
> -void ioapic_write(uint32_t reg, uint32_t val);
> -uint32_t lapic_read(uint32_t reg);
> -void lapic_write(uint32_t reg, uint32_t val);
> +static inline uint32_t ioapic_read(uint32_t reg)
> +{
> +    *(volatile uint32_t *)(IOAPIC_BASE_ADDRESS + 0x00) = reg;
> +    return *(volatile uint32_t *)(IOAPIC_BASE_ADDRESS + 0x10);
> +}
> +
> +static inline void ioapic_write(uint32_t reg, uint32_t val)
> +{
> +    *(volatile uint32_t *)(IOAPIC_BASE_ADDRESS + 0x00) = reg;
> +    *(volatile uint32_t *)(IOAPIC_BASE_ADDRESS + 0x10) = val;
> +}
> +
> +#define LAPIC_BASE_ADDRESS  0xfee00000

Seeing this #define here, does there anything stand in the way of
putting IOAPIC_BASE_ADDRESS next to the inline functions as well?

Jan

> +static inline uint32_t lapic_read(uint32_t reg)
> +{
> +    return *(volatile uint32_t *)(LAPIC_BASE_ADDRESS + reg);
> +}
> +
> +static inline void lapic_write(uint32_t reg, uint32_t val)
> +{
> +    *(volatile uint32_t *)(LAPIC_BASE_ADDRESS + reg) = val;
> +}
>  
>  /* PCI access */
>  uint32_t pci_read(uint32_t devfn, uint32_t reg, uint32_t len);



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/4] x86/hvmloader: Don't build as PIC/PIE
  2022-08-25  7:20   ` Jan Beulich
@ 2023-03-24 11:18     ` Andrew Cooper
  0 siblings, 0 replies; 11+ messages in thread
From: Andrew Cooper @ 2023-03-24 11:18 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Roger Pau Monné, Wei Liu, Xen-devel

On 25/08/2022 8:20 am, Jan Beulich wrote:
> On 24.08.2022 12:59, Andrew Cooper wrote:
>> HVMLoader is not relocatable in memory, and 32bit PIC code has a large
>> overhead.  Build it as non-relocatable.
>>
>> Bloat-o-meter reports a net:
>>   add/remove: 0/0 grow/shrink: 3/107 up/down: 14/-3370 (-3356)
>>
>> No functional change.
>>
>> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
>> ---
>> CC: Jan Beulich <JBeulich@suse.com>
>> CC: Roger Pau Monné <roger.pau@citrix.com>
>> CC: Wei Liu <wl@xen.org>
>> ---
>>  tools/firmware/hvmloader/Makefile | 3 ++-
>>  1 file changed, 2 insertions(+), 1 deletion(-)
>>
>> diff --git a/tools/firmware/hvmloader/Makefile
>> b/tools/firmware/hvmloader/Makefile
>> index 4f31c881613c..eb757819274b 100644
>> --- a/tools/firmware/hvmloader/Makefile
>> +++ b/tools/firmware/hvmloader/Makefile
>> @@ -23,7 +23,8 @@ include $(XEN_ROOT)/tools/firmware/Rules.mk
>>  # SMBIOS spec requires format mm/dd/yyyy
>>  SMBIOS_REL_DATE ?= $(shell date +%m/%d/%Y)
>>  
>> -CFLAGS += $(CFLAGS_xeninclude)
>> +CFLAGS += $(CFLAGS_xeninclude) -fno-pic
>> +$(call cc-option-add,CFLAGS,-no-pie)
>
> This is supposed to be coming from EMBEDDED_EXTRA_CFLAGS, if only
> it was spelled correctly there. See the patch just sent. This line
> (see that other patch) is meaningless anyway, as we don't use
> $(CFLAGS) for linking here. So with it dropped
> Reviewed-by: Jan Beulich <jbeulich@suse.com>
>
> I do think though that the description could do with some expanding,
> as I don't think -fpic or -fPIC is the default normally. I suppose
> it's only specific distros which make this the default.

Yeah, for ASLR reasons, but that covers ~all of our downstream users.

I'll tweak the commit message and drop the PIE part.

~Andrew


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/4] x86/hvmloader: SMP improvements
  2022-08-24 14:21   ` Jan Beulich
@ 2023-03-24 11:46     ` Andrew Cooper
  0 siblings, 0 replies; 11+ messages in thread
From: Andrew Cooper @ 2023-03-24 11:46 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Roger Pau Monné, Wei Liu, Xen-devel

On 24/08/2022 3:21 pm, Jan Beulich wrote:
> On 24.08.2022 12:59, Andrew Cooper wrote:
>
>> -    ap_callin = 1;
>> +
>> +    /*
>> +     * Call in to the BSP.  For APs, take ourselves offline.
>> +     *
>> +     * We must not use the stack after calling in to the BSP.
>> +     */
>> +    asm volatile (
>> +        "    movb $1, ap_callin          \n"
>> +
>> +        "    test %[icr2], %[icr2]       \n"
>> +        "    jz   .Lbsp                  \n"
>
> Are we intending to guarantee going forward that the BSP always has
> APIC ID zero?

It's currently true, and I doubt that will change, but I prefer the
suggestion to not call this at all on the BSP.

>
>> +        "    movl %[icr2], %[ICR2]       \n"
>> +        "    movl %[init], %[ICR1]       \n"
>> +        "1:  hlt                         \n"
>> +        "    jmp  1b                     \n"
>
> The use of the function for the BSP is questionable anyway. What is
> really needed is the call to cacheattr_init(). I'm inclined to
> suggest to move to something like
>
> void smp_initialise(void)
> {
>    unsigned int i, nr_cpus = hvm_info->nr_vcpus;
>
>    cacheattr_init();
>
>    if ( nr_cpus <= 1 )
>        return;
>
>    memcpy((void *)AP_BOOT_EIP, ap_boot_start, ap_boot_end -
> ap_boot_start);
>
>    printf("Multiprocessor initialisation:\n");
>    for ( i = 1; i < nr_cpus; i++ )
>        boot_cpu(i);
> }
>
> thus eliminating bogus output when there's just one vCPU.
> Then the function here can become noreturn (which I was about to suggest
> until spotting that for the BSP the function actually does return).

Dropping the printk() isn't nice, because you'll then get unqualified
information from cacheattr_init().

I'll see if I can rearrange this a bit more nicely.

>
>> +        ".Lbsp:                          \n"
>> +        :
>> +        : [icr2] "r" (SET_APIC_DEST_FIELD(LAPIC_ID(cpu))),
>> +          [init] "i" (APIC_DM_INIT),
>> +          [ICR1] "m" (*(uint32_t *)(LAPIC_BASE_ADDRESS + APIC_ICR)),
>> +          [ICR2] "m" (*(uint32_t *)(LAPIC_BASE_ADDRESS + APIC_ICR2))
>> +        : "memory" );
>
> Can't you use APIC_DEST_SELF now, avoiding the need to fiddle
> with ICR2?

No.  Fixed is the only message type which can use self or all-inc-self. 
All others are only permitted to use the all-excluding-self.

This makes sense as a consequence of likely shortcuts taking when
integrating the LAPIC into the core.  Either way, it's documented
behaviour now, however inconvenient this is for this case.

~Andrew


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2023-03-24 11:47 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-08-24 10:59 [PATCH 0/4] x86/hvmloader: Fixes/improvements Andrew Cooper
2022-08-24 10:59 ` [PATCH 1/4] x86/hvmloader: SMP improvements Andrew Cooper
2022-08-24 14:21   ` Jan Beulich
2023-03-24 11:46     ` Andrew Cooper
2022-08-24 10:59 ` [PATCH 2/4] x86/hvmloader: Don't build as PIC/PIE Andrew Cooper
2022-08-25  7:20   ` Jan Beulich
2023-03-24 11:18     ` Andrew Cooper
2022-08-24 10:59 ` [PATCH 3/4] x86/hvmloader: Don't override stddef.h Andrew Cooper
2022-08-25  8:03   ` Jan Beulich
2022-08-24 10:59 ` [PATCH 4/4] x86/hvmloader: Move various helpers to being static inlines Andrew Cooper
2022-08-25  8:14   ` Jan Beulich

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.