The following patches fix some realmode test compatibility issues between gcc and clang. Please look at the "x86: realmode: use inline asm to get stack pointer" patch closely. I'm fairly certain it's the correct fix, but would like confirmation. Thanks! Bill Wendling (2): x86: realmode: explicitly copy structure to avoid memcpy x86: realmode: use inline asm to get stack pointer x86/realmode.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) -- 2.23.0.700.g56cf767bdb-goog
Clang prefers to use a "mempcy" (or equivalent) to copy the "regs" structure. This doesn't work in 16-bit mode, as it will end up copying over half the number of bytes. GCC performs a field-by-field copy of the structure, so force clang to do the same thing. Signed-off-by: Bill Wendling <morbo@google.com> --- x86/realmode.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/x86/realmode.c b/x86/realmode.c index 303d093..cf45fd6 100644 --- a/x86/realmode.c +++ b/x86/realmode.c @@ -117,6 +117,19 @@ struct regs { u32 eip, eflags; }; +#define COPY_REG(name, dst, src) (dst).name = (src).name +#define COPY_REGS(dst, src) \ + COPY_REG(eax, dst, src); \ + COPY_REG(ebx, dst, src); \ + COPY_REG(ecx, dst, src); \ + COPY_REG(edx, dst, src); \ + COPY_REG(esi, dst, src); \ + COPY_REG(edi, dst, src); \ + COPY_REG(esp, dst, src); \ + COPY_REG(ebp, dst, src); \ + COPY_REG(eip, dst, src); \ + COPY_REG(eflags, dst, src) + struct table_descr { u16 limit; void *base; @@ -148,11 +161,11 @@ static void exec_in_big_real_mode(struct insn_desc *insn) extern u8 test_insn[], test_insn_end[]; for (i = 0; i < insn->len; ++i) - test_insn[i] = ((u8 *)(unsigned long)insn->ptr)[i]; + test_insn[i] = ((u8 *)(unsigned long)insn->ptr)[i]; for (; i < test_insn_end - test_insn; ++i) test_insn[i] = 0x90; // nop - save = inregs; + COPY_REGS(save, inregs); asm volatile( "lgdtl %[gdt_descr] \n\t" "mov %%cr0, %[tmp] \n\t" @@ -196,7 +209,7 @@ static void exec_in_big_real_mode(struct insn_desc *insn) : [gdt_descr]"m"(gdt_descr), [bigseg]"r"((short)16) : "cc", "memory" ); - outregs = save; + COPY_REGS(outregs, save); } #define R_AX 1 -- 2.23.0.700.g56cf767bdb-goog
It's fragile to try to retrieve the stack pointer by taking the address of a variable on the stack. For instance, clang reserves more stack space than gcc here, indicating that the variable may not be at the start of the stack. Instead of relying upon this to work, retrieve the "%rbp" value, which contains the value of "%rsp" before stack allocation. Signed-off-by: Bill Wendling <morbo@google.com> --- x86/realmode.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/x86/realmode.c b/x86/realmode.c index cf45fd6..7c89dd1 100644 --- a/x86/realmode.c +++ b/x86/realmode.c @@ -518,11 +518,12 @@ extern void retf_imm(void); static void test_call(void) { - u32 esp[16]; u32 addr; inregs = (struct regs){ 0 }; - inregs.esp = (u32)esp; + + // At this point the original stack pointer is in %ebp. + asm volatile ("mov %%ebp, %0" : "=rm"(inregs.esp)); MK_INSN(call1, "mov $test_function, %eax \n\t" "call *%eax\n\t"); -- 2.23.0.700.g56cf767bdb-goog
On Sat, Oct 12, 2019 at 4:59 PM Bill Wendling <morbo@google.com> wrote: > > Clang prefers to use a "mempcy" (or equivalent) to copy the "regs" > structure. This doesn't work in 16-bit mode, as it will end up copying > over half the number of bytes. GCC performs a field-by-field copy of the > structure, so force clang to do the same thing. > > Signed-off-by: Bill Wendling <morbo@google.com> > --- > x86/realmode.c | 19 ++++++++++++++++--- > 1 file changed, 16 insertions(+), 3 deletions(-) > > diff --git a/x86/realmode.c b/x86/realmode.c > index 303d093..cf45fd6 100644 > --- a/x86/realmode.c > +++ b/x86/realmode.c > @@ -117,6 +117,19 @@ struct regs { > u32 eip, eflags; > }; > > +#define COPY_REG(name, dst, src) (dst).name = (src).name > +#define COPY_REGS(dst, src) \ > + COPY_REG(eax, dst, src); \ > + COPY_REG(ebx, dst, src); \ > + COPY_REG(ecx, dst, src); \ > + COPY_REG(edx, dst, src); \ > + COPY_REG(esi, dst, src); \ > + COPY_REG(edi, dst, src); \ > + COPY_REG(esp, dst, src); \ > + COPY_REG(ebp, dst, src); \ > + COPY_REG(eip, dst, src); \ > + COPY_REG(eflags, dst, src) > + This seems very fragile, too. Can we introduce our own address-space-size-independent "memcpy" and use that? I'm thinking something like: static void bytecopy(void *dst, void *src, u32 count) { asm volatile("rep movsb" : "+D" (dst), "+S" (src), "+c" (count) : : "cc"); } > struct table_descr { > u16 limit; > void *base; > @@ -148,11 +161,11 @@ static void exec_in_big_real_mode(struct insn_desc *insn) > extern u8 test_insn[], test_insn_end[]; > > for (i = 0; i < insn->len; ++i) > - test_insn[i] = ((u8 *)(unsigned long)insn->ptr)[i]; > + test_insn[i] = ((u8 *)(unsigned long)insn->ptr)[i]; > for (; i < test_insn_end - test_insn; ++i) > test_insn[i] = 0x90; // nop > > - save = inregs; > + COPY_REGS(save, inregs); > asm volatile( > "lgdtl %[gdt_descr] \n\t" > "mov %%cr0, %[tmp] \n\t" > @@ -196,7 +209,7 @@ static void exec_in_big_real_mode(struct insn_desc *insn) > : [gdt_descr]"m"(gdt_descr), [bigseg]"r"((short)16) > : "cc", "memory" > ); > - outregs = save; > + COPY_REGS(outregs, save); > } > > #define R_AX 1 > -- > 2.23.0.700.g56cf767bdb-goog >
On Sat, Oct 12, 2019 at 4:59 PM Bill Wendling <morbo@google.com> wrote: > > It's fragile to try to retrieve the stack pointer by taking the address > of a variable on the stack. For instance, clang reserves more stack > space than gcc here, indicating that the variable may not be at the > start of the stack. Instead of relying upon this to work, retrieve the > "%rbp" value, which contains the value of "%rsp" before stack > allocation. > > Signed-off-by: Bill Wendling <morbo@google.com> > --- > x86/realmode.c | 5 +++-- > 1 file changed, 3 insertions(+), 2 deletions(-) > > diff --git a/x86/realmode.c b/x86/realmode.c > index cf45fd6..7c89dd1 100644 > --- a/x86/realmode.c > +++ b/x86/realmode.c > @@ -518,11 +518,12 @@ extern void retf_imm(void); > > static void test_call(void) > { > - u32 esp[16]; > u32 addr; > > inregs = (struct regs){ 0 }; > - inregs.esp = (u32)esp; > + > + // At this point the original stack pointer is in %ebp. > + asm volatile ("mov %%ebp, %0" : "=rm"(inregs.esp)); I don't think we should assume the existence of frame pointers. Moreover, I think %esp is actually the value that should be saved here, regardless of how large the current frame is. > MK_INSN(call1, "mov $test_function, %eax \n\t" > "call *%eax\n\t"); > -- > 2.23.0.700.g56cf767bdb-goog >
On Wed, Oct 16, 2019 at 12:53 PM Jim Mattson <jmattson@google.com> wrote: > > On Sat, Oct 12, 2019 at 4:59 PM Bill Wendling <morbo@google.com> wrote: > > > > It's fragile to try to retrieve the stack pointer by taking the address > > of a variable on the stack. For instance, clang reserves more stack > > space than gcc here, indicating that the variable may not be at the > > start of the stack. Instead of relying upon this to work, retrieve the > > "%rbp" value, which contains the value of "%rsp" before stack > > allocation. > > > > Signed-off-by: Bill Wendling <morbo@google.com> > > --- > > x86/realmode.c | 5 +++-- > > 1 file changed, 3 insertions(+), 2 deletions(-) > > > > diff --git a/x86/realmode.c b/x86/realmode.c > > index cf45fd6..7c89dd1 100644 > > --- a/x86/realmode.c > > +++ b/x86/realmode.c > > @@ -518,11 +518,12 @@ extern void retf_imm(void); > > > > static void test_call(void) > > { > > - u32 esp[16]; > > u32 addr; > > > > inregs = (struct regs){ 0 }; > > - inregs.esp = (u32)esp; > > + > > + // At this point the original stack pointer is in %ebp. > > + asm volatile ("mov %%ebp, %0" : "=rm"(inregs.esp)); > > I don't think we should assume the existence of frame pointers. > Moreover, I think %esp is actually the value that should be saved > here, regardless of how large the current frame is. Never mind. After taking a closer look, esp[] is meant to provide stack space for the code under test, but inregs.esp should point to the top of this stack rather than the bottom. This is apparently a long-standing bug, similar to the one Avi fixed for test_long_jmp() in commit 4aa22949 ("realmode: fix esp in long jump test"). For consistency with test_long_jmp, I'd suggest changing the inregs.esp assignment to: inregs.esp = (u32)(esp+16); Note that you absolutely must preserve the esp[] array! > > MK_INSN(call1, "mov $test_function, %eax \n\t" > > "call *%eax\n\t"); > > -- > > 2.23.0.700.g56cf767bdb-goog > >
The following patches fix some realmode test compatibility issues between gcc and clang. This update should address the comments by Jim Mattson. Re explicitly copying the regs structure, I tried to use the inline asm code you suggested, but wasn't able to get it to work for me. It never seemed to copy the structure correctly. Doing the loop explicitly solved the issue. Bill Wendling (2): x86: realmode: explicitly copy regs structure x86: realmode: fix esp in call test x86/realmode.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) -- 2.23.0.700.g56cf767bdb-goog
Clang prefers to use a "rep;movsl" (or equivalent) to copy the "regs" structure. This doesn't work in 16-bit mode, as it will end up copying over half the number of bytes. Avoid this by copying over the structure a byte at a time. Signed-off-by: Bill Wendling <morbo@google.com> --- x86/realmode.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/x86/realmode.c b/x86/realmode.c index 303d093..41b8592 100644 --- a/x86/realmode.c +++ b/x86/realmode.c @@ -140,6 +140,16 @@ struct insn_desc { static struct regs inregs, outregs; +static inline void copy_regs(struct regs *dst_regs, struct regs *src_regs) +{ + char *dst = (char*)dst_regs; + char *src = (char*)src_regs; + u32 i; + + for (i = 0; i < sizeof(struct regs); i++) + dst[i] = src[i]; +} + static void exec_in_big_real_mode(struct insn_desc *insn) { unsigned long tmp; @@ -148,11 +158,11 @@ static void exec_in_big_real_mode(struct insn_desc *insn) extern u8 test_insn[], test_insn_end[]; for (i = 0; i < insn->len; ++i) - test_insn[i] = ((u8 *)(unsigned long)insn->ptr)[i]; + test_insn[i] = ((u8 *)(unsigned long)insn->ptr)[i]; for (; i < test_insn_end - test_insn; ++i) test_insn[i] = 0x90; // nop - save = inregs; + copy_regs(&save, &inregs); asm volatile( "lgdtl %[gdt_descr] \n\t" "mov %%cr0, %[tmp] \n\t" @@ -196,7 +206,8 @@ static void exec_in_big_real_mode(struct insn_desc *insn) : [gdt_descr]"m"(gdt_descr), [bigseg]"r"((short)16) : "cc", "memory" ); - outregs = save; + copy_regs(&outregs, &save); + } #define R_AX 1 -- 2.23.0.700.g56cf767bdb-goog
esp needs to point at the end of the stack, or it will corrupt memory. Signed-off-by: Bill Wendling <morbo@google.com> This is a port of Avi Kivity patch for the long jump test: 4aa229495b0e4159642b4a77e9adfdc81501c095. Signed-off-by: Bill Wendling <morbo@google.com> --- x86/realmode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x86/realmode.c b/x86/realmode.c index 41b8592..f318910 100644 --- a/x86/realmode.c +++ b/x86/realmode.c @@ -520,7 +520,7 @@ static void test_call(void) u32 addr; inregs = (struct regs){ 0 }; - inregs.esp = (u32)esp; + inregs.esp = (u32)(esp+16); MK_INSN(call1, "mov $test_function, %eax \n\t" "call *%eax\n\t"); -- 2.23.0.700.g56cf767bdb-goog
On Wed, Oct 16, 2019 at 6:25 PM Bill Wendling <morbo@google.com> wrote:
>
> esp needs to point at the end of the stack, or it will corrupt memory.
>
> Signed-off-by: Bill Wendling <morbo@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
On 13/10/19 01:58, Bill Wendling wrote: > Clang prefers to use a "mempcy" (or equivalent) to copy the "regs" > structure. This doesn't work in 16-bit mode, as it will end up copying > over half the number of bytes. GCC performs a field-by-field copy of the > structure, so force clang to do the same thing. Do you mean that: 1) the compiler is compiling a "call memcpy", where the bytes "rep movsl" are interpreted as "rep movsw" 2) the compiler's integrated assembler is including the bytes for "rep movsl", but in 16-bit mode they are "rep movsw" 3) something else The -m16 or -no-integrated-as flags perhaps can help? Paolo > Signed-off-by: Bill Wendling <morbo@google.com> > --- > x86/realmode.c | 19 ++++++++++++++++--- > 1 file changed, 16 insertions(+), 3 deletions(-) > > diff --git a/x86/realmode.c b/x86/realmode.c > index 303d093..cf45fd6 100644 > --- a/x86/realmode.c > +++ b/x86/realmode.c > @@ -117,6 +117,19 @@ struct regs { > u32 eip, eflags; > }; > > +#define COPY_REG(name, dst, src) (dst).name = (src).name > +#define COPY_REGS(dst, src) \ > + COPY_REG(eax, dst, src); \ > + COPY_REG(ebx, dst, src); \ > + COPY_REG(ecx, dst, src); \ > + COPY_REG(edx, dst, src); \ > + COPY_REG(esi, dst, src); \ > + COPY_REG(edi, dst, src); \ > + COPY_REG(esp, dst, src); \ > + COPY_REG(ebp, dst, src); \ > + COPY_REG(eip, dst, src); \ > + COPY_REG(eflags, dst, src) > + > struct table_descr { > u16 limit; > void *base; > @@ -148,11 +161,11 @@ static void exec_in_big_real_mode(struct insn_desc *insn) > extern u8 test_insn[], test_insn_end[]; > > for (i = 0; i < insn->len; ++i) > - test_insn[i] = ((u8 *)(unsigned long)insn->ptr)[i]; > + test_insn[i] = ((u8 *)(unsigned long)insn->ptr)[i]; > for (; i < test_insn_end - test_insn; ++i) > test_insn[i] = 0x90; // nop > > - save = inregs; > + COPY_REGS(save, inregs); > asm volatile( > "lgdtl %[gdt_descr] \n\t" > "mov %%cr0, %[tmp] \n\t" > @@ -196,7 +209,7 @@ static void exec_in_big_real_mode(struct insn_desc *insn) > : [gdt_descr]"m"(gdt_descr), [bigseg]"r"((short)16) > : "cc", "memory" > ); > - outregs = save; > + COPY_REGS(outregs, save); > } > > #define R_AX 1 >
On 16/10/19 23:52, Jim Mattson wrote:
> Never mind. After taking a closer look, esp[] is meant to provide
> stack space for the code under test, but inregs.esp should point to
> the top of this stack rather than the bottom. This is apparently a
> long-standing bug, similar to the one Avi fixed for test_long_jmp()
> in commit 4aa22949 ("realmode: fix esp in long jump test").
>
> For consistency with test_long_jmp, I'd suggest changing the
> inregs.esp assignment to:
> inregs.esp = (u32)(esp+16);
>
> Note that you absolutely must preserve the esp[] array!
>
Agreed, thanks Jim for the review.
Paolo
On 17/10/19 03:25, Bill Wendling wrote:
> diff --git a/x86/realmode.c b/x86/realmode.c
> index 41b8592..f318910 100644
> --- a/x86/realmode.c
> +++ b/x86/realmode.c
> @@ -520,7 +520,7 @@ static void test_call(void)
> u32 addr;
>
> inregs = (struct regs){ 0 };
> - inregs.esp = (u32)esp;
> + inregs.esp = (u32)(esp+16);
Applied with
+ inregs.esp = (u32)&esp[ARRAY_SIZE(esp)];
Paolo
On Mon, Oct 21, 2019 at 8:43 AM Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 17/10/19 03:25, Bill Wendling wrote:
> > diff --git a/x86/realmode.c b/x86/realmode.c
> > index 41b8592..f318910 100644
> > --- a/x86/realmode.c
> > +++ b/x86/realmode.c
> > @@ -520,7 +520,7 @@ static void test_call(void)
> > u32 addr;
> >
> > inregs = (struct regs){ 0 };
> > - inregs.esp = (u32)esp;
> > + inregs.esp = (u32)(esp+16);
>
> Applied with
>
> + inregs.esp = (u32)&esp[ARRAY_SIZE(esp)];
>
> Paolo
Would you mind doing the same for test_long_jmp?