All of lore.kernel.org
 help / color / mirror / Atom feed
* IOPL emulation breaks hpasmd (hp-health) needed by HP DL380 G4 servers
@ 2021-09-15 12:23 Ondrej Zary
  2021-09-16 17:09 ` Thomas Gleixner
  0 siblings, 1 reply; 24+ messages in thread
From: Ondrej Zary @ 2021-09-15 12:23 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: x86, linux-kernel

Hello,
after upgrading Debian from 10 (kernel 4.19.194) to 11 (kernel 5.10.46) on HP
DL380 G4 servers, hpasmd segfaults. Booting the 4.19.194 kernel allows hpasmd
to work.

hpasmd is binary-only crap that is required on these servers for the fan
regulation to work. Without it, fans run at full speed, producing too much
noise and consuming power.
The last version that works on these servers comes from hp-health_8.7.0.1.2-5_amd64.deb package
(found on HP_ProLiant_Value_Add_Software-8.70-10-6.iso).
Newer versions of the hp-health package lack the hpasmd binary and won't run (at
least iLO 2 is probably required).

Details:
[   11.539938] process 'hp/hp-health/bin/hpasmd' started with executable stack
[   11.720404] traps: hpasmd[389] general protection fault ip:f7ccc09b sp:ffb369e4 error:0 in mem[f7ccc000+3000]

# strace -f /opt/hp/hp-health/bin/hpasmd
[...]
openat(AT_FDCWD, "/dev/mem", O_RDWR)    = 3
mmap2(NULL, 65535, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_SHARED, 3, 0xf0000) = 0xf7d64000
close(3)                                = 0
munmap(0xf7d64000, 65535)               = 0
openat(AT_FDCWD, "/dev/mem", O_RDWR)    = 3
mmap2(NULL, 65535, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_SHARED, 3, 0xf0000) = 0xf7d64000
close(3)                                = 0
openat(AT_FDCWD, "/dev/mem", O_RDWR)    = 3
mmap2(NULL, 8192, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_SHARED, 3, 0xf0000) = 0xf7f86000
close(3)                                = 0
munmap(0xf7d64000, 65535)               = 0
openat(AT_FDCWD, "/dev/mem", O_RDWR)    = 3
mmap2(NULL, 65535, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_SHARED, 3, 0xe0000) = 0xf7d64000
close(3)                                = 0
munmap(0xf7d64000, 65535)               = 0
openat(AT_FDCWD, "/dev/mem", O_RDWR)    = 3
mmap2(NULL, 32767, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_SHARED, 3, 0xc8000) = 0xf7d6c000
close(3)                                = 0
munmap(0xf7d6c000, 32767)               = 0
openat(AT_FDCWD, "/dev/mem", O_RDWR)    = 3
mmap2(NULL, 65535, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_SHARED, 3, 0xd0000) = 0xf7d64000
close(3)                                = 0
munmap(0xf7d64000, 65535)               = 0
openat(AT_FDCWD, "/dev/mem", O_RDWR)    = 3
mmap2(NULL, 16384, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_SHARED, 3, 0xfff8c000) = 0xf7f82000
close(3)                                = 0
munmap(0xf7f82000, 16384)               = 0
openat(AT_FDCWD, "/dev/mem", O_RDWR)    = 3
mmap2(NULL, 8192, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_SHARED, 3, 0xfffa6000) = 0xf7f84000
close(3)                                = 0
mprotect(0x80b6000, 8192, PROT_READ|PROT_WRITE|PROT_EXEC) = 0
openat(AT_FDCWD, "/dev/mem", O_RDWR)    = 3
mmap2(NULL, 65535, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_SHARED, 3, 0xf0000) = 0xf7d64000
close(3)                                = 0
openat(AT_FDCWD, "/dev/mem", O_RDWR)    = 3
mmap2(NULL, 8342, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_SHARED, 3, 0xf0000) = 0xf7d61000
close(3)                                = 0
--- SIGSEGV {si_signo=SIGSEGV, si_code=SI_KERNEL, si_addr=NULL} ---
+++ killed by SIGSEGV +++

At first I thought the problem was caused by /dev/mem restrictions, but it's not.
The same problem affects /opt/hp/hp-health/bin/IrqRouteTbl:
# strace -f /opt/hp/hp-health/bin/IrqRouteTbl
execve("/opt/hp/hp-health/bin/IrqRouteTbl", ["/opt/hp/hp-health/bin/IrqRouteTb"...], 0x7ffe4dfd1148 /* 22 vars */) = 0
[ Process PID=494 runs in 32 bit mode. ]
brk(NULL)                               = 0x94ac000
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
mmap2(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xf7fc6000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = 3
fstat64(3, {st_mode=S_IFREG|0644, st_size=20522, ...}) = 0
mmap2(NULL, 20522, PROT_READ, MAP_PRIVATE, 3, 0) = 0xf7fc0000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib32/libc.so.6", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = 3
read(3, "\177ELF\1\1\1\3\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\360\357\1\0004\0\0\0"..., 512) = 512
fstat64(3, {st_mode=S_IFREG|0755, st_size=1993968, ...}) = 0
mmap2(NULL, 2002876, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0xf7dd7000
mprotect(0xf7df4000, 1859584, PROT_NONE) = 0
mmap2(0xf7df4000, 1396736, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1d000) = 0xf7df4000
mmap2(0xf7f49000, 458752, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x172000) = 0xf7f49000
mmap2(0xf7fba000, 16384, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1e2000) = 0xf7fba000
mmap2(0xf7fbe000, 8124, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xf7fbe000
close(3)                                = 0
set_thread_area({entry_number=-1, base_addr=0xf7fc7100, limit=0x0fffff, seg_32bit=1, contents=0, read_exec_only=0, limit_in_pages=1, seg_not_present=0, useable=1}) = 0 (entry_number=12)
mprotect(0xf7fba000, 8192, PROT_READ)   = 0
mprotect(0xf7ff8000, 4096, PROT_READ)   = 0
munmap(0xf7fc0000, 20522)               = 0
openat(AT_FDCWD, "/dev/mem", O_RDWR)    = 3
mmap2(NULL, 65535, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_SHARED, 3, 0xf0000) = 0xf7dc7000
close(3)                                = 0
munmap(0xf7dc7000, 65535)               = 0
openat(AT_FDCWD, "/dev/mem", O_RDWR)    = 3
mmap2(NULL, 65535, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_SHARED, 3, 0xf0000) = 0xf7dc7000
close(3)                                = 0
openat(AT_FDCWD, "/dev/mem", O_RDWR)    = 3
mmap2(NULL, 8192, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_SHARED, 3, 0xf0000) = 0xf7fc4000
close(3)                                = 0
munmap(0xf7dc7000, 65535)               = 0
openat(AT_FDCWD, "/dev/mem", O_RDWR)    = 3
mmap2(NULL, 65535, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_SHARED, 3, 0xe0000) = 0xf7dc7000
close(3)                                = 0
munmap(0xf7dc7000, 65535)               = 0
openat(AT_FDCWD, "/dev/mem", O_RDWR)    = 3
mmap2(NULL, 32767, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_SHARED, 3, 0xc8000) = 0xf7dcf000
close(3)                                = 0
munmap(0xf7dcf000, 32767)               = 0
openat(AT_FDCWD, "/dev/mem", O_RDWR)    = 3
mmap2(NULL, 65535, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_SHARED, 3, 0xd0000) = 0xf7dc7000
close(3)                                = 0
munmap(0xf7dc7000, 65535)               = 0
iopl(3)                                 = 0
openat(AT_FDCWD, "/dev/mem", O_RDWR)    = 3
mmap2(NULL, 8342, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_SHARED, 3, 0xf0000) = 0xf7fc1000
close(3)                                = 0
iopl(3)                                 = 0
--- SIGSEGV {si_signo=SIGSEGV, si_code=SI_KERNEL, si_addr=NULL} ---
+++ killed by SIGSEGV +++
Segmentation fault

Noticed the iopl(3) call. Checked out a kernel before "x86/iopl: Remove legacy IOPL option"
(a24ca9976843156eabbc5f2d798954b5674d1b61) and built with CONFIG_X86_IOPL_LEGACY.
It works!

# uname -a
Linux edi2 5.4.0-rc7+ #428 SMP Wed Sep 15 14:01:53 CEST 2021 x86_64 GNU/Linux
# hpasmcli -s "show fans"

Fan  Location        Present Speed  of max  Redundant  Partner  Hot-pluggable
---  --------        ------- -----  ------  ---------  -------  -------------
#1   PROCESSOR_ZONE  Yes     NORMAL  22%     Yes        2        Yes
#2   PROCESSOR_ZONE  Yes     NORMAL  22%     Yes        1        Yes
#3   I/O_ZONE        Yes     NORMAL  11%     Yes        1        Yes
#4   I/O_ZONE        Yes     NORMAL  11%     Yes        1        Yes
#5   PROCESSOR_ZONE  Yes     NORMAL  22%     Yes        1        Yes
#6   PROCESSOR_ZONE  Yes     NORMAL  22%     Yes        1        Yes
#7   POWERSUPPLY_BAY Yes     NORMAL  11%     Yes        1        Yes
#8   POWERSUPPLY_BAY Yes     NORMAL  11%     Yes        1        Yes

Building the same kernel with CONFIG_X86_IOPL_EMULATION breaks it again.

I even disassembled /opt/hp/hp-health/bin/IrqRouteTbl (it's only 5952 bytes
and does not use any hp libs). There's no CLI, only a couple of INs and OUTs:
 8048f3e:       66 ba 85 00             mov    dx,0x85
 8048f42:       ee                      out    dx,al
 8048f43:       66 ba 84 00             mov    dx,0x84
 8048f47:       88 e0                   mov    al,ah
 8048f49:       ee                      out    dx,al
...
 8048f5f:       66 ba d4 0c             mov    dx,0xcd4
 8048f63:       ec                      in     al,dx
 8048f64:       0c 80                   or     al,0x80
 8048f66:       ee                      out    dx,al
 8048f67:       66 ba d4 0c             mov    dx,0xcd4
 8048f6b:       ec                      in     al,dx
...
 8048f75:       66 ba 84 00             mov    dx,0x84
 8048f79:       66 b8 00 00             mov    ax,0x0
 8048f7d:       ee                      out    dx,al
 8048f7e:       66 ba 85 00             mov    dx,0x85
 8048f82:       ee                      out    dx,al

But I still don't know what's going on.

-- 
Ondrej Zary

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: IOPL emulation breaks hpasmd (hp-health) needed by HP DL380 G4 servers
  2021-09-15 12:23 IOPL emulation breaks hpasmd (hp-health) needed by HP DL380 G4 servers Ondrej Zary
@ 2021-09-16 17:09 ` Thomas Gleixner
  2021-09-16 20:27   ` Ondrej Zary
  0 siblings, 1 reply; 24+ messages in thread
From: Thomas Gleixner @ 2021-09-16 17:09 UTC (permalink / raw)
  To: Ondrej Zary; +Cc: x86, linux-kernel

Ondrej,

On Wed, Sep 15 2021 at 14:23, Ondrej Zary wrote:
> after upgrading Debian from 10 (kernel 4.19.194) to 11 (kernel 5.10.46) on HP
> DL380 G4 servers, hpasmd segfaults. Booting the 4.19.194 kernel allows hpasmd
> to work.
>
> Noticed the iopl(3) call. Checked out a kernel before "x86/iopl: Remove legacy IOPL option"
> (a24ca9976843156eabbc5f2d798954b5674d1b61) and built with CONFIG_X86_IOPL_LEGACY.
> It works!.
>
> I even disassembled /opt/hp/hp-health/bin/IrqRouteTbl (it's only 5952 bytes
> and does not use any hp libs). There's no CLI, only a couple of INs and OUTs:
> ...
>  8048f75:       66 ba 84 00             mov    dx,0x84
>  8048f79:       66 b8 00 00             mov    ax,0x0
>  8048f7d:       ee                      out    dx,al
>  8048f7e:       66 ba 85 00             mov    dx,0x85
>  8048f82:       ee                      out    dx,al
>
> But I still don't know what's going on.

That's weird. Let me think about a way to debug that. I just ran a
trivial test program which issues iopl(3) and reads all ports from
0-65535. That works like a charm.

#include <stdio.h>
#include <sys/io.h>

/*
 * Trivial iopl(3) check: request I/O privilege level 3, then read one
 * byte from each port number 0..65535 with inb() and print the port
 * number alongside the value read.
 *
 * Returns 0 after the full scan, or the non-zero result of iopl() if
 * the call fails.
 */
int main(void)
{
	unsigned int port = 0;
	int err = iopl(3);

	/* iopl() failed: hand its return value back as the exit status. */
	if (err != 0)
		return err;

	while (port < 65536) {
		printf("%5u: %02x\n", port, inb(port));
		port++;
	}

	return 0;
}

Which CPU is in that machine?

Can you please run that failing program with GDB and figure out which
instruction causes #GP and what the register content is.

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: IOPL emulation breaks hpasmd (hp-health) needed by HP DL380 G4 servers
  2021-09-16 17:09 ` Thomas Gleixner
@ 2021-09-16 20:27   ` Ondrej Zary
  2021-09-16 21:05     ` Peter Zijlstra
  2021-09-16 21:25     ` IOPL emulation breaks hpasmd (hp-health) needed by HP DL380 G4 servers Thomas Gleixner
  0 siblings, 2 replies; 24+ messages in thread
From: Ondrej Zary @ 2021-09-16 20:27 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: x86, linux-kernel

On Thursday 16 September 2021 19:09:31 Thomas Gleixner wrote:
> Ondrej,
> 
> On Wed, Sep 15 2021 at 14:23, Ondrej Zary wrote:
> > after upgrading Debian from 10 (kernel 4.19.194) to 11 (kernel 5.10.46) on HP
> > DL380 G4 servers, hpasmd segfaults. Booting the 4.19.194 kernel allows hpasmd
> > to work.
> >
> > Noticed the iopl(3) call. Checked out a kernel before "x86/iopl: Remove legacy IOPL option"
> > (a24ca9976843156eabbc5f2d798954b5674d1b61) and built with CONFIG_X86_IOPL_LEGACY.
> > It works!.
> >
> > I even disassembled /opt/hp/hp-health/bin/IrqRouteTbl (it's only 5952 bytes
> > and does not use any hp libs). There's no CLI, only a couple of INs and OUTs:
> > ...
> >  8048f75:       66 ba 84 00             mov    dx,0x84
> >  8048f79:       66 b8 00 00             mov    ax,0x0
> >  8048f7d:       ee                      out    dx,al
> >  8048f7e:       66 ba 85 00             mov    dx,0x85
> >  8048f82:       ee                      out    dx,al
> >
> > But I still don't know what's going on.
> 
> That's weird. Let me think about a way to debug that. I just ran a
> trivial test program which issues iopl(3) and reads all ports from
> 0-65535. That works like a charm.
> 
> #include <stdio.h>
> #include <sys/io.h>
> 
> int main(void)
> {
> 	unsigned int i;
> 	int ret;
> 
> 	ret = iopl(3);
> 	if (ret)
> 		return ret;
> 
> 	for (i = 0; i < 65536; i++)
> 		printf("%5u: %02x\n", i, inb(i));
> 
> 	return 0;
> }
> 
> Which CPU is in that machine?

A Netburst-based Xeon (1-core, HT):
# cat /proc/cpuinfo
processor       : 0
vendor_id       : GenuineIntel
cpu family      : 15
model           : 4
model name      : Intel(R) Xeon(TM) CPU 3.00GHz
stepping        : 3
microcode       : 0x5
cpu MHz         : 2999.868
cache size      : 2048 KB
physical id     : 0
siblings        : 2
core id         : 0
cpu cores       : 1
apicid          : 0
initial apicid  : 0
fpu             : yes
fpu_exception   : yes
cpuid level     : 5
wp              : yes
flags           : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx lm constant_tsc pebs bts nopl cpuid pni dtes64 monitor ds_cpl cid cx16 xtpr pti
bugs            : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs
bogomips        : 5999.73
clflush size    : 64
cache_alignment : 128
address sizes   : 36 bits physical, 48 bits virtual
power management:

> Can you please run that failing program with GDB and figure out which
> instruction causes #GP and what the register content is.

(gdb) run
Starting program: /opt/hp/hp-health/bin/IrqRouteTbl

Program received signal SIGSEGV, Segmentation fault.
0xf7fc509b in ?? ()
(gdb) bt
#0  0xf7fc509b in ?? ()
#1  0x08048848 in ?? ()
#2  0x08048aa1 in ?? ()
#3  0x08048e05 in ?? ()
#4  0xf7df9e46 in __libc_start_main () from /lib32/libc.so.6
#5  0xf7ffd000 in ?? () from /lib/ld-linux.so.2
Backtrace stopped: previous frame inner to this frame (corrupt stack?)
(gdb) x/3i $pc
=> 0xf7fc509b:  cli
   0xf7fc509c:  push   %ebp
   0xf7fc509d:  mov    %esp,%ebp

OMG, maybe is it calling into the mmapped BIOS area?


-- 
Ondrej Zary

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: IOPL emulation breaks hpasmd (hp-health) needed by HP DL380 G4 servers
  2021-09-16 20:27   ` Ondrej Zary
@ 2021-09-16 21:05     ` Peter Zijlstra
  2021-09-17  8:11       ` Ondrej Zary
  2021-09-16 21:25     ` IOPL emulation breaks hpasmd (hp-health) needed by HP DL380 G4 servers Thomas Gleixner
  1 sibling, 1 reply; 24+ messages in thread
From: Peter Zijlstra @ 2021-09-16 21:05 UTC (permalink / raw)
  To: Ondrej Zary; +Cc: Thomas Gleixner, x86, linux-kernel

On Thu, Sep 16, 2021 at 10:27:17PM +0200, Ondrej Zary wrote:
> (gdb) run
> Starting program: /opt/hp/hp-health/bin/IrqRouteTbl
> 
> Program received signal SIGSEGV, Segmentation fault.
> 0xf7fc509b in ?? ()
> (gdb) bt
> #0  0xf7fc509b in ?? ()
> #1  0x08048848 in ?? ()
> #2  0x08048aa1 in ?? ()
> #3  0x08048e05 in ?? ()
> #4  0xf7df9e46 in __libc_start_main () from /lib32/libc.so.6
> #5  0xf7ffd000 in ?? () from /lib/ld-linux.so.2
> Backtrace stopped: previous frame inner to this frame (corrupt stack?)
> (gdb) x/3i $pc
> => 0xf7fc509b:  cli
>    0xf7fc509c:  push   %ebp
>    0xf7fc509d:  mov    %esp,%ebp
> 
> OMG, maybe is it calling into the mmapped BIOS area?

Lol... does something like the below (which *really* wants to be behind
something like sysctl.iopl_fake_if) work for you?

---

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a58800973aed..55c3904e656d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -528,6 +528,34 @@ static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs,
 
 #define GPFSTR "general protection fault"
 
+bool fixup_iopl_exception(struct pt_regs *regs)
+{
+	struct thread_struct *t = &current->thread;
+	unsigned char buf[MAX_INSN_SIZE];
+	struct insn insn;
+	int nr_copied;
+
+	if (!IS_ENABLED(CONFIG_X86_IOPL_IOPERM) || t->iopl_emul != 3 || !regs)
+		return false;
+
+	nr_copied = insn_fetch_from_user(regs, buf);
+	if (nr_copied <= 0)
+		return false;
+
+	if (!insn_decode_from_regs(&insn, regs, buf, nr_copied))
+		return false;
+
+	if (insn.length != 1)
+		return false;
+
+	if (insn.opcode.bytes[0] != 0xfa &&
+	    insn.opcode.bytes[0] != 0xfb)
+		return false;
+
+	regs->ip += 1;
+	return true;
+}
+
 DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
 {
 	char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR;
@@ -553,6 +581,9 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
 	tsk = current;
 
 	if (user_mode(regs)) {
+		if (fixup_iopl_exception(regs))
+			goto exit;
+
 		tsk->thread.error_code = error_code;
 		tsk->thread.trap_nr = X86_TRAP_GP;
 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: IOPL emulation breaks hpasmd (hp-health) needed by HP DL380 G4 servers
  2021-09-16 20:27   ` Ondrej Zary
  2021-09-16 21:05     ` Peter Zijlstra
@ 2021-09-16 21:25     ` Thomas Gleixner
  1 sibling, 0 replies; 24+ messages in thread
From: Thomas Gleixner @ 2021-09-16 21:25 UTC (permalink / raw)
  To: Ondrej Zary; +Cc: x86, linux-kernel

On Thu, Sep 16 2021 at 22:27, Ondrej Zary wrote:
> On Thursday 16 September 2021 19:09:31 Thomas Gleixner wrote:
>> Can you please run that failing program with GDB and figure out which
>> instruction causes #GP and what the register content is.
>
> (gdb) run
> Starting program: /opt/hp/hp-health/bin/IrqRouteTbl
>
> Program received signal SIGSEGV, Segmentation fault.
> 0xf7fc509b in ?? ()
> (gdb) bt
> #0  0xf7fc509b in ?? ()
> #1  0x08048848 in ?? ()
> #2  0x08048aa1 in ?? ()
> #3  0x08048e05 in ?? ()
> #4  0xf7df9e46 in __libc_start_main () from /lib32/libc.so.6
> #5  0xf7ffd000 in ?? () from /lib/ld-linux.so.2
> Backtrace stopped: previous frame inner to this frame (corrupt stack?)
> (gdb) x/3i $pc
> => 0xf7fc509b:  cli
>    0xf7fc509c:  push   %ebp
>    0xf7fc509d:  mov    %esp,%ebp
>
> OMG, maybe is it calling into the mmapped BIOS area?

Pretty much so. From your initial report:

openat(AT_FDCWD, "/dev/mem", O_RDWR)    = 3
mmap2(NULL, 8342, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_SHARED, 3, 0xf0000) = 0xf7fc1000
close(3)                                = 0
iopl(3)                                 = 0
--- SIGSEGV {si_signo=SIGSEGV, si_code=SI_KERNEL, si_addr=NULL} ---

offset 0xf0000 of /dev/mem is clearly the BIOS area. Daft.

That's really qualiteee stuff.

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: IOPL emulation breaks hpasmd (hp-health) needed by HP DL380 G4 servers
  2021-09-16 21:05     ` Peter Zijlstra
@ 2021-09-17  8:11       ` Ondrej Zary
  2021-09-17  9:20         ` [PATCH] x86/iopl: Fake iopl(3) CLI/STI usage Peter Zijlstra
  0 siblings, 1 reply; 24+ messages in thread
From: Ondrej Zary @ 2021-09-17  8:11 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Thomas Gleixner, x86, linux-kernel

On Thursday 16 September 2021, Peter Zijlstra wrote:
> On Thu, Sep 16, 2021 at 10:27:17PM +0200, Ondrej Zary wrote:
> > (gdb) run
> > Starting program: /opt/hp/hp-health/bin/IrqRouteTbl
> > 
> > Program received signal SIGSEGV, Segmentation fault.
> > 0xf7fc509b in ?? ()
> > (gdb) bt
> > #0  0xf7fc509b in ?? ()
> > #1  0x08048848 in ?? ()
> > #2  0x08048aa1 in ?? ()
> > #3  0x08048e05 in ?? ()
> > #4  0xf7df9e46 in __libc_start_main () from /lib32/libc.so.6
> > #5  0xf7ffd000 in ?? () from /lib/ld-linux.so.2
> > Backtrace stopped: previous frame inner to this frame (corrupt stack?)
> > (gdb) x/3i $pc
> > => 0xf7fc509b:  cli
> >    0xf7fc509c:  push   %ebp
> >    0xf7fc509d:  mov    %esp,%ebp
> > 
> > OMG, maybe is it calling into the mmapped BIOS area?
> 
> Lol... does something like the below (which *really* wants to be behind
> something like sysctl.iopl_fake_if) work for you?
> 
> ---
> 
> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
> index a58800973aed..55c3904e656d 100644
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -528,6 +528,34 @@ static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs,
>  
>  #define GPFSTR "general protection fault"
>  
> +bool fixup_iopl_exception(struct pt_regs *regs)
> +{
> +	struct thread_struct *t = &current->thread;
> +	unsigned char buf[MAX_INSN_SIZE];
> +	struct insn insn;
> +	int nr_copied;
> +
> +	if (!IS_ENABLED(CONFIG_X86_IOPL_IOPERM) || t->iopl_emul != 3 || !regs)
> +		return false;
> +
> +	nr_copied = insn_fetch_from_user(regs, buf);
> +	if (nr_copied <= 0)
> +		return false;
> +
> +	if (!insn_decode_from_regs(&insn, regs, buf, nr_copied))
> +		return false;
> +
> +	if (insn.length != 1)
> +		return false;
> +
> +	if (insn.opcode.bytes[0] != 0xfa &&
> +	    insn.opcode.bytes[0] != 0xfb)
> +		return false;
> +
> +	regs->ip += 1;
> +	return true;
> +}
> +
>  DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
>  {
>  	char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR;
> @@ -553,6 +581,9 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
>  	tsk = current;
>  
>  	if (user_mode(regs)) {
> +		if (fixup_iopl_exception(regs))
> +			goto exit;
> +
>  		tsk->thread.error_code = error_code;
>  		tsk->thread.trap_nr = X86_TRAP_GP;
>  
> 

Yeah, it works!

# uname -a
Linux edi2 5.15.0-rc1+ #429 SMP Fri Sep 17 08:45:36 CEST 2021 x86_64 GNU/Linux

# hpasmcli -s "show fans"

Fan  Location        Present Speed  of max  Redundant  Partner  Hot-pluggable
---  --------        ------- -----  ------  ---------  -------  -------------
#1   PROCESSOR_ZONE  Yes     NORMAL  22%     Yes        2        Yes
#2   PROCESSOR_ZONE  Yes     NORMAL  22%     Yes        1        Yes
#3   I/O_ZONE        Yes     NORMAL  11%     Yes        1        Yes
#4   I/O_ZONE        Yes     NORMAL  11%     Yes        1        Yes
#5   PROCESSOR_ZONE  Yes     NORMAL  22%     Yes        1        Yes
#6   PROCESSOR_ZONE  Yes     NORMAL  22%     Yes        1        Yes
#7   POWERSUPPLY_BAY Yes     NORMAL  11%     Yes        1        Yes
#8   POWERSUPPLY_BAY Yes     NORMAL  11%     Yes        1        Yes

# strace -f /opt/hp/hp-health/bin/IrqRouteTbl
execve("/opt/hp/hp-health/bin/IrqRouteTbl", ["/opt/hp/hp-health/bin/IrqRouteTb"...], 0x7ffcdc970c48 /* 21 vars */) = 0
[ Process PID=469 runs in 32 bit mode. ]
brk(NULL)                               = 0x91c8000
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
mmap2(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xf7f47000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = 3
fstat64(3, {st_mode=S_IFREG|0644, st_size=20522, ...}) = 0
mmap2(NULL, 20522, PROT_READ, MAP_PRIVATE, 3, 0) = 0xf7f41000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib32/libc.so.6", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = 3
read(3, "\177ELF\1\1\1\3\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\360\357\1\0004\0\0\0"..., 512) = 512
fstat64(3, {st_mode=S_IFREG|0755, st_size=1993968, ...}) = 0
mmap2(NULL, 2002876, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0xf7d58000
mprotect(0xf7d75000, 1859584, PROT_NONE) = 0
mmap2(0xf7d75000, 1396736, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1d000) = 0xf7d75000
mmap2(0xf7eca000, 458752, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x172000) = 0xf7eca000
mmap2(0xf7f3b000, 16384, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1e2000) = 0xf7f3b000
mmap2(0xf7f3f000, 8124, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xf7f3f000
close(3)                                = 0
set_thread_area({entry_number=-1, base_addr=0xf7f48100, limit=0x0fffff, seg_32bit=1, contents=0, read_exec_only=0, limit_in_pages=1, seg_not_present=0, useable=1}) = 0 (entry_number=12)
mprotect(0xf7f3b000, 8192, PROT_READ)   = 0
mprotect(0xf7f79000, 4096, PROT_READ)   = 0
munmap(0xf7f41000, 20522)               = 0
openat(AT_FDCWD, "/dev/mem", O_RDWR)    = 3
mmap2(NULL, 65535, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_SHARED, 3, 0xf0000) = 0xf7d48000
close(3)                                = 0
munmap(0xf7d48000, 65535)               = 0
openat(AT_FDCWD, "/dev/mem", O_RDWR)    = 3
mmap2(NULL, 65535, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_SHARED, 3, 0xf0000) = 0xf7d48000
close(3)                                = 0
openat(AT_FDCWD, "/dev/mem", O_RDWR)    = 3
mmap2(NULL, 8192, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_SHARED, 3, 0xf0000) = 0xf7f45000
close(3)                                = 0
munmap(0xf7d48000, 65535)               = 0
openat(AT_FDCWD, "/dev/mem", O_RDWR)    = 3
mmap2(NULL, 65535, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_SHARED, 3, 0xe0000) = 0xf7d48000
close(3)                                = 0
munmap(0xf7d48000, 65535)               = 0
openat(AT_FDCWD, "/dev/mem", O_RDWR)    = 3
mmap2(NULL, 32767, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_SHARED, 3, 0xc8000) = 0xf7d50000
close(3)                                = 0
munmap(0xf7d50000, 32767)               = 0
openat(AT_FDCWD, "/dev/mem", O_RDWR)    = 3
mmap2(NULL, 65535, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_SHARED, 3, 0xd0000) = 0xf7d48000
close(3)                                = 0
munmap(0xf7d48000, 65535)               = 0
iopl(3)                                 = 0
openat(AT_FDCWD, "/dev/mem", O_RDWR)    = 3
mmap2(NULL, 8342, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_SHARED, 3, 0xf0000) = 0xf7f42000
close(3)                                = 0
iopl(3)                                 = 0
iopl(3)                                 = 0
brk(NULL)                               = 0x91c8000
brk(0x91e9000)                          = 0x91e9000
brk(0x91ea000)                          = 0x91ea000
openat(AT_FDCWD, "/opt/compaq/utils/IrqRouteTable", O_WRONLY|O_CREAT|O_TRUNC, 0666) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/opt/hp/hp-health/IrqRouteTable", O_WRONLY|O_CREAT|O_TRUNC, 0666) = 3
fstat64(3, {st_mode=S_IFREG|0644, st_size=0, ...}) = 0
write(3, "\n\10a\270\0b\270\0a\270\0b\270\0\1\0\6\10b\270\0a\270\0b\270\0a\270\0\2\0"..., 112) = 112
close(3)                                = 0
brk(0x91e9000)                          = 0x91e9000
exit_group(0)                           = ?
+++ exited with 0 +++



-- 
Ondrej Zary

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH] x86/iopl: Fake iopl(3) CLI/STI usage
  2021-09-17  8:11       ` Ondrej Zary
@ 2021-09-17  9:20         ` Peter Zijlstra
  2021-09-17 10:29           ` Ondrej Zary
                             ` (2 more replies)
  0 siblings, 3 replies; 24+ messages in thread
From: Peter Zijlstra @ 2021-09-17  9:20 UTC (permalink / raw)
  To: Ondrej Zary; +Cc: Thomas Gleixner, x86, linux-kernel, Linus Torvalds

On Fri, Sep 17, 2021 at 10:11:31AM +0200, Ondrej Zary wrote:
> Yeah, it works!

w00t!! I've added a pr_err() to make sure people take note that their
'software' is doing dodgy things.

---
Subject: x86/iopl: Fake iopl(3) CLI/STI usage
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 16 Sep 2021 23:05:09 +0200

Since commit c8137ace5638 ("x86/iopl: Restrict iopl() permission
scope") it's possible to emulate iopl(3) using ioperm(), except for
the CLI/STI usage.

Userspace CLI/STI usage is very dubious (read broken), since any
exception taken during that window can lead to rescheduling anyway (or
worse). The IOPL(2) manpage even states that usage of CLI/STI is highly
discouraged and might even crash the system.

Of course, that won't stop people and HP has the dubious honour of
being the first vendor to be found using this in their hp-health
package.

In order to enable this 'software' to still 'work', have the #GP treat
the CLI/STI instructions as NOPs when iopl(3). Warn the user that
their program is doing dubious things.

Fixes: a24ca9976843 ("x86/iopl: Remove legacy IOPL option")
Reported-by: Ondrej Zary <linux@zary.sk>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/kernel/traps.c |   38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -528,6 +528,41 @@ static enum kernel_gp_hint get_kernel_gp
 
 #define GPFSTR "general protection fault"
 
+bool fixup_iopl_exception(struct pt_regs *regs)
+{
+	struct thread_struct *t = &current->thread;
+	unsigned char buf[MAX_INSN_SIZE];
+	struct insn insn;
+	int nr_copied;
+
+	if (!IS_ENABLED(CONFIG_X86_IOPL_IOPERM) || t->iopl_emul != 3 || !regs)
+		return false;
+
+	nr_copied = insn_fetch_from_user(regs, buf);
+	if (nr_copied <= 0)
+		return false;
+
+	if (!insn_decode_from_regs(&insn, regs, buf, nr_copied))
+		return false;
+
+	if (insn.length != 1)
+		return false;
+
+	if (insn.opcode.bytes[0] != 0xfa &&
+	    insn.opcode.bytes[0] != 0xfb)
+		return false;
+
+	if (printk_ratelimit()) {
+		pr_err("%s[%d] attempts to use CLI/STI, pretending it's a NOP, ip:%lx",
+		       current->comm, task_pid_nr(current), regs->ip);
+		print_vma_addr(KERN_CONT " in ", regs->ip);
+		pr_cont("\n");
+	}
+
+	regs->ip += 1;
+	return true;
+}
+
 DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
 {
 	char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR;
@@ -553,6 +588,9 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_pr
 	tsk = current;
 
 	if (user_mode(regs)) {
+		if (fixup_iopl_exception(regs))
+			goto exit;
+
 		tsk->thread.error_code = error_code;
 		tsk->thread.trap_nr = X86_TRAP_GP;
 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86/iopl: Fake iopl(3) CLI/STI usage
  2021-09-17  9:20         ` [PATCH] x86/iopl: Fake iopl(3) CLI/STI usage Peter Zijlstra
@ 2021-09-17 10:29           ` Ondrej Zary
  2021-09-17 11:54             ` Peter Zijlstra
  2021-09-17 10:40           ` Thomas Gleixner
  2021-09-17 22:23           ` Linus Torvalds
  2 siblings, 1 reply; 24+ messages in thread
From: Ondrej Zary @ 2021-09-17 10:29 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Thomas Gleixner, x86, linux-kernel, Linus Torvalds

On Friday 17 September 2021, Peter Zijlstra wrote:
> On Fri, Sep 17, 2021 at 10:11:31AM +0200, Ondrej Zary wrote:
> > Yeah, it works!
> 
> w00t!! I've added a pr_err() to make sure people take note their
> 'software' is doing dodgy things.

It's a bit noisy:
[    9.668952] process 'hp/hp-health/bin/hpasmd' started with executable stack
[    9.741338] floppy0: no floppy controllers found
[    9.866354] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:f7d9109b in mem[f7d91000+3000]
[    9.866500] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:f7d921a2 in mem[f7d91000+3000]
[   10.141846] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   10.142157] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   10.269408] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a538f
[   10.269521] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   10.269754] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   10.273606] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   10.287503] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   10.301421] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   14.876824] fixup_iopl_exception: 333 callbacks suppressed
[   14.876832] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   14.890704] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   14.904581] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   14.918469] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   14.932352] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   14.946225] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   14.960096] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   14.973976] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   14.987861] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   15.001732] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   15.688468] tg3 0000:03:01.0 enp3s1f0: Link is up at 1000 Mbps, full duplex
[   15.688492] tg3 0000:03:01.0 enp3s1f0: Flow control is on for TX and on for RX
[   15.688524] IPv6: ADDRCONF(NETDEV_CHANGE): enp3s1f0: link becomes ready
[   19.877230] fixup_iopl_exception: 355 callbacks suppressed
[   19.877238] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   19.891103] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   19.904971] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   19.918817] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   19.932677] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   19.946550] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   19.960422] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   19.974292] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   19.988158] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   20.002029] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   24.881216] fixup_iopl_exception: 357 callbacks suppressed
[   24.881222] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   24.895109] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   24.908983] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   24.922846] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   24.936717] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   24.950576] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   24.964452] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   24.978324] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   24.992196] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   25.006066] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   29.895371] fixup_iopl_exception: 356 callbacks suppressed
[   29.895377] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   29.909245] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   29.923114] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   29.936984] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   29.950863] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   29.964755] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   29.978637] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   29.992515] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   30.006400] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
[   30.020268] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356

 
> ---
> Subject: x86/iopl: Fake iopl(3) CLI/STI usage
> From: Peter Zijlstra <peterz@infradead.org>
> Date: Thu, 16 Sep 2021 23:05:09 +0200
> 
> Since commit c8137ace5638 ("x86/iopl: Restrict iopl() permission
> scope") it's possible to emulate iopl(3) using ioperm(), except for
> the CLI/STI usage.
> 
> Userspace CLI/STI usage is very dubious (read broken), since any
> exception taken during that window can lead to rescheduling anyway (or
> worse). The IOPL(2) manpage even states that usage of CLI/STI is highly
> discouraged and might even crash the system.
> 
> Of course, that won't stop people and HP has the dubious honour of
> being the first vendor to be found using this in their hp-health
> package.
> 
> In order to enable this 'software' to still 'work', have the #GP treat
> the CLI/STI instructions as NOPs when iopl(3). Warn the user that
> their program is doing dubious things.
> 
> Fixes: a24ca9976843 ("x86/iopl: Remove legacy IOPL option")
> Reported-by: Ondrej Zary <linux@zary.sk>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  arch/x86/kernel/traps.c |   38 ++++++++++++++++++++++++++++++++++++++
>  1 file changed, 38 insertions(+)
> 
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -528,6 +528,41 @@ static enum kernel_gp_hint get_kernel_gp
>  
>  #define GPFSTR "general protection fault"
>  
> +bool fixup_iopl_exception(struct pt_regs *regs)
> +{
> +	struct thread_struct *t = &current->thread;
> +	unsigned char buf[MAX_INSN_SIZE];
> +	struct insn insn;
> +	int nr_copied;
> +
> +	if (!IS_ENABLED(CONFIG_X86_IOPL_IOPERM) || t->iopl_emul != 3 || !regs)
> +		return false;
> +
> +	nr_copied = insn_fetch_from_user(regs, buf);
> +	if (nr_copied <= 0)
> +		return false;
> +
> +	if (!insn_decode_from_regs(&insn, regs, buf, nr_copied))
> +		return false;
> +
> +	if (insn.length != 1)
> +		return false;
> +
> +	if (insn.opcode.bytes[0] != 0xfa &&
> +	    insn.opcode.bytes[0] != 0xfb)
> +		return false;
> +
> +	if (printk_ratelimit()) {
> +		pr_err("%s[%d] attempts to use CLI/STI, pretending it's a NOP, ip:%lx",
> +		       current->comm, task_pid_nr(current), regs->ip);
> +		print_vma_addr(KERN_CONT " in ", regs->ip);
> +		pr_cont("\n");
> +	}
> +
> +	regs->ip += 1;
> +	return true;
> +}
> +
>  DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
>  {
>  	char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR;
> @@ -553,6 +588,9 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_pr
>  	tsk = current;
>  
>  	if (user_mode(regs)) {
> +		if (fixup_iopl_exception(regs))
> +			goto exit;
> +
>  		tsk->thread.error_code = error_code;
>  		tsk->thread.trap_nr = X86_TRAP_GP;
>  
> 



-- 
Ondrej Zary

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86/iopl: Fake iopl(3) CLI/STI usage
  2021-09-17  9:20         ` [PATCH] x86/iopl: Fake iopl(3) CLI/STI usage Peter Zijlstra
  2021-09-17 10:29           ` Ondrej Zary
@ 2021-09-17 10:40           ` Thomas Gleixner
  2021-09-17 22:23           ` Linus Torvalds
  2 siblings, 0 replies; 24+ messages in thread
From: Thomas Gleixner @ 2021-09-17 10:40 UTC (permalink / raw)
  To: Peter Zijlstra, Ondrej Zary; +Cc: x86, linux-kernel, Linus Torvalds

On Fri, Sep 17 2021 at 11:20, Peter Zijlstra wrote:
> Subject: x86/iopl: Fake iopl(3) CLI/STI usage
> From: Peter Zijlstra <peterz@infradead.org>
> Date: Thu, 16 Sep 2021 23:05:09 +0200
>
> Since commit c8137ace5638 ("x86/iopl: Restrict iopl() permission
> scope") it's possible to emulate iopl(3) using ioperm(), except for
> the CLI/STI usage.
>
> Userspace CLI/STI usage is very dubious (read broken), since any
> exception taken during that window can lead to rescheduling anyway (or
> worse). The IOPL(2) manpage even states that usage of CLI/STI is highly
> discouraged and might even crash the system.
>
> Of course, that won't stop people and HP has the dubious honour of
> being the first vendor to be found using this in their hp-health
> package.
>
> In order to enable this 'software' to still 'work', have the #GP treat
> the CLI/STI instructions as NOPs when iopl(3). Warn the user that
> their program is doing dubious things.
>
> Fixes: a24ca9976843 ("x86/iopl: Remove legacy IOPL option")
> Reported-by: Ondrej Zary <linux@zary.sk>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>

Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86/iopl: Fake iopl(3) CLI/STI usage
  2021-09-17 10:29           ` Ondrej Zary
@ 2021-09-17 11:54             ` Peter Zijlstra
  2021-09-17 12:33               ` Thomas Gleixner
  0 siblings, 1 reply; 24+ messages in thread
From: Peter Zijlstra @ 2021-09-17 11:54 UTC (permalink / raw)
  To: Ondrej Zary; +Cc: Thomas Gleixner, x86, linux-kernel, Linus Torvalds

On Fri, Sep 17, 2021 at 12:29:18PM +0200, Ondrej Zary wrote:
> On Friday 17 September 2021, Peter Zijlstra wrote:
> > On Fri, Sep 17, 2021 at 10:11:31AM +0200, Ondrej Zary wrote:
> > > Yeah, it works!
> > 
> > w00t!! I've added a pr_err() to make sure people take note their
> > 'software' is doing dodgy things.
> 
> It's a bit noisy:
> [    9.668952] process 'hp/hp-health/bin/hpasmd' started with executable stack
> [    9.741338] floppy0: no floppy controllers found
> [    9.866354] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:f7d9109b in mem[f7d91000+3000]
> [    9.866500] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:f7d921a2 in mem[f7d91000+3000]
> [   10.141846] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
> [   10.142157] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
> [   10.269408] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a538f
> [   10.269521] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
> [   10.269754] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
> [   10.273606] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
> [   10.287503] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
> [   10.301421] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
> [   14.876824] fixup_iopl_exception: 333 callbacks suppressed
> [   14.876832] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356

I'd say...

Not sure it's really worth it, but something like the below might help.

--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -518,6 +518,7 @@ struct thread_struct {
 	 */
 	unsigned long		iopl_emul;
 
+	unsigned int		iopl_warn:1;
 	unsigned int		sig_on_uaccess_err:1;
 
 	/*
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -132,6 +132,7 @@ int copy_thread(unsigned long clone_flag
 	frame->ret_addr = (unsigned long) ret_from_fork;
 	p->thread.sp = (unsigned long) fork_frame;
 	p->thread.io_bitmap = NULL;
+	p->thread.iopl_warn = 0;
 	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
 
 #ifdef CONFIG_X86_64
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -552,11 +552,12 @@ bool fixup_iopl_exception(struct pt_regs
 	    insn.opcode.bytes[0] != 0xfb)
 		return false;
 
-	if (printk_ratelimit()) {
+	if (!t->iopl_warn && printk_ratelimit()) {
 		pr_err("%s[%d] attempts to use CLI/STI, pretending it's a NOP, ip:%lx",
 		       current->comm, task_pid_nr(current), regs->ip);
 		print_vma_addr(KERN_CONT " in ", regs->ip);
 		pr_cont("\n");
+		t->iopl_warn = 1;
 	}
 
 	regs->ip += 1;

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86/iopl: Fake iopl(3) CLI/STI usage
  2021-09-17 11:54             ` Peter Zijlstra
@ 2021-09-17 12:33               ` Thomas Gleixner
  2021-09-17 12:54                 ` Ondrej Zary
  0 siblings, 1 reply; 24+ messages in thread
From: Thomas Gleixner @ 2021-09-17 12:33 UTC (permalink / raw)
  To: Peter Zijlstra, Ondrej Zary; +Cc: x86, linux-kernel, Linus Torvalds

On Fri, Sep 17 2021 at 13:54, Peter Zijlstra wrote:
>> [   14.876824] fixup_iopl_exception: 333 callbacks suppressed
>> [   14.876832] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
>
> I'd say...
>
> Not sure it's really worth it, but something like the below might
> help.

One entry per task is really good enough. Though that won't help for such
stuff which is started over and over again....

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86/iopl: Fake iopl(3) CLI/STI usage
  2021-09-17 12:33               ` Thomas Gleixner
@ 2021-09-17 12:54                 ` Ondrej Zary
  0 siblings, 0 replies; 24+ messages in thread
From: Ondrej Zary @ 2021-09-17 12:54 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Peter Zijlstra, x86, linux-kernel, Linus Torvalds

On Friday 17 September 2021, Thomas Gleixner wrote:
> On Fri, Sep 17 2021 at 13:54, Peter Zijlstra wrote:
> >> [   14.876824] fixup_iopl_exception: 333 callbacks suppressed
> >> [   14.876832] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:80a5356
> >
> > I'd say...
> >
> > Not sure it's really worth it, but something like the below might
> > help.
> 
> One entry per task is really good enough. Though that wont help for such
> stuff which is started over and over again....

Seems to be fine for this case. hpasmd is running as a daemon.

# dmesg | tail
[    6.834639] atyfb: fb0: ATY Mach64 frame buffer device on PCI
[    7.629499] random: crng init done
[    7.629507] random: 7 urandom warning(s) missed due to ratelimiting
[    9.737551] floppy0: no floppy controllers found
[    9.737584] work still pending
[    9.923185] process 'hp/hp-health/bin/hpasmd' started with executable stack
[   10.140055] traps: hpasmd[359] attempts to use CLI/STI, pretending it's a NOP, ip:f7d3709b in mem[f7d37000+3000]
[   15.821726] tg3 0000:03:01.0 enp3s1f0: Link is up at 1000 Mbps, full duplex
[   15.821753] tg3 0000:03:01.0 enp3s1f0: Flow control is on for TX and on for RX
[   15.821786] IPv6: ADDRCONF(NETDEV_CHANGE): enp3s1f0: link becomes ready
# hpasmcli -s "show fans"

Fan  Location        Present Speed  of max  Redundant  Partner  Hot-pluggable
---  --------        ------- -----  ------  ---------  -------  -------------
#1   PROCESSOR_ZONE  Yes     NORMAL  22%     Yes        2        Yes
#2   PROCESSOR_ZONE  Yes     NORMAL  22%     Yes        1        Yes
#3   I/O_ZONE        Yes     NORMAL  11%     Yes        1        Yes
#4   I/O_ZONE        Yes     NORMAL  11%     Yes        1        Yes
#5   PROCESSOR_ZONE  Yes     NORMAL  22%     Yes        1        Yes
#6   PROCESSOR_ZONE  Yes     NORMAL  22%     Yes        1        Yes
#7   POWERSUPPLY_BAY Yes     NORMAL  11%     Yes        1        Yes
#8   POWERSUPPLY_BAY Yes     NORMAL  11%     Yes        1        Yes


-- 
Ondrej Zary

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86/iopl: Fake iopl(3) CLI/STI usage
  2021-09-17  9:20         ` [PATCH] x86/iopl: Fake iopl(3) CLI/STI usage Peter Zijlstra
  2021-09-17 10:29           ` Ondrej Zary
  2021-09-17 10:40           ` Thomas Gleixner
@ 2021-09-17 22:23           ` Linus Torvalds
  2021-09-17 22:24             ` Linus Torvalds
  2 siblings, 1 reply; 24+ messages in thread
From: Linus Torvalds @ 2021-09-17 22:23 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ondrej Zary, Thomas Gleixner, the arch/x86 maintainers,
	Linux Kernel Mailing List

On Fri, Sep 17, 2021 at 2:21 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> +       nr_copied = insn_fetch_from_user(regs, buf);

Ugh. This is the code that does the magic "add CS base" stuff.

Do we really want to do that instead of just doing

        unsigned char byte = get_user((char __user *)regs->ip);

when later on the debug code does:

> +               pr_err("%s[%d] attempts to use CLI/STI, pretending it's a NOP, ip:%lx",
> +                      current->comm, task_pid_nr(current), regs->ip);
> +               print_vma_addr(KERN_CONT " in ", regs->ip);
> +               pr_cont("\n");

and prints out the wrong IP address?

IOW, I'd argue that you should get it right in both places, or not try
to get it right in one but not the other.

I think the proper thing to do is perhaps something like

        unsigned long cs_base = 0;
        unsigned long address;
        unsigned char byte;

        if (!user_64bit_mode(regs)) {
                cs_base = insn_get_seg_base(regs, INAT_SEG_REG_CS);
                if (cs_base == -1ul)
                        return false;
        }

        // We could check the limit too, but nobody cares
        address = regs->ip + cs_base;
        if (get_user(byte, (const char __user *)address))
                return false;

        // cli/sti?
        if (byte != 0xfa && byte != 0xfb)
                return false;

and now you have the actual linear address in 'address' and can at
least print it out correctly.

Hmm? Because it's just sad to get it right in one place, and wrong in
another. And we don't actually _want_ any of the instruction
fetch/decode stuff.

           Linus

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86/iopl: Fake iopl(3) CLI/STI usage
  2021-09-17 22:23           ` Linus Torvalds
@ 2021-09-17 22:24             ` Linus Torvalds
  2021-09-18  7:05               ` Peter Zijlstra
  0 siblings, 1 reply; 24+ messages in thread
From: Linus Torvalds @ 2021-09-17 22:24 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ondrej Zary, Thomas Gleixner, the arch/x86 maintainers,
	Linux Kernel Mailing List

On Fri, Sep 17, 2021 at 3:23 PM Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
> I think the proper thing to do is perhaps something like

The alternative is to just ignore cs_base entirely, and just use
"regs->ip", which makes this all even easier.

If somebody uses a code segment _and_ cli/sti, maybe they should just
get the SIGSEGV?

               Linus

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86/iopl: Fake iopl(3) CLI/STI usage
  2021-09-17 22:24             ` Linus Torvalds
@ 2021-09-18  7:05               ` Peter Zijlstra
  2021-09-18  9:06                 ` Peter Zijlstra
  2021-09-21 21:01                 ` [PATCH] " Andy Lutomirski
  0 siblings, 2 replies; 24+ messages in thread
From: Peter Zijlstra @ 2021-09-18  7:05 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Ondrej Zary, Thomas Gleixner, the arch/x86 maintainers,
	Linux Kernel Mailing List

On Fri, Sep 17, 2021 at 03:24:51PM -0700, Linus Torvalds wrote:
> On Fri, Sep 17, 2021 at 3:23 PM Linus Torvalds
> <torvalds@linux-foundation.org> wrote:
> >
> > I think the proper thing to do is perhaps something like
> 
> The alternative is to just ignore cs_base entirely, and just use
> "regs->ip", which makes this all even easier.
> 
> If somebody uses a code segment _and_ cli/sti, maybe they should just
> get the SIGSEGV?

I did a hatchet job on fixup_umip_exception() which is why it looks like
it does, that said...

our case at hand mmap()'s BIOS code from /dev/mem and executes that, I
don't think it does an LDT segment but it would be entirely in line with
the level of hack we're looking at.

Let me frob at this after breakfast and see if I can make it better.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86/iopl: Fake iopl(3) CLI/STI usage
  2021-09-18  7:05               ` Peter Zijlstra
@ 2021-09-18  9:06                 ` Peter Zijlstra
  2021-09-18 15:53                   ` Ondrej Zary
                                     ` (3 more replies)
  2021-09-21 21:01                 ` [PATCH] " Andy Lutomirski
  1 sibling, 4 replies; 24+ messages in thread
From: Peter Zijlstra @ 2021-09-18  9:06 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Ondrej Zary, Thomas Gleixner, the arch/x86 maintainers,
	Linux Kernel Mailing List

On Sat, Sep 18, 2021 at 09:05:28AM +0200, Peter Zijlstra wrote:
> On Fri, Sep 17, 2021 at 03:24:51PM -0700, Linus Torvalds wrote:
> > On Fri, Sep 17, 2021 at 3:23 PM Linus Torvalds
> > <torvalds@linux-foundation.org> wrote:
> > >
> > > I think the proper thing to do is perhaps something like
> > 
> > > The alternative is to just ignore cs_base entirely, and just use
> > "regs->ip", which makes this all even easier.
> > 
> > If somebody uses a code segment _and_ cli/sti, maybe they should just
> > get the SIGSEGV?
> 
> I did a hatchet job on fixup_umip_exception() which is why it looks like
> it does, that said...
> 
> our case at hand mmap()'s BIOS code from /dev/mem and executes that, I
> don't think it does an LDT segment but it would be entirely in line with
> the level of hack we're looking at.
> 
> Let me frob at this after breakfast and see if I can make it better.

How's this then? I should probably look to see if I should be using this
insn_get_effective_ip() for perf_instruction_pointer() too. Although I
suspect we maybe took a shortcut there in favour of performance.

---
 arch/x86/include/asm/insn-eval.h |  1 +
 arch/x86/include/asm/processor.h |  1 +
 arch/x86/kernel/process.c        |  1 +
 arch/x86/kernel/traps.c          | 33 +++++++++++++++++++++++++++++++++
 arch/x86/lib/insn-eval.c         |  2 +-
 5 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h
index 91d7182ad2d6..4ec3613551e3 100644
--- a/arch/x86/include/asm/insn-eval.h
+++ b/arch/x86/include/asm/insn-eval.h
@@ -21,6 +21,7 @@ int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs);
 int insn_get_modrm_reg_off(struct insn *insn, struct pt_regs *regs);
 unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx);
 int insn_get_code_seg_params(struct pt_regs *regs);
+int insn_get_effective_ip(struct pt_regs *regs, unsigned long *ip);
 int insn_fetch_from_user(struct pt_regs *regs,
 			 unsigned char buf[MAX_INSN_SIZE]);
 int insn_fetch_from_user_inatomic(struct pt_regs *regs,
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 9ad2acaaae9b..577f342dbfb2 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -518,6 +518,7 @@ struct thread_struct {
 	 */
 	unsigned long		iopl_emul;
 
+	unsigned int		iopl_warn:1;
 	unsigned int		sig_on_uaccess_err:1;
 
 	/*
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 1d9463e3096b..f2f733bcb2b9 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -132,6 +132,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
 	frame->ret_addr = (unsigned long) ret_from_fork;
 	p->thread.sp = (unsigned long) fork_frame;
 	p->thread.io_bitmap = NULL;
+	p->thread.iopl_warn = 0;
 	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a58800973aed..f3f3034b06f3 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -528,6 +528,36 @@ static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs,
 
 #define GPFSTR "general protection fault"
 
+static bool fixup_iopl_exception(struct pt_regs *regs)
+{
+	struct thread_struct *t = &current->thread;
+	unsigned char byte;
+	unsigned long ip;
+
+	if (!IS_ENABLED(CONFIG_X86_IOPL_IOPERM) || t->iopl_emul != 3)
+		return false;
+
+	if (insn_get_effective_ip(regs, &ip))
+		return false;
+
+	if (get_user(byte, (const char __user *)ip))
+		return false;
+
+	if (byte != 0xfa && byte != 0xfb) /* CLI, STI */
+		return false;
+
+	if (!t->iopl_warn && printk_ratelimit()) {
+		pr_err("%s[%d] attempts to use CLI/STI, pretending it's a NOP, ip:%lx",
+		       current->comm, task_pid_nr(current), ip);
+		print_vma_addr(KERN_CONT " in ", ip);
+		pr_cont("\n");
+		t->iopl_warn = 1;
+	}
+
+	regs->ip += 1;
+	return true;
+}
+
 DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
 {
 	char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR;
@@ -553,6 +583,9 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
 	tsk = current;
 
 	if (user_mode(regs)) {
+		if (fixup_iopl_exception(regs))
+			goto exit;
+
 		tsk->thread.error_code = error_code;
 		tsk->thread.trap_nr = X86_TRAP_GP;
 
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index a1d24fdc07cf..eb3ccffb9b9d 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -1417,7 +1417,7 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs)
 	}
 }
 
-static int insn_get_effective_ip(struct pt_regs *regs, unsigned long *ip)
+int insn_get_effective_ip(struct pt_regs *regs, unsigned long *ip)
 {
 	unsigned long seg_base = 0;
 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86/iopl: Fake iopl(3) CLI/STI usage
  2021-09-18  9:06                 ` Peter Zijlstra
@ 2021-09-18 15:53                   ` Ondrej Zary
  2021-09-18 16:35                   ` Linus Torvalds
                                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 24+ messages in thread
From: Ondrej Zary @ 2021-09-18 15:53 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Linus Torvalds, Thomas Gleixner, the arch/x86 maintainers,
	Linux Kernel Mailing List

On Saturday 18 September 2021 11:06:41 Peter Zijlstra wrote:
> On Sat, Sep 18, 2021 at 09:05:28AM +0200, Peter Zijlstra wrote:
> > On Fri, Sep 17, 2021 at 03:24:51PM -0700, Linus Torvalds wrote:
> > > On Fri, Sep 17, 2021 at 3:23 PM Linus Torvalds
> > > <torvalds@linux-foundation.org> wrote:
> > > >
> > > > I think the proper thing to do is perhaps something like
> > > 
> > > > The alternative is to just ignore cs_base entirely, and just use
> > > "regs->ip", which makes this all even easier.
> > > 
> > > If somebody uses a code segment _and_ cli/sti, maybe they should just
> > > get the SIGSEGV?
> > 
> > I did a hatchet job on fixup_umip_exception() which is why it looks like
> > it does, that said...
> > 
> > our case at hand mmap()'s BIOS code from /dev/mem and executes that, I
> > don't think it does an LDT segment but it would be entirely in line with
> > the level of hack we're looking at.
> > 
> > Let me frob at this after breakfast and see if I can make it better.
> 
> How's this then? I should probably look to see if I should be using this
> insn_get_effective_ip() for perf_instruction_pointer() too. Although I
> suspect we maybe took a shortcut there in favour of performance.

Good for me because it works.
# dmesg | tail
[    7.229031] Console: switching to colour frame buffer device 128x48
[    7.234234] atyfb: fb0: ATY Mach64 frame buffer device on PCI
[    7.929907] random: crng init done
[    7.929913] random: 7 urandom warning(s) missed due to ratelimiting
[    8.441701] process 'hp/hp-health/bin/hpasmd' started with executable stack
[    8.638998] traps: hpasmd[360] attempts to use CLI/STI, pretending it's a NOP, ip:f7cb109b in mem[f7cb1000+3000]
[   10.253851] floppy0: no floppy controllers found
[   16.413225] tg3 0000:03:01.0 enp3s1f0: Link is up at 1000 Mbps, full duplex
[   16.413257] tg3 0000:03:01.0 enp3s1f0: Flow control is on for TX and on for RX
[   16.414645] IPv6: ADDRCONF(NETDEV_CHANGE): enp3s1f0: link becomes ready
root@edi2:/home/edi# hpasmcli -s "show fans"

Fan  Location        Present Speed  of max  Redundant  Partner  Hot-pluggable
---  --------        ------- -----  ------  ---------  -------  -------------
#1   PROCESSOR_ZONE  Yes     NORMAL  22%     Yes        2        Yes
#2   PROCESSOR_ZONE  Yes     NORMAL  22%     Yes        1        Yes
#3   I/O_ZONE        Yes     NORMAL  11%     Yes        1        Yes
#4   I/O_ZONE        Yes     NORMAL  11%     Yes        1        Yes
#5   PROCESSOR_ZONE  Yes     NORMAL  22%     Yes        1        Yes
#6   PROCESSOR_ZONE  Yes     NORMAL  22%     Yes        1        Yes
#7   POWERSUPPLY_BAY Yes     NORMAL  11%     Yes        1        Yes
#8   POWERSUPPLY_BAY Yes     NORMAL  11%     Yes        1        Yes


-- 
Ondrej Zary

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86/iopl: Fake iopl(3) CLI/STI usage
  2021-09-18  9:06                 ` Peter Zijlstra
  2021-09-18 15:53                   ` Ondrej Zary
@ 2021-09-18 16:35                   ` Linus Torvalds
  2021-09-21  7:28                   ` [tip: x86/core] " tip-bot2 for Peter Zijlstra
  2021-09-21 12:41                   ` tip-bot2 for Peter Zijlstra
  3 siblings, 0 replies; 24+ messages in thread
From: Linus Torvalds @ 2021-09-18 16:35 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ondrej Zary, Thomas Gleixner, the arch/x86 maintainers,
	Linux Kernel Mailing List

On Sat, Sep 18, 2021 at 2:07 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> How's this then?

Looks good to me. Thanks,

          Linus

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [tip: x86/core] x86/iopl: Fake iopl(3) CLI/STI usage
  2021-09-18  9:06                 ` Peter Zijlstra
  2021-09-18 15:53                   ` Ondrej Zary
  2021-09-18 16:35                   ` Linus Torvalds
@ 2021-09-21  7:28                   ` tip-bot2 for Peter Zijlstra
  2021-09-21 11:09                     ` Ondrej Zary
  2021-09-21 12:41                   ` tip-bot2 for Peter Zijlstra
  3 siblings, 1 reply; 24+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2021-09-21  7:28 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Ondrej Zary, Peter Zijlstra (Intel), Thomas Gleixner, x86, linux-kernel

The following commit has been merged into the x86/core branch of tip:

Commit-ID:     32e1ae626f295152d1fc9a3375214133cbe62878
Gitweb:        https://git.kernel.org/tip/32e1ae626f295152d1fc9a3375214133cbe62878
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Fri, 17 Sep 2021 11:20:04 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Sat, 18 Sep 2021 12:18:32 +02:00

x86/iopl: Fake iopl(3) CLI/STI usage

Since commit c8137ace5638 ("x86/iopl: Restrict iopl() permission
scope") it's possible to emulate iopl(3) using ioperm(), except for
the CLI/STI usage.

Userspace CLI/STI usage is very dubious (read broken), since any
exception taken during that window can lead to rescheduling anyway (or
worse). The IOPL(2) manpage even states that usage of CLI/STI is highly
discouraged and might even crash the system.

Of course, that won't stop people and HP has the dubious honour of
being the first vendor to be found using this in their hp-health
package.

In order to enable this 'software' to still 'work', have the #GP treat
the CLI/STI instructions as NOPs when iopl(3). Warn the user that
their program is doing dubious things.

Fixes: a24ca9976843 ("x86/iopl: Remove legacy IOPL option")
Reported-by: Ondrej Zary <linux@zary.sk>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/20210918090641.GD5106@worktop.programming.kicks-ass.net
---
 arch/x86/include/asm/insn-eval.h |  1 +-
 arch/x86/include/asm/processor.h |  1 +-
 arch/x86/kernel/process.c        |  1 +-
 arch/x86/kernel/traps.c          | 33 +++++++++++++++++++++++++++++++-
 arch/x86/lib/insn-eval.c         |  2 +-
 5 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h
index 91d7182..4ec3613 100644
--- a/arch/x86/include/asm/insn-eval.h
+++ b/arch/x86/include/asm/insn-eval.h
@@ -21,6 +21,7 @@ int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs);
 int insn_get_modrm_reg_off(struct insn *insn, struct pt_regs *regs);
 unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx);
 int insn_get_code_seg_params(struct pt_regs *regs);
+int insn_get_effective_ip(struct pt_regs *regs, unsigned long *ip);
 int insn_fetch_from_user(struct pt_regs *regs,
 			 unsigned char buf[MAX_INSN_SIZE]);
 int insn_fetch_from_user_inatomic(struct pt_regs *regs,
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 9ad2aca..577f342 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -518,6 +518,7 @@ struct thread_struct {
 	 */
 	unsigned long		iopl_emul;
 
+	unsigned int		iopl_warn:1;
 	unsigned int		sig_on_uaccess_err:1;
 
 	/*
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 1d9463e..f2f733b 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -132,6 +132,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
 	frame->ret_addr = (unsigned long) ret_from_fork;
 	p->thread.sp = (unsigned long) fork_frame;
 	p->thread.io_bitmap = NULL;
+	p->thread.iopl_warn = 0;
 	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a588009..f3f3034 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -528,6 +528,36 @@ static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs,
 
 #define GPFSTR "general protection fault"
 
+static bool fixup_iopl_exception(struct pt_regs *regs)
+{
+	struct thread_struct *t = &current->thread;
+	unsigned char byte;
+	unsigned long ip;
+
+	if (!IS_ENABLED(CONFIG_X86_IOPL_IOPERM) || t->iopl_emul != 3)
+		return false;
+
+	if (insn_get_effective_ip(regs, &ip))
+		return false;
+
+	if (get_user(byte, (const char __user *)ip))
+		return false;
+
+	if (byte != 0xfa && byte != 0xfb)
+		return false;
+
+	if (!t->iopl_warn && printk_ratelimit()) {
+		pr_err("%s[%d] attempts to use CLI/STI, pretending it's a NOP, ip:%lx",
+		       current->comm, task_pid_nr(current), ip);
+		print_vma_addr(KERN_CONT " in ", ip);
+		pr_cont("\n");
+		t->iopl_warn = 1;
+	}
+
+	regs->ip += 1;
+	return true;
+}
+
 DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
 {
 	char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR;
@@ -553,6 +583,9 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
 	tsk = current;
 
 	if (user_mode(regs)) {
+		if (fixup_iopl_exception(regs))
+			goto exit;
+
 		tsk->thread.error_code = error_code;
 		tsk->thread.trap_nr = X86_TRAP_GP;
 
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index a1d24fd..eb3ccff 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -1417,7 +1417,7 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs)
 	}
 }
 
-static int insn_get_effective_ip(struct pt_regs *regs, unsigned long *ip)
+int insn_get_effective_ip(struct pt_regs *regs, unsigned long *ip)
 {
 	unsigned long seg_base = 0;
 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [tip: x86/core] x86/iopl: Fake iopl(3) CLI/STI usage
  2021-09-21  7:28                   ` [tip: x86/core] " tip-bot2 for Peter Zijlstra
@ 2021-09-21 11:09                     ` Ondrej Zary
  2021-09-21 12:00                       ` Peter Zijlstra
  0 siblings, 1 reply; 24+ messages in thread
From: Ondrej Zary @ 2021-09-21 11:09 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-tip-commits, Peter Zijlstra (Intel), Thomas Gleixner, x86

On Tuesday 21 September 2021, tip-bot2 for Peter Zijlstra wrote:
> The following commit has been merged into the x86/core branch of tip:
> 
> Commit-ID:     32e1ae626f295152d1fc9a3375214133cbe62878
> Gitweb:        https://git.kernel.org/tip/32e1ae626f295152d1fc9a3375214133cbe62878
> Author:        Peter Zijlstra <peterz@infradead.org>
> AuthorDate:    Fri, 17 Sep 2021 11:20:04 +02:00
> Committer:     Peter Zijlstra <peterz@infradead.org>
> CommitterDate: Sat, 18 Sep 2021 12:18:32 +02:00
> 
> x86/iopl: Fake iopl(3) CLI/STI usage
> 
> Since commit c8137ace5638 ("x86/iopl: Restrict iopl() permission
> scope") it's possible to emulate iopl(3) using ioperm(), except for
> the CLI/STI usage.
> 
> Userspace CLI/STI usage is very dubious (read broken), since any
> exception taken during that window can lead to rescheduling anyway (or
> worse). The IOPL(2) manpage even states that usage of CLI/STI is highly
> discouraged and might even crash the system.
> 
> Of course, that won't stop people and HP has the dubious honour of
> being the first vendor to be found using this in their hp-health
> package.
> 
> In order to enable this 'software' to still 'work', have the #GP treat
> the CLI/STI instructions as NOPs when iopl(3). Warn the user that
> their program is doing dubious things.
> 
> Fixes: a24ca9976843 ("x86/iopl: Remove legacy IOPL option")
> Reported-by: Ondrej Zary <linux@zary.sk>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
> Link: https://lkml.kernel.org/r/20210918090641.GD5106@worktop.programming.kicks-ass.net

Could this be backported to 5.10 kernel so it can get into the recently released Debian 11?

-- 
Ondrej Zary

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [tip: x86/core] x86/iopl: Fake iopl(3) CLI/STI usage
  2021-09-21 11:09                     ` Ondrej Zary
@ 2021-09-21 12:00                       ` Peter Zijlstra
  2021-09-21 17:31                         ` Ondrej Zary
  0 siblings, 1 reply; 24+ messages in thread
From: Peter Zijlstra @ 2021-09-21 12:00 UTC (permalink / raw)
  To: Ondrej Zary; +Cc: linux-kernel, linux-tip-commits, Thomas Gleixner, x86

On Tue, Sep 21, 2021 at 01:09:26PM +0200, Ondrej Zary wrote:
> On Tuesday 21 September 2021, tip-bot2 for Peter Zijlstra wrote:
> > The following commit has been merged into the x86/core branch of tip:
> > 
> > Commit-ID:     32e1ae626f295152d1fc9a3375214133cbe62878
> > Gitweb:        https://git.kernel.org/tip/32e1ae626f295152d1fc9a3375214133cbe62878
> > Author:        Peter Zijlstra <peterz@infradead.org>
> > AuthorDate:    Fri, 17 Sep 2021 11:20:04 +02:00
> > Committer:     Peter Zijlstra <peterz@infradead.org>
> > CommitterDate: Sat, 18 Sep 2021 12:18:32 +02:00
> > 
> > x86/iopl: Fake iopl(3) CLI/STI usage
> > 
> > Since commit c8137ace5638 ("x86/iopl: Restrict iopl() permission
> > scope") it's possible to emulate iopl(3) using ioperm(), except for
> > the CLI/STI usage.
> > 
> > Userspace CLI/STI usage is very dubious (read broken), since any
> > exception taken during that window can lead to rescheduling anyway (or
> > worse). The IOPL(2) manpage even states that usage of CLI/STI is highly
> > discouraged and might even crash the system.
> > 
> > Of course, that won't stop people and HP has the dubious honour of
> > being the first vendor to be found using this in their hp-health
> > package.
> > 
> > In order to enable this 'software' to still 'work', have the #GP treat
> > the CLI/STI instructions as NOPs when iopl(3). Warn the user that
> > their program is doing dubious things.
> > 
> > Fixes: a24ca9976843 ("x86/iopl: Remove legacy IOPL option")
> > Reported-by: Ondrej Zary <linux@zary.sk>
> > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
> > Link: https://lkml.kernel.org/r/20210918090641.GD5106@worktop.programming.kicks-ass.net
> 
> Could this be backported to 5.10 kernel so it can get into the recently released Debian 11?

Thomas also asked about a stable tag, so I'll rebase and force-push
these commits and add:

Cc: stable@kernel.org # v5.5+

to it.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [tip: x86/core] x86/iopl: Fake iopl(3) CLI/STI usage
  2021-09-18  9:06                 ` Peter Zijlstra
                                     ` (2 preceding siblings ...)
  2021-09-21  7:28                   ` [tip: x86/core] " tip-bot2 for Peter Zijlstra
@ 2021-09-21 12:41                   ` tip-bot2 for Peter Zijlstra
  3 siblings, 0 replies; 24+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2021-09-21 12:41 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Ondrej Zary, Peter Zijlstra (Intel), Thomas Gleixner,
	stable (v5.5+),
	x86, linux-kernel

The following commit has been merged into the x86/core branch of tip:

Commit-ID:     b968e84b509da593c50dc3db679e1d33de701f78
Gitweb:        https://git.kernel.org/tip/b968e84b509da593c50dc3db679e1d33de701f78
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Fri, 17 Sep 2021 11:20:04 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Tue, 21 Sep 2021 13:52:18 +02:00

x86/iopl: Fake iopl(3) CLI/STI usage

Since commit c8137ace5638 ("x86/iopl: Restrict iopl() permission
scope") it's possible to emulate iopl(3) using ioperm(), except for
the CLI/STI usage.

Userspace CLI/STI usage is very dubious (read broken), since any
exception taken during that window can lead to rescheduling anyway (or
worse). The IOPL(2) manpage even states that usage of CLI/STI is highly
discouraged and might even crash the system.

Of course, that won't stop people and HP has the dubious honour of
being the first vendor to be found using this in their hp-health
package.

In order to enable this 'software' to still 'work', have the #GP treat
the CLI/STI instructions as NOPs when iopl(3). Warn the user that
their program is doing dubious things.

Fixes: a24ca9976843 ("x86/iopl: Remove legacy IOPL option")
Reported-by: Ondrej Zary <linux@zary.sk>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@kernel.org # v5.5+
Link: https://lkml.kernel.org/r/20210918090641.GD5106@worktop.programming.kicks-ass.net
---
 arch/x86/include/asm/insn-eval.h |  1 +-
 arch/x86/include/asm/processor.h |  1 +-
 arch/x86/kernel/process.c        |  1 +-
 arch/x86/kernel/traps.c          | 33 +++++++++++++++++++++++++++++++-
 arch/x86/lib/insn-eval.c         |  2 +-
 5 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h
index 91d7182..4ec3613 100644
--- a/arch/x86/include/asm/insn-eval.h
+++ b/arch/x86/include/asm/insn-eval.h
@@ -21,6 +21,7 @@ int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs);
 int insn_get_modrm_reg_off(struct insn *insn, struct pt_regs *regs);
 unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx);
 int insn_get_code_seg_params(struct pt_regs *regs);
+int insn_get_effective_ip(struct pt_regs *regs, unsigned long *ip);
 int insn_fetch_from_user(struct pt_regs *regs,
 			 unsigned char buf[MAX_INSN_SIZE]);
 int insn_fetch_from_user_inatomic(struct pt_regs *regs,
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 9ad2aca..577f342 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -518,6 +518,7 @@ struct thread_struct {
 	 */
 	unsigned long		iopl_emul;
 
+	unsigned int		iopl_warn:1;
 	unsigned int		sig_on_uaccess_err:1;
 
 	/*
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 1d9463e..f2f733b 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -132,6 +132,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
 	frame->ret_addr = (unsigned long) ret_from_fork;
 	p->thread.sp = (unsigned long) fork_frame;
 	p->thread.io_bitmap = NULL;
+	p->thread.iopl_warn = 0;
 	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a588009..f3f3034 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -528,6 +528,36 @@ static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs,
 
 #define GPFSTR "general protection fault"
 
+static bool fixup_iopl_exception(struct pt_regs *regs)
+{
+	struct thread_struct *t = &current->thread;
+	unsigned char byte;
+	unsigned long ip;
+
+	if (!IS_ENABLED(CONFIG_X86_IOPL_IOPERM) || t->iopl_emul != 3)
+		return false;
+
+	if (insn_get_effective_ip(regs, &ip))
+		return false;
+
+	if (get_user(byte, (const char __user *)ip))
+		return false;
+
+	if (byte != 0xfa && byte != 0xfb)
+		return false;
+
+	if (!t->iopl_warn && printk_ratelimit()) {
+		pr_err("%s[%d] attempts to use CLI/STI, pretending it's a NOP, ip:%lx",
+		       current->comm, task_pid_nr(current), ip);
+		print_vma_addr(KERN_CONT " in ", ip);
+		pr_cont("\n");
+		t->iopl_warn = 1;
+	}
+
+	regs->ip += 1;
+	return true;
+}
+
 DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
 {
 	char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR;
@@ -553,6 +583,9 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
 	tsk = current;
 
 	if (user_mode(regs)) {
+		if (fixup_iopl_exception(regs))
+			goto exit;
+
 		tsk->thread.error_code = error_code;
 		tsk->thread.trap_nr = X86_TRAP_GP;
 
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index a1d24fd..eb3ccff 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -1417,7 +1417,7 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs)
 	}
 }
 
-static int insn_get_effective_ip(struct pt_regs *regs, unsigned long *ip)
+int insn_get_effective_ip(struct pt_regs *regs, unsigned long *ip)
 {
 	unsigned long seg_base = 0;
 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [tip: x86/core] x86/iopl: Fake iopl(3) CLI/STI usage
  2021-09-21 12:00                       ` Peter Zijlstra
@ 2021-09-21 17:31                         ` Ondrej Zary
  0 siblings, 0 replies; 24+ messages in thread
From: Ondrej Zary @ 2021-09-21 17:31 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: linux-kernel, linux-tip-commits, Thomas Gleixner, x86

On Tuesday 21 September 2021 14:00:59 Peter Zijlstra wrote:
> On Tue, Sep 21, 2021 at 01:09:26PM +0200, Ondrej Zary wrote:
> > On Tuesday 21 September 2021, tip-bot2 for Peter Zijlstra wrote:
> > > The following commit has been merged into the x86/core branch of tip:
> > > 
> > > Commit-ID:     32e1ae626f295152d1fc9a3375214133cbe62878
> > > Gitweb:        https://git.kernel.org/tip/32e1ae626f295152d1fc9a3375214133cbe62878
> > > Author:        Peter Zijlstra <peterz@infradead.org>
> > > AuthorDate:    Fri, 17 Sep 2021 11:20:04 +02:00
> > > Committer:     Peter Zijlstra <peterz@infradead.org>
> > > CommitterDate: Sat, 18 Sep 2021 12:18:32 +02:00
> > > 
> > > x86/iopl: Fake iopl(3) CLI/STI usage
> > > 
> > > Since commit c8137ace5638 ("x86/iopl: Restrict iopl() permission
> > > scope") it's possible to emulate iopl(3) using ioperm(), except for
> > > the CLI/STI usage.
> > > 
> > > Userspace CLI/STI usage is very dubious (read broken), since any
> > > exception taken during that window can lead to rescheduling anyway (or
> > > worse). The IOPL(2) manpage even states that usage of CLI/STI is highly
> > > discouraged and might even crash the system.
> > > 
> > > Of course, that won't stop people and HP has the dubious honour of
> > > being the first vendor to be found using this in their hp-health
> > > package.
> > > 
> > > In order to enable this 'software' to still 'work', have the #GP treat
> > > the CLI/STI instructions as NOPs when iopl(3). Warn the user that
> > > their program is doing dubious things.
> > > 
> > > Fixes: a24ca9976843 ("x86/iopl: Remove legacy IOPL option")
> > > Reported-by: Ondrej Zary <linux@zary.sk>
> > > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > > Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
> > > Link: https://lkml.kernel.org/r/20210918090641.GD5106@worktop.programming.kicks-ass.net
> > 
> > Could this be backported to 5.10 kernel so it can get into the recently released Debian 11?
> 
> Thomas also asked about a stable tag, so I'll rebase and force-push
> these commits and add:
> 
> Cc: stable@kernel.org # v5.5+
> 
> to it.
> 

Thank you very much for the great work.

-- 
Ondrej Zary

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86/iopl: Fake iopl(3) CLI/STI usage
  2021-09-18  7:05               ` Peter Zijlstra
  2021-09-18  9:06                 ` Peter Zijlstra
@ 2021-09-21 21:01                 ` Andy Lutomirski
  1 sibling, 0 replies; 24+ messages in thread
From: Andy Lutomirski @ 2021-09-21 21:01 UTC (permalink / raw)
  To: Peter Zijlstra (Intel), Linus Torvalds
  Cc: Ondrej Zary, Thomas Gleixner, the arch/x86 maintainers,
	Linux Kernel Mailing List


On Sat, Sep 18, 2021, at 12:05 AM, Peter Zijlstra wrote:
> On Fri, Sep 17, 2021 at 03:24:51PM -0700, Linus Torvalds wrote:
>> On Fri, Sep 17, 2021 at 3:23 PM Linus Torvalds
>> <torvalds@linux-foundation.org> wrote:
>> >
>> > I think the proper thing to do is perhaps something like
>> 
>> The alternative is to just ignore cs_base entirely, and just use
>> "regs->ip", which makes this all even easier.
>> 
>> If somebody uses a code segment _and_ cli/sti, maybe they should just
>> get the SIGSEGV?
>
> I did a hatchet job on fixup_umip_exception() which is why it looks like
> it does, that said...
>
> our case at hand mmap()'s BIOS code from /dev/mem and executes that, I
> don't think it does an LDT segment but it would be entirely in line with
> the level of hack we're looking at.
>
> Let me frob at this after breakfast and see if I can make it better.

The patch seems fine, but I have to ask: is this really worth fixing?

I suppose the log message could check if the computer comes from HP(E) and gently suggest that the owner switch vendors.

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2021-09-21 21:02 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-15 12:23 IOPL emulation breaks hpasmd (hp-health) needed by HP DL380 G4 servers Ondrej Zary
2021-09-16 17:09 ` Thomas Gleixner
2021-09-16 20:27   ` Ondrej Zary
2021-09-16 21:05     ` Peter Zijlstra
2021-09-17  8:11       ` Ondrej Zary
2021-09-17  9:20         ` [PATCH] x86/iopl: Fake iopl(3) CLI/STI usage Peter Zijlstra
2021-09-17 10:29           ` Ondrej Zary
2021-09-17 11:54             ` Peter Zijlstra
2021-09-17 12:33               ` Thomas Gleixner
2021-09-17 12:54                 ` Ondrej Zary
2021-09-17 10:40           ` Thomas Gleixner
2021-09-17 22:23           ` Linus Torvalds
2021-09-17 22:24             ` Linus Torvalds
2021-09-18  7:05               ` Peter Zijlstra
2021-09-18  9:06                 ` Peter Zijlstra
2021-09-18 15:53                   ` Ondrej Zary
2021-09-18 16:35                   ` Linus Torvalds
2021-09-21  7:28                   ` [tip: x86/core] " tip-bot2 for Peter Zijlstra
2021-09-21 11:09                     ` Ondrej Zary
2021-09-21 12:00                       ` Peter Zijlstra
2021-09-21 17:31                         ` Ondrej Zary
2021-09-21 12:41                   ` tip-bot2 for Peter Zijlstra
2021-09-21 21:01                 ` [PATCH] " Andy Lutomirski
2021-09-16 21:25     ` IOPL emulation breaks hpasmd (hp-health) needed by HP DL380 G4 servers Thomas Gleixner

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.