All of lore.kernel.org
 help / color / mirror / Atom feed
From: Thomas Gleixner <tglx@linutronix.de>
To: LKML <linux-kernel@vger.kernel.org>
Cc: x86@kernel.org, Stephen Hemminger <stephen@networkplumber.org>,
	Willy Tarreau <w@1wt.eu>, Juergen Gross <jgross@suse.com>,
	Sean Christopherson <sean.j.christopherson@intel.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	"H. Peter Anvin" <hpa@zytor.com>
Subject: [patch 7/9] x86/iopl: Restrict iopl() permission scope
Date: Wed, 06 Nov 2019 20:35:06 +0100	[thread overview]
Message-ID: <20191106202806.425388355@linutronix.de> (raw)
In-Reply-To: 20191106193459.581614484@linutronix.de

The access to the full I/O port range can be also provided by the TSS I/O
bitmap, but that would require to copy 8k of data on scheduling in the
task. As shown with the sched out optimization TSS.io_bitmap_base can be
used to switch the incoming task to a preallocated I/O bitmap which has all
bits zero, i.e. allows access to all I/O ports.

Implementing this allows to provide an iopl() emulation mode which restricts
the IOPL level 3 permissions to I/O port access but removes the STI/CLI
permission which is coming with the hardware IOPL mechansim.

Provide a config option to switch IOPL to emulation mode, make it the
default and while at it also provide an option to disable IOPL completely.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig                 |   32 ++++++++++++++++
 arch/x86/include/asm/processor.h |   24 +++++++++---
 arch/x86/kernel/cpu/common.c     |    4 +-
 arch/x86/kernel/ioport.c         |   75 ++++++++++++++++++++++++++++++---------
 arch/x86/kernel/process.c        |   15 +++++--
 5 files changed, 123 insertions(+), 27 deletions(-)

--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1254,6 +1254,38 @@ config X86_VSYSCALL_EMULATION
 	 Disabling this option saves about 7K of kernel size and
 	 possibly 4K of additional runtime pagetable memory.
 
+choice
+	prompt "IOPL"
+	default X86_IOPL_EMULATION
+
+config X86_IOPL_EMULATION
+	bool "IOPL Emulation"
+	---help---
+	  Legacy IOPL support is an overbroad mechanism which allows user
+	  space aside of accessing all 65536 I/O ports also to disable
+	  interrupts. To gain this access the caller needs CAP_SYS_RAWIO
+	  capabilities and permission from eventually active security
+	  modules.
+
+	  The emulation restricts the functionality of the syscall to
+	  only allowing the full range I/O port access, but prevents the
+	  ability to disable interrupts from user space.
+
+config X86_IOPL_LEGACY
+	bool "IOPL Legacy"
+	---help---
+	Allow the full IOPL permissions, i.e. user space access to all
+	65536 I/O ports and also the ability to disable interrupts, which
+	is overbroad and can result in system lockups.
+
+config X86_IOPL_NONE
+	bool "IOPL None"
+	---help---
+	Disable the IOPL permission syscall. That's the safest option as
+	no sane application should depend on this functionality.
+
+endchoice
+
 config TOSHIBA
 	tristate "Toshiba Laptop support"
 	depends on X86_32
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -331,8 +331,12 @@ struct x86_hw_tss {
 #define IO_BITMAP_BYTES			(IO_BITMAP_BITS/8)
 #define IO_BITMAP_LONGS			(IO_BITMAP_BYTES/sizeof(long))
 
-#define IO_BITMAP_OFFSET_VALID				\
-	(offsetof(struct tss_struct, io_bitmap) -	\
+#define IO_BITMAP_OFFSET_VALID_MAP			\
+	(offsetof(struct tss_struct, io_bitmap_map) -	\
+	 offsetof(struct tss_struct, x86_tss))
+
+#define IO_BITMAP_OFFSET_VALID_ALL			\
+	(offsetof(struct tss_struct, io_bitmap_all) -	\
 	 offsetof(struct tss_struct, x86_tss))
 
 /*
@@ -343,7 +347,7 @@ struct x86_hw_tss {
  * last valid byte
  */
 #define __KERNEL_TSS_LIMIT	\
-	(IO_BITMAP_OFFSET_VALID + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)
+	(IO_BITMAP_OFFSET_VALID_ALL + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)
 
 /* Base offset outside of TSS_LIMIT so unpriviledged IO causes #GP */
 #define IO_BITMAP_OFFSET_INVALID	(__KERNEL_TSS_LIMIT + 1)
@@ -379,7 +383,8 @@ struct tss_struct {
 	 * byte beyond the end of the I/O permission bitmap. The extra byte
 	 * must have all bits set and must be within the TSS limit.
 	 */
-	unsigned long		io_bitmap[IO_BITMAP_LONGS + 1];
+	unsigned long		io_bitmap_map[IO_BITMAP_LONGS + 1];
+	unsigned long		io_bitmap_all[IO_BITMAP_LONGS + 1];
 } __aligned(PAGE_SIZE);
 
 DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
@@ -495,7 +500,6 @@ struct thread_struct {
 #endif
 	/* IO permissions: */
 	unsigned long		*io_bitmap_ptr;
-	unsigned long		iopl;
 	/*
 	 * The byte range in the I/O permission bitmap which contains zero
 	 * bits.
@@ -503,6 +507,13 @@ struct thread_struct {
 	unsigned int		io_zerobits_start;
 	unsigned int		io_zerobits_end;
 
+	/*
+	 * IOPL. Priviledge level dependent I/O permission which includes
+	 * user space CLI/STI when granted.
+	 */
+	unsigned long		iopl;
+	unsigned long		iopl_emul;
+
 	mm_segment_t		addr_limit;
 
 	unsigned int		sig_on_uaccess_err:1;
@@ -524,6 +535,9 @@ static inline void arch_thread_struct_wh
 	*size = fpu_kernel_xstate_size;
 }
 
+extern void tss_update_io_bitmap(struct tss_struct *tss,
+				 struct thread_struct *thread);
+
 /*
  * Thread-synchronous status.
  *
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1866,7 +1866,9 @@ void cpu_init(void)
 	tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID;
 	tss->io_zerobits_start = IO_BITMAP_BYTES;
 	tss->io_zerobits_end = 0;
-	memset(tss->io_bitmap, 0xff, sizeof(tss->io_bitmap));
+	memset(tss->io_bitmap_map, 0xff, sizeof(tss->io_bitmap_map));
+	/* Invalidate the extra tail entry of the allow all I/O bitmap */
+	tss->io_bitmap_all[IO_BITMAP_LONGS] = ~0UL;
 	set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
 
 	load_TR_desc();
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -109,7 +109,7 @@ long ksys_ioperm(unsigned long from, uns
 	}
 
 	/* Copy the changed range over to the TSS bitmap */
-	dst = (char *)tss->io_bitmap;
+	dst = (char *)tss->io_bitmap_map;
 	src = (char *)bitmap;
 	memcpy(dst + copy_start, src + copy_start, copy_len);
 
@@ -130,7 +130,7 @@ long ksys_ioperm(unsigned long from, uns
 		 * is correct. It might have been wreckaged by a VMEXiT.
 		 */
 		set_thread_flag(TIF_IO_BITMAP);
-		tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_VALID;
+		tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_VALID_MAP;
 		refresh_tss_limit();
 	}
 
@@ -184,36 +184,77 @@ SYSCALL_DEFINE3(ioperm, unsigned long, f
  */
 SYSCALL_DEFINE1(iopl, unsigned int, level)
 {
-	struct pt_regs *regs = current_pt_regs();
 	struct thread_struct *t = &current->thread;
+	struct pt_regs *regs = current_pt_regs();
+	unsigned int old;
 
 	/*
 	 * Careful: the IOPL bits in regs->flags are undefined under Xen PV
 	 * and changing them has no effect.
 	 */
-	unsigned int old = t->iopl >> X86_EFLAGS_IOPL_BIT;
+	if (IS_ENABLED(CONFIG_X86_IOPL_NONE))
+		return -ENOSYS;
 
 	if (level > 3)
 		return -EINVAL;
+
+	if (IS_ENABLED(CONFIG_X86_IOPL_EMULATION))
+		old = t->iopl_emul;
+	else
+		old = t->iopl >> X86_EFLAGS_IOPL_BIT;
+
+	/* No point in going further if nothing changes */
+	if (level == old)
+		return 0;
+
 	/* Trying to gain more privileges? */
 	if (level > old) {
 		if (!capable(CAP_SYS_RAWIO) ||
 		    security_locked_down(LOCKDOWN_IOPORT))
 			return -EPERM;
 	}
-	/*
-	 * Change the flags value on the return stack, which has been set
-	 * up on system-call entry. See also the fork and signal handling
-	 * code how this is handled.
-	 */
-	regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) |
-		(level << X86_EFLAGS_IOPL_BIT);
-	/* Store the new level in the thread struct */
-	t->iopl = level << X86_EFLAGS_IOPL_BIT;
-	/*
-	 * X86_32 switches immediately and XEN handles it via emulation.
-	 */
-	set_iopl_mask(t->iopl);
+
+	if (IS_ENABLED(CONFIG_X86_IOPL_EMULATION)) {
+		struct tss_struct *tss;
+		unsigned int tss_base;
+
+		/* Prevent racing against a task switch */
+		preempt_disable();
+		tss = this_cpu_ptr(&cpu_tss_rw);
+		if (level == 3) {
+			/* Grant access to all I/O ports */
+			set_thread_flag(TIF_IO_BITMAP);
+			tss_base = IO_BITMAP_OFFSET_VALID_ALL;
+		} else if (t->io_bitmap_ptr) {
+			/* Thread has a I/O bitmap */
+			tss_update_io_bitmap(tss, t);
+			set_thread_flag(TIF_IO_BITMAP);
+			tss_base = IO_BITMAP_OFFSET_VALID_MAP;
+		} else {
+			/* Take it out of the context switch work burden */
+			clear_thread_flag(TIF_IO_BITMAP);
+			tss_base = IO_BITMAP_OFFSET_INVALID;
+		}
+		tss->x86_tss.io_bitmap_base = tss_base;
+		t->iopl_emul = level;
+		preempt_enable();
+
+	} else {
+		/*
+		 * Change the flags value on the return stack, which has
+		 * been set up on system-call entry. See also the fork and
+		 * signal handling code how this is handled.
+		 */
+		regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) |
+			(level << X86_EFLAGS_IOPL_BIT);
+		/* Store the new level in the thread struct */
+		t->iopl = level << X86_EFLAGS_IOPL_BIT;
+		/*
+		 * X86_32 switches immediately and XEN handles it via
+		 * emulation.
+		 */
+		set_iopl_mask(t->iopl);
+	}
 
 	return 0;
 }
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -357,8 +357,7 @@ void arch_setup_new_exec(void)
 	}
 }
 
-static void tss_update_io_bitmap(struct tss_struct *tss,
-				 struct thread_struct *thread)
+void tss_update_io_bitmap(struct tss_struct *tss, struct thread_struct *thread)
 {
 	unsigned int start, len;
 	char *src, *dst;
@@ -387,9 +386,17 @@ static inline void switch_to_bitmap(stru
 				    unsigned long tifp, unsigned long tifn)
 {
 	struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
+	u16 *base = &tss->x86_tss.io_bitmap_base;
 
 	if (tifn & _TIF_IO_BITMAP) {
-		tss_update_io_bitmap(tss, next);
+		/*
+		 * IF IOPL emulation is enabled and the emulated I/O
+		 * priviledge level is 3, switch to the 'grant all' bitmap.
+		 */
+		if (IS_ENABLED(CONFIG_IOPL_EMULATION) && next->iopl_emul == 3)
+			*base = IO_BITMAP_OFFSET_VALID_ALL;
+		else
+			tss_update_io_bitmap(tss, next);
 		/*
 		 * Make sure that the TSS limit is covering the io bitmap.
 		 * It might have been cut down by a VMEXIT to 0x67 which
@@ -405,7 +412,7 @@ static inline void switch_to_bitmap(stru
 		 * by moving it outside the TSS limit so any subsequent I/O
 		 * access from user space will trigger a #GP.
 		 */
-		tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID;
+		*base = IO_BITMAP_OFFSET_INVALID;
 	}
 }
 



  parent reply	other threads:[~2019-11-06 20:57 UTC|newest]

Thread overview: 65+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-06 19:34 [patch 0/9] x86/iopl: Prevent user space from using CLI/STI with iopl(3) Thomas Gleixner
2019-11-06 19:35 ` [patch 1/9] x86/ptrace: Prevent truncation of bitmap size Thomas Gleixner
2019-11-07  7:31   ` Ingo Molnar
2019-11-06 19:35 ` [patch 2/9] x86/process: Unify copy_thread_tls() Thomas Gleixner
2019-11-08 22:31   ` Andy Lutomirski
2019-11-08 23:43     ` Thomas Gleixner
2019-11-10 12:36       ` Thomas Gleixner
2019-11-10 16:56         ` Andy Lutomirski
2019-11-11  8:52           ` Peter Zijlstra
2019-11-06 19:35 ` [patch 3/9] x86/cpu: Unify cpu_init() Thomas Gleixner
2019-11-08 22:34   ` Andy Lutomirski
2019-11-11  4:22   ` kbuild test robot
2019-11-06 19:35 ` [patch 4/9] x86/io: Speedup schedule out of I/O bitmap user Thomas Gleixner
2019-11-07  9:12   ` Peter Zijlstra
2019-11-07 14:04     ` Thomas Gleixner
2019-11-07 14:08       ` Thomas Gleixner
2019-11-08 22:41         ` Andy Lutomirski
2019-11-08 23:45           ` Thomas Gleixner
2019-11-09  3:32             ` Andy Lutomirski
2019-11-10 12:43               ` Thomas Gleixner
2019-11-09  0:24   ` Andy Lutomirski
2019-11-09  1:18   ` kbuild test robot
2019-11-06 19:35 ` [patch 5/9] x86/ioport: Reduce ioperm impact for sane usage further Thomas Gleixner
2019-11-07  1:11   ` Linus Torvalds
2019-11-07  7:44     ` Thomas Gleixner
2019-11-07  8:25     ` Ingo Molnar
2019-11-07  9:17       ` Willy Tarreau
2019-11-07 10:00         ` Thomas Gleixner
2019-11-07 10:13           ` Willy Tarreau
2019-11-07 10:19           ` hpa
2019-11-07 10:27             ` Willy Tarreau
2019-11-07 10:50               ` hpa
2019-11-07 12:56                 ` Willy Tarreau
2019-11-07 16:45                   ` Eric W. Biederman
2019-11-07 16:53                     ` Linus Torvalds
2019-11-07 16:57                     ` Willy Tarreau
2019-11-10 17:17       ` Andy Lutomirski
2019-11-07  7:37   ` Ingo Molnar
2019-11-07  7:45     ` Thomas Gleixner
2019-11-07  8:16   ` Ingo Molnar
2019-11-07 18:02     ` Thomas Gleixner
2019-11-07 19:24   ` Brian Gerst
2019-11-07 19:54     ` Linus Torvalds
2019-11-07 21:00       ` Brian Gerst
2019-11-07 21:32         ` Thomas Gleixner
2019-11-07 23:20           ` hpa
2019-11-07 21:44         ` Linus Torvalds
2019-11-08  1:12           ` H. Peter Anvin
2019-11-08  2:12             ` Brian Gerst
2019-11-10 17:21           ` Andy Lutomirski
2019-11-06 19:35 ` [patch 6/9] x86/iopl: Fixup misleading comment Thomas Gleixner
2019-11-06 19:35 ` Thomas Gleixner [this message]
2019-11-07  9:09   ` [patch 7/9] x86/iopl: Restrict iopl() permission scope Peter Zijlstra
2019-11-10 17:26   ` Andy Lutomirski
2019-11-10 20:31     ` Thomas Gleixner
2019-11-10 21:05       ` Andy Lutomirski
2019-11-10 21:21         ` Thomas Gleixner
2019-11-11  4:27   ` kbuild test robot
2019-11-06 19:35 ` [patch 8/9] x86/iopl: Remove legacy IOPL option Thomas Gleixner
2019-11-07  6:11   ` Jürgen Groß
2019-11-07  6:26     ` hpa
2019-11-07 16:44     ` Stephen Hemminger
2019-11-07  9:13   ` Peter Zijlstra
2019-11-06 19:35 ` [patch 9/9] selftests/x86/iopl: Verify that CLI/STI result in #GP Thomas Gleixner
2019-11-07  7:28 ` [patch] x86/iopl: Remove unused local variable, update comments in ksys_ioperm() Ingo Molnar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191106202806.425388355@linutronix.de \
    --to=tglx@linutronix.de \
    --cc=hpa@zytor.com \
    --cc=jgross@suse.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=sean.j.christopherson@intel.com \
    --cc=stephen@networkplumber.org \
    --cc=torvalds@linux-foundation.org \
    --cc=w@1wt.eu \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.