linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Andy Lutomirski <luto@kernel.org>
To: Thomas Gleixner <tglx@linutronix.de>,
	LKML <linux-kernel@vger.kernel.org>
Cc: x86@kernel.org, Stephen Hemminger <stephen@networkplumber.org>,
	Willy Tarreau <w@1wt.eu>, Juergen Gross <jgross@suse.com>,
	Sean Christopherson <sean.j.christopherson@intel.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	"H. Peter Anvin" <hpa@zytor.com>
Subject: Re: [patch 7/9] x86/iopl: Restrict iopl() permission scope
Date: Sun, 10 Nov 2019 09:26:06 -0800	[thread overview]
Message-ID: <a1afc4bb-c90e-db58-42f2-da91a50b1872@kernel.org> (raw)
In-Reply-To: <20191106202806.425388355@linutronix.de>

On 11/6/19 11:35 AM, Thomas Gleixner wrote:
> The access to the full I/O port range can be also provided by the TSS I/O
> bitmap, but that would require to copy 8k of data on scheduling in the
> task. As shown with the sched out optimization TSS.io_bitmap_base can be
> used to switch the incoming task to a preallocated I/O bitmap which has all
> bits zero, i.e. allows access to all I/O ports.
> 
> Implementing this allows to provide an iopl() emulation mode which restricts
> the IOPL level 3 permissions to I/O port access but removes the STI/CLI
> permission which is coming with the hardware IOPL mechansim.
> 
> Provide a config option to switch IOPL to emulation mode, make it the
> default and while at it also provide an option to disable IOPL completely.
> 
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
>  arch/x86/Kconfig                 |   32 ++++++++++++++++
>  arch/x86/include/asm/processor.h |   24 +++++++++---
>  arch/x86/kernel/cpu/common.c     |    4 +-
>  arch/x86/kernel/ioport.c         |   75 ++++++++++++++++++++++++++++++---------
>  arch/x86/kernel/process.c        |   15 +++++--
>  5 files changed, 123 insertions(+), 27 deletions(-)
> 
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -1254,6 +1254,38 @@ config X86_VSYSCALL_EMULATION
>  	 Disabling this option saves about 7K of kernel size and
>  	 possibly 4K of additional runtime pagetable memory.
>  
> +choice
> +	prompt "IOPL"
> +	default X86_IOPL_EMULATION
> +
> +config X86_IOPL_EMULATION
> +	bool "IOPL Emulation"
> +	---help---
> +	  Legacy IOPL support is an overbroad mechanism which allows user
> +	  space aside of accessing all 65536 I/O ports also to disable
> +	  interrupts. To gain this access the caller needs CAP_SYS_RAWIO
> +	  capabilities and permission from eventually active security
> +	  modules.
> +
> +	  The emulation restricts the functionality of the syscall to
> +	  only allowing the full range I/O port access, but prevents the
> +	  ability to disable interrupts from user space.
> +
> +config X86_IOPL_LEGACY
> +	bool "IOPL Legacy"
> +	---help---
> +	Allow the full IOPL permissions, i.e. user space access to all
> +	65536 I/O ports and also the ability to disable interrupts, which
> +	is overbroad and can result in system lockups.
> +
> +config X86_IOPL_NONE
> +	bool "IOPL None"
> +	---help---
> +	Disable the IOPL permission syscall. That's the safest option as
> +	no sane application should depend on this functionality.
> +
> +endchoice
> +
>  config TOSHIBA
>  	tristate "Toshiba Laptop support"
>  	depends on X86_32
> --- a/arch/x86/include/asm/processor.h
> +++ b/arch/x86/include/asm/processor.h
> @@ -331,8 +331,12 @@ struct x86_hw_tss {
>  #define IO_BITMAP_BYTES			(IO_BITMAP_BITS/8)
>  #define IO_BITMAP_LONGS			(IO_BITMAP_BYTES/sizeof(long))
>  
> -#define IO_BITMAP_OFFSET_VALID				\
> -	(offsetof(struct tss_struct, io_bitmap) -	\
> +#define IO_BITMAP_OFFSET_VALID_MAP			\
> +	(offsetof(struct tss_struct, io_bitmap_map) -	\
> +	 offsetof(struct tss_struct, x86_tss))
> +
> +#define IO_BITMAP_OFFSET_VALID_ALL			\
> +	(offsetof(struct tss_struct, io_bitmap_all) -	\
>  	 offsetof(struct tss_struct, x86_tss))
>  
>  /*
> @@ -343,7 +347,7 @@ struct x86_hw_tss {
>   * last valid byte
>   */
>  #define __KERNEL_TSS_LIMIT	\
> -	(IO_BITMAP_OFFSET_VALID + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)
> +	(IO_BITMAP_OFFSET_VALID_ALL + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)
>  
>  /* Base offset outside of TSS_LIMIT so unpriviledged IO causes #GP */
>  #define IO_BITMAP_OFFSET_INVALID	(__KERNEL_TSS_LIMIT + 1)
> @@ -379,7 +383,8 @@ struct tss_struct {
>  	 * byte beyond the end of the I/O permission bitmap. The extra byte
>  	 * must have all bits set and must be within the TSS limit.
>  	 */
> -	unsigned long		io_bitmap[IO_BITMAP_LONGS + 1];
> +	unsigned long		io_bitmap_map[IO_BITMAP_LONGS + 1];
> +	unsigned long		io_bitmap_all[IO_BITMAP_LONGS + 1];
>  } __aligned(PAGE_SIZE);
>  
>  DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
> @@ -495,7 +500,6 @@ struct thread_struct {
>  #endif
>  	/* IO permissions: */
>  	unsigned long		*io_bitmap_ptr;
> -	unsigned long		iopl;
>  	/*
>  	 * The byte range in the I/O permission bitmap which contains zero
>  	 * bits.
> @@ -503,6 +507,13 @@ struct thread_struct {
>  	unsigned int		io_zerobits_start;
>  	unsigned int		io_zerobits_end;
>  
> +	/*
> +	 * IOPL. Priviledge level dependent I/O permission which includes
> +	 * user space CLI/STI when granted.
> +	 */
> +	unsigned long		iopl;
> +	unsigned long		iopl_emul;
> +
>  	mm_segment_t		addr_limit;
>  
>  	unsigned int		sig_on_uaccess_err:1;
> @@ -524,6 +535,9 @@ static inline void arch_thread_struct_wh
>  	*size = fpu_kernel_xstate_size;
>  }
>  
> +extern void tss_update_io_bitmap(struct tss_struct *tss,
> +				 struct thread_struct *thread);
> +
>  /*
>   * Thread-synchronous status.
>   *
> --- a/arch/x86/kernel/cpu/common.c
> +++ b/arch/x86/kernel/cpu/common.c
> @@ -1866,7 +1866,9 @@ void cpu_init(void)
>  	tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID;
>  	tss->io_zerobits_start = IO_BITMAP_BYTES;
>  	tss->io_zerobits_end = 0;
> -	memset(tss->io_bitmap, 0xff, sizeof(tss->io_bitmap));
> +	memset(tss->io_bitmap_map, 0xff, sizeof(tss->io_bitmap_map));
> +	/* Invalidate the extra tail entry of the allow all I/O bitmap */
> +	tss->io_bitmap_all[IO_BITMAP_LONGS] = ~0UL;
>  	set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
>  
>  	load_TR_desc();
> --- a/arch/x86/kernel/ioport.c
> +++ b/arch/x86/kernel/ioport.c
> @@ -109,7 +109,7 @@ long ksys_ioperm(unsigned long from, uns
>  	}
>  
>  	/* Copy the changed range over to the TSS bitmap */
> -	dst = (char *)tss->io_bitmap;
> +	dst = (char *)tss->io_bitmap_map;
>  	src = (char *)bitmap;
>  	memcpy(dst + copy_start, src + copy_start, copy_len);
>  
> @@ -130,7 +130,7 @@ long ksys_ioperm(unsigned long from, uns
>  		 * is correct. It might have been wreckaged by a VMEXiT.
>  		 */
>  		set_thread_flag(TIF_IO_BITMAP);
> -		tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_VALID;
> +		tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_VALID_MAP;
>  		refresh_tss_limit();
>  	}
>  
> @@ -184,36 +184,77 @@ SYSCALL_DEFINE3(ioperm, unsigned long, f
>   */
>  SYSCALL_DEFINE1(iopl, unsigned int, level)
>  {
> -	struct pt_regs *regs = current_pt_regs();
>  	struct thread_struct *t = &current->thread;
> +	struct pt_regs *regs = current_pt_regs();
> +	unsigned int old;
>  
>  	/*
>  	 * Careful: the IOPL bits in regs->flags are undefined under Xen PV
>  	 * and changing them has no effect.
>  	 */
> -	unsigned int old = t->iopl >> X86_EFLAGS_IOPL_BIT;
> +	if (IS_ENABLED(CONFIG_X86_IOPL_NONE))
> +		return -ENOSYS;
>  
>  	if (level > 3)
>  		return -EINVAL;
> +
> +	if (IS_ENABLED(CONFIG_X86_IOPL_EMULATION))
> +		old = t->iopl_emul;
> +	else
> +		old = t->iopl >> X86_EFLAGS_IOPL_BIT;
> +
> +	/* No point in going further if nothing changes */
> +	if (level == old)
> +		return 0;
> +
>  	/* Trying to gain more privileges? */
>  	if (level > old) {
>  		if (!capable(CAP_SYS_RAWIO) ||
>  		    security_locked_down(LOCKDOWN_IOPORT))
>  			return -EPERM;
>  	}
> -	/*
> -	 * Change the flags value on the return stack, which has been set
> -	 * up on system-call entry. See also the fork and signal handling
> -	 * code how this is handled.
> -	 */
> -	regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) |
> -		(level << X86_EFLAGS_IOPL_BIT);
> -	/* Store the new level in the thread struct */
> -	t->iopl = level << X86_EFLAGS_IOPL_BIT;
> -	/*
> -	 * X86_32 switches immediately and XEN handles it via emulation.
> -	 */
> -	set_iopl_mask(t->iopl);
> +
> +	if (IS_ENABLED(CONFIG_X86_IOPL_EMULATION)) {
> +		struct tss_struct *tss;
> +		unsigned int tss_base;
> +
> +		/* Prevent racing against a task switch */
> +		preempt_disable();
> +		tss = this_cpu_ptr(&cpu_tss_rw);
> +		if (level == 3) {
> +			/* Grant access to all I/O ports */
> +			set_thread_flag(TIF_IO_BITMAP);
> +			tss_base = IO_BITMAP_OFFSET_VALID_ALL;

Where is the actual TSS updated?

> +		} else if (t->io_bitmap_ptr) {
> +			/* Thread has a I/O bitmap */
> +			tss_update_io_bitmap(tss, t);
> +			set_thread_flag(TIF_IO_BITMAP);
> +			tss_base = IO_BITMAP_OFFSET_VALID_MAP;
> +		} else {
> +			/* Take it out of the context switch work burden */
> +			clear_thread_flag(TIF_IO_BITMAP);
> +			tss_base = IO_BITMAP_OFFSET_INVALID;

Ditto.

I think what you need to do is have a single function, called by
exit_thread(), switch_to(), and here, that updates the TSS to match a
given task's IO bitmap state.  This is probably switch_to_bitmap() or
similar.

(Maybe it already is, but I swear I checked all the patches in the
series and I can't find the body of tss_update_io_bitmap().  But you
should call it in all branches of this if-else thing.)

  parent reply	other threads:[~2019-11-10 17:26 UTC|newest]

Thread overview: 62+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-06 19:34 [patch 0/9] x86/iopl: Prevent user space from using CLI/STI with iopl(3) Thomas Gleixner
2019-11-06 19:35 ` [patch 1/9] x86/ptrace: Prevent truncation of bitmap size Thomas Gleixner
2019-11-07  7:31   ` Ingo Molnar
2019-11-06 19:35 ` [patch 2/9] x86/process: Unify copy_thread_tls() Thomas Gleixner
2019-11-08 22:31   ` Andy Lutomirski
2019-11-08 23:43     ` Thomas Gleixner
2019-11-10 12:36       ` Thomas Gleixner
2019-11-10 16:56         ` Andy Lutomirski
2019-11-11  8:52           ` Peter Zijlstra
2019-11-06 19:35 ` [patch 3/9] x86/cpu: Unify cpu_init() Thomas Gleixner
2019-11-08 22:34   ` Andy Lutomirski
2019-11-06 19:35 ` [patch 4/9] x86/io: Speedup schedule out of I/O bitmap user Thomas Gleixner
2019-11-07  9:12   ` Peter Zijlstra
2019-11-07 14:04     ` Thomas Gleixner
2019-11-07 14:08       ` Thomas Gleixner
2019-11-08 22:41         ` Andy Lutomirski
2019-11-08 23:45           ` Thomas Gleixner
2019-11-09  3:32             ` Andy Lutomirski
2019-11-10 12:43               ` Thomas Gleixner
2019-11-09  0:24   ` Andy Lutomirski
2019-11-06 19:35 ` [patch 5/9] x86/ioport: Reduce ioperm impact for sane usage further Thomas Gleixner
2019-11-07  1:11   ` Linus Torvalds
2019-11-07  7:44     ` Thomas Gleixner
2019-11-07  8:25     ` Ingo Molnar
2019-11-07  9:17       ` Willy Tarreau
2019-11-07 10:00         ` Thomas Gleixner
2019-11-07 10:13           ` Willy Tarreau
2019-11-07 10:19           ` hpa
2019-11-07 10:27             ` Willy Tarreau
2019-11-07 10:50               ` hpa
2019-11-07 12:56                 ` Willy Tarreau
2019-11-07 16:45                   ` Eric W. Biederman
2019-11-07 16:53                     ` Linus Torvalds
2019-11-07 16:57                     ` Willy Tarreau
2019-11-10 17:17       ` Andy Lutomirski
2019-11-07  7:37   ` Ingo Molnar
2019-11-07  7:45     ` Thomas Gleixner
2019-11-07  8:16   ` Ingo Molnar
2019-11-07 18:02     ` Thomas Gleixner
2019-11-07 19:24   ` Brian Gerst
2019-11-07 19:54     ` Linus Torvalds
2019-11-07 21:00       ` Brian Gerst
2019-11-07 21:32         ` Thomas Gleixner
2019-11-07 23:20           ` hpa
2019-11-07 21:44         ` Linus Torvalds
2019-11-08  1:12           ` H. Peter Anvin
2019-11-08  2:12             ` Brian Gerst
2019-11-10 17:21           ` Andy Lutomirski
2019-11-06 19:35 ` [patch 6/9] x86/iopl: Fixup misleading comment Thomas Gleixner
2019-11-06 19:35 ` [patch 7/9] x86/iopl: Restrict iopl() permission scope Thomas Gleixner
2019-11-07  9:09   ` Peter Zijlstra
2019-11-10 17:26   ` Andy Lutomirski [this message]
2019-11-10 20:31     ` Thomas Gleixner
2019-11-10 21:05       ` Andy Lutomirski
2019-11-10 21:21         ` Thomas Gleixner
2019-11-06 19:35 ` [patch 8/9] x86/iopl: Remove legacy IOPL option Thomas Gleixner
2019-11-07  6:11   ` Jürgen Groß
2019-11-07  6:26     ` hpa
2019-11-07 16:44     ` Stephen Hemminger
2019-11-07  9:13   ` Peter Zijlstra
2019-11-06 19:35 ` [patch 9/9] selftests/x86/iopl: Verify that CLI/STI result in #GP Thomas Gleixner
2019-11-07  7:28 ` [patch] x86/iopl: Remove unused local variable, update comments in ksys_ioperm() Ingo Molnar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=a1afc4bb-c90e-db58-42f2-da91a50b1872@kernel.org \
    --to=luto@kernel.org \
    --cc=hpa@zytor.com \
    --cc=jgross@suse.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=sean.j.christopherson@intel.com \
    --cc=stephen@networkplumber.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=w@1wt.eu \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).