All of lore.kernel.org
 help / color / mirror / Atom feed
From: Florian Weimer <fweimer@redhat.com>
To: Dave Hansen <dave.hansen@linux.intel.com>,
	Vlastimil Babka <vbabka@suse.cz>,
	linux-x86_64@vger.kernel.org, linux-arch@vger.kernel.org
Cc: linux-mm <linux-mm@kvack.org>, Linux API <linux-api@vger.kernel.org>
Subject: Re: MPK: removing a pkey
Date: Thu, 23 Nov 2017 13:38:21 +0100	[thread overview]
Message-ID: <28f6e430-293d-4b30-dce6-018a2b3c03e8@redhat.com> (raw)
In-Reply-To: <d98eb4b8-6e59-513d-fdf8-3395485cb851@linux.intel.com>

[-- Attachment #1: Type: text/plain, Size: 1906 bytes --]

On 11/22/2017 05:32 PM, Dave Hansen wrote:
> On 11/22/2017 08:21 AM, Florian Weimer wrote:
>> On 11/22/2017 05:10 PM, Dave Hansen wrote:
>>> On 11/22/2017 04:15 AM, Florian Weimer wrote:
>>>> On 11/22/2017 09:18 AM, Vlastimil Babka wrote:
>>>>> And, was the pkey == -1 internal wiring supposed to be exposed to the
>>>>> pkey_mprotect() signal, or should there have been a pre-check returning
>>>>> EINVAL in SYSCALL_DEFINE4(pkey_mprotect), before calling
>>>>> do_mprotect_pkey())? I assume it's too late to change it now anyway (or
>>>>> not?), so should we also document it?
>>>>
>>>> I think the -1 case to the set the default key is useful because it
>>>> allows you to use a key value of -1 to mean “MPK is not supported”, and
>>>> still call pkey_mprotect.
>>>
>>> The behavior to not allow 0 to be set was unintentional and is a bug.
>>> We should fix that.
>>
>> On the other hand, x86-64 has no single default protection key due to
>> the PROT_EXEC emulation.
> 
> No, the default is clearly 0 and documented to be so.  The PROT_EXEC
> emulation one should be inaccessible in all the APIs so does not even
> show up as *being* a key in the API.

I see key 1 in /proc for a PROT_EXEC mapping.  If I supply an explicit 
protection key, that key is used, and the page ends up having read 
access enabled.

The key is also visible in the siginfo_t argument on read access to a 
PROT_EXEC mapping with the default key, so it's not just /proc:

page 1 (0x7f008242d000): read access denied
   SIGSEGV address: 0x7f008242d000
   SIGSEGV code: 4
   SIGSEGV key: 1

I'm attaching my test.

 > The fact that it's implemented
 > with pkeys should be pretty immaterial other than the fact that you
 > can't touch the high bits in PKRU.

I don't see a restriction for PKRU updates.  If I write zero to the PKRU 
register, PROT_EXEC implies PROT_READ, as I would expect.

This is with kernel 4.14.

Florian

[-- Attachment #2: mpk-default.c --]
[-- Type: text/x-csrc, Size: 4720 bytes --]

#include <err.h>
#include <setjmp.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

#define PKEY_DISABLE_ACCESS 1
#define PKEY_DISABLE_WRITE 2

__attribute__ ((weak, noinline, noclone)) /* Compiler barrier.  */
void
touch (void *buffer)
{
}

__attribute__ ((weak, noinline, noclone)) /* Compiler barrier.  */
void
read_page (void *page)
{
  char buf[16];
  memcpy (buf, page, sizeof (buf));
  touch (buf);
}

__attribute__ ((weak, noinline, noclone)) /* Compiler barrier.  */
void
write_page (void *page)
{
  memset (page, 0, 16);
  touch (page);
}

static volatile void *sigsegv_addr;
static volatile int sigsegv_code;
static volatile int sigsegv_pkey;
static sigjmp_buf sigsegv_jmp;

static void
sigsegv_handler (int signo, siginfo_t *info, void *arg)
{
  sigsegv_addr = info->si_addr;
  sigsegv_code = info->si_code;
  if (info->si_code == 4)
    {
      /* Guess the address of the protection key field.  */
      int *ppkey = 2 + ((int *)((&info->si_addr) + 1));
      sigsegv_pkey = *ppkey;
    }
  else
    sigsegv_pkey = -1;
  siglongjmp (sigsegv_jmp, 2);
}

static const struct sigaction sigsegv_sigaction =
  {
    .sa_flags = SA_RESETHAND | SA_SIGINFO,
    .sa_sigaction = &sigsegv_handler,
  };

/* Return the value of the PKRU register.  */
static inline unsigned int
pkey_read (void)
{
  unsigned int result;
  __asm__ volatile (".byte 0x0f, 0x01, 0xee"
                    : "=a" (result) : "c" (0) : "rdx");
  return result;
}

/* Overwrite the PKRU register with VALUE.  */
static inline void
pkey_write (unsigned int value)
{
  __asm__ volatile (".byte 0x0f, 0x01, 0xef"
                    : : "a" (value), "c" (0), "d" (0));
}

enum { page_count = 7 };
static void *pages[page_count];

static void
check_fault_1 (int page, const char *what, void (*op) (void *))
{
  unsigned pkru = pkey_read ();

  int result = sigsetjmp (sigsegv_jmp, 1);
  if (result == 0)
    {
      if (sigaction (SIGSEGV, &sigsegv_sigaction, NULL) != 0)
	err (1, "sigaction");
      op (pages[page]);
      printf ("page %d (%p): %s access allowed\n", page, pages[page], what);
      return;
    }
  else
    {
      if (signal (SIGSEGV, SIG_DFL) == SIG_ERR)
	err (1, "signal");
      printf ("page %d (%p): %s access denied\n", page, pages[page], what);
      printf ("  SIGSEGV address: %p\n", sigsegv_addr);
      printf ("  SIGSEGV code: %d\n", sigsegv_code);
      printf ("  SIGSEGV key: %d\n", sigsegv_pkey);
    }

  /* Preserve PKRU register value (clobbered by signal handler).  */
  pkey_write (pkru);
}

static void
check_fault (int page)
{
  check_fault_1 (page, "read", read_page);
  check_fault_1 (page, "write", write_page);
}

static void
dump_smaps (const char *what)
{
  printf ("info: *** BEGIN %s ***\n", what);
  FILE *fp = fopen ("/proc/self/smaps", "r");
  if (fp == NULL)
    err (1, "fopen");
  while (true)
    {
      int ch = fgetc (fp);
      if (ch == EOF)
	break;
      fputc (ch, stdout);
    }
  if (ferror (fp))
    err (1, "fgetc");
  if (fclose (fp) != 0)
    err (1, "fclose");
  printf ("info: *** END %s ***\n", what);
  fflush (stdout);
}

int
main (void)
{
  int protections[page_count] = 
    { PROT_READ | PROT_WRITE, PROT_EXEC, PROT_READ, PROT_READ,
      PROT_EXEC | PROT_WRITE, PROT_EXEC | PROT_WRITE, PROT_EXEC };
  for (int i = 0; i < page_count; ++i)
    {
      pages[i] = mmap (NULL, 1, protections[i],
		       MAP_ANON | MAP_PRIVATE, -1, 0);
      if (pages[i] == MAP_FAILED)
	err (1, "mmap");
      printf ("page %d: %p\n", i, pages[i]);
    }
      
  int key = syscall (SYS_pkey_alloc, 0, 0);
  if (key < 0)
    err (1, "pkey_alloc");
  printf ("key: %d\n", key);

  if (syscall (SYS_pkey_mprotect, pages[2], 1, PROT_READ, key) != 0)
    err (1, "pkey_mprotected (pages[2])");
  if (syscall (SYS_pkey_mprotect, pages[3], 1, PROT_EXEC, key) != 0)
    err (1, "pkey_mprotected (pages[3])");
  if (syscall (SYS_pkey_mprotect, pages[5], 1, PROT_EXEC | PROT_WRITE, key)
      != 0)
    err (1, "pkey_mprotected (pages[5])");
  if (syscall (SYS_pkey_mprotect, pages[6], 1, PROT_EXEC, key) != 0)
    err (1, "pkey_mprotected (pages[6])");
  if (syscall (SYS_pkey_mprotect, pages[6], 1, PROT_EXEC, -1) != 0)
    err (1, "pkey_mprotected (pages[6])");

  dump_smaps ("dump before faults");

  /* This succeeds because the page is mapped readable.  */
  puts ("info: performing accesses");
  fflush (stdout);
  for (int i = 0; i < page_count; ++i)
    check_fault (i);

  /* See what happens if we grant all access rights.  */
  puts ("info: setting PKRU to zero");
  fflush (stdout);
  pkey_write (0);

  for (int i = 0; i < page_count; ++i)
    check_fault (i);

  return 0;
}

WARNING: multiple messages have this Message-ID (diff)
From: Florian Weimer <fweimer@redhat.com>
To: Dave Hansen <dave.hansen@linux.intel.com>,
	Vlastimil Babka <vbabka@suse.cz>,
	linux-x86_64@vger.kernel.org, linux-arch@vger.kernel.org
Cc: linux-mm <linux-mm@kvack.org>, Linux API <linux-api@vger.kernel.org>
Subject: Re: MPK: removing a pkey
Date: Thu, 23 Nov 2017 13:38:21 +0100	[thread overview]
Message-ID: <28f6e430-293d-4b30-dce6-018a2b3c03e8@redhat.com> (raw)
In-Reply-To: <d98eb4b8-6e59-513d-fdf8-3395485cb851@linux.intel.com>

[-- Attachment #1: Type: text/plain, Size: 1906 bytes --]

On 11/22/2017 05:32 PM, Dave Hansen wrote:
> On 11/22/2017 08:21 AM, Florian Weimer wrote:
>> On 11/22/2017 05:10 PM, Dave Hansen wrote:
>>> On 11/22/2017 04:15 AM, Florian Weimer wrote:
>>>> On 11/22/2017 09:18 AM, Vlastimil Babka wrote:
>>>>> And, was the pkey == -1 internal wiring supposed to be exposed to the
>>>>> pkey_mprotect() signal, or should there have been a pre-check returning
>>>>> EINVAL in SYSCALL_DEFINE4(pkey_mprotect), before calling
>>>>> do_mprotect_pkey())? I assume it's too late to change it now anyway (or
>>>>> not?), so should we also document it?
>>>>
>>>> I think the -1 case to the set the default key is useful because it
>>>> allows you to use a key value of -1 to mean a??MPK is not supporteda??, and
>>>> still call pkey_mprotect.
>>>
>>> The behavior to not allow 0 to be set was unintentional and is a bug.
>>> We should fix that.
>>
>> On the other hand, x86-64 has no single default protection key due to
>> the PROT_EXEC emulation.
> 
> No, the default is clearly 0 and documented to be so.  The PROT_EXEC
> emulation one should be inaccessible in all the APIs so does not even
> show up as *being* a key in the API.

I see key 1 in /proc for a PROT_EXEC mapping.  If I supply an explicit 
protection key, that key is used, and the page ends up having read 
access enabled.

The key is also visible in the siginfo_t argument on read access to a 
PROT_EXEC mapping with the default key, so it's not just /proc:

page 1 (0x7f008242d000): read access denied
   SIGSEGV address: 0x7f008242d000
   SIGSEGV code: 4
   SIGSEGV key: 1

I'm attaching my test.

 > The fact that it's implemented
 > with pkeys should be pretty immaterial other than the fact that you
 > can't touch the high bits in PKRU.

I don't see a restriction for PKRU updates.  If I write zero to the PKRU 
register, PROT_EXEC implies PROT_READ, as I would expect.

This is with kernel 4.14.

Florian

[-- Attachment #2: mpk-default.c --]
[-- Type: text/x-csrc, Size: 4720 bytes --]

#include <err.h>
#include <setjmp.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

#define PKEY_DISABLE_ACCESS 1
#define PKEY_DISABLE_WRITE 2

__attribute__ ((weak, noinline, noclone)) /* Compiler barrier.  */
void
touch (void *buffer)
{
}

__attribute__ ((weak, noinline, noclone)) /* Compiler barrier.  */
void
read_page (void *page)
{
  char buf[16];
  memcpy (buf, page, sizeof (buf));
  touch (buf);
}

__attribute__ ((weak, noinline, noclone)) /* Compiler barrier.  */
void
write_page (void *page)
{
  memset (page, 0, 16);
  touch (page);
}

static volatile void *sigsegv_addr;
static volatile int sigsegv_code;
static volatile int sigsegv_pkey;
static sigjmp_buf sigsegv_jmp;

static void
sigsegv_handler (int signo, siginfo_t *info, void *arg)
{
  sigsegv_addr = info->si_addr;
  sigsegv_code = info->si_code;
  if (info->si_code == 4)
    {
      /* Guess the address of the protection key field.  */
      int *ppkey = 2 + ((int *)((&info->si_addr) + 1));
      sigsegv_pkey = *ppkey;
    }
  else
    sigsegv_pkey = -1;
  siglongjmp (sigsegv_jmp, 2);
}

static const struct sigaction sigsegv_sigaction =
  {
    .sa_flags = SA_RESETHAND | SA_SIGINFO,
    .sa_sigaction = &sigsegv_handler,
  };

/* Return the value of the PKRU register.  */
static inline unsigned int
pkey_read (void)
{
  unsigned int result;
  __asm__ volatile (".byte 0x0f, 0x01, 0xee"
                    : "=a" (result) : "c" (0) : "rdx");
  return result;
}

/* Overwrite the PKRU register with VALUE.  */
static inline void
pkey_write (unsigned int value)
{
  __asm__ volatile (".byte 0x0f, 0x01, 0xef"
                    : : "a" (value), "c" (0), "d" (0));
}

enum { page_count = 7 };
static void *pages[page_count];

static void
check_fault_1 (int page, const char *what, void (*op) (void *))
{
  unsigned pkru = pkey_read ();

  int result = sigsetjmp (sigsegv_jmp, 1);
  if (result == 0)
    {
      if (sigaction (SIGSEGV, &sigsegv_sigaction, NULL) != 0)
	err (1, "sigaction");
      op (pages[page]);
      printf ("page %d (%p): %s access allowed\n", page, pages[page], what);
      return;
    }
  else
    {
      if (signal (SIGSEGV, SIG_DFL) == SIG_ERR)
	err (1, "signal");
      printf ("page %d (%p): %s access denied\n", page, pages[page], what);
      printf ("  SIGSEGV address: %p\n", sigsegv_addr);
      printf ("  SIGSEGV code: %d\n", sigsegv_code);
      printf ("  SIGSEGV key: %d\n", sigsegv_pkey);
    }

  /* Preserve PKRU register value (clobbered by signal handler).  */
  pkey_write (pkru);
}

static void
check_fault (int page)
{
  check_fault_1 (page, "read", read_page);
  check_fault_1 (page, "write", write_page);
}

static void
dump_smaps (const char *what)
{
  printf ("info: *** BEGIN %s ***\n", what);
  FILE *fp = fopen ("/proc/self/smaps", "r");
  if (fp == NULL)
    err (1, "fopen");
  while (true)
    {
      int ch = fgetc (fp);
      if (ch == EOF)
	break;
      fputc (ch, stdout);
    }
  if (ferror (fp))
    err (1, "fgetc");
  if (fclose (fp) != 0)
    err (1, "fclose");
  printf ("info: *** END %s ***\n", what);
  fflush (stdout);
}

int
main (void)
{
  int protections[page_count] = 
    { PROT_READ | PROT_WRITE, PROT_EXEC, PROT_READ, PROT_READ,
      PROT_EXEC | PROT_WRITE, PROT_EXEC | PROT_WRITE, PROT_EXEC };
  for (int i = 0; i < page_count; ++i)
    {
      pages[i] = mmap (NULL, 1, protections[i],
		       MAP_ANON | MAP_PRIVATE, -1, 0);
      if (pages[i] == MAP_FAILED)
	err (1, "mmap");
      printf ("page %d: %p\n", i, pages[i]);
    }
      
  int key = syscall (SYS_pkey_alloc, 0, 0);
  if (key < 0)
    err (1, "pkey_alloc");
  printf ("key: %d\n", key);

  if (syscall (SYS_pkey_mprotect, pages[2], 1, PROT_READ, key) != 0)
    err (1, "pkey_mprotected (pages[2])");
  if (syscall (SYS_pkey_mprotect, pages[3], 1, PROT_EXEC, key) != 0)
    err (1, "pkey_mprotected (pages[3])");
  if (syscall (SYS_pkey_mprotect, pages[5], 1, PROT_EXEC | PROT_WRITE, key)
      != 0)
    err (1, "pkey_mprotected (pages[5])");
  if (syscall (SYS_pkey_mprotect, pages[6], 1, PROT_EXEC, key) != 0)
    err (1, "pkey_mprotected (pages[6])");
  if (syscall (SYS_pkey_mprotect, pages[6], 1, PROT_EXEC, -1) != 0)
    err (1, "pkey_mprotected (pages[6])");

  dump_smaps ("dump before faults");

  /* This succeeds because the page is mapped readable.  */
  puts ("info: performing accesses");
  fflush (stdout);
  for (int i = 0; i < page_count; ++i)
    check_fault (i);

  /* See what happens if we grant all access rights.  */
  puts ("info: setting PKRU to zero");
  fflush (stdout);
  pkey_write (0);

  for (int i = 0; i < page_count; ++i)
    check_fault (i);

  return 0;
}

  parent reply	other threads:[~2017-11-23 12:38 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-11-05 10:35 MPK: pkey_free and key reuse Florian Weimer
2017-11-05 10:35 ` Florian Weimer
2017-11-05 10:35 ` Florian Weimer
2017-11-08 20:41 ` Dave Hansen
2017-11-08 20:41   ` Dave Hansen
2017-11-09 14:48   ` Florian Weimer
2017-11-09 14:48     ` Florian Weimer
2017-11-09 14:48     ` Florian Weimer
2017-11-09 16:59     ` Dave Hansen
2017-11-09 16:59       ` Dave Hansen
2017-11-09 16:59       ` Dave Hansen
2017-11-23 12:48       ` Florian Weimer
2017-11-23 12:48         ` Florian Weimer
2017-11-23 13:07         ` Vlastimil Babka
2017-11-23 13:07           ` Vlastimil Babka
2017-11-23 15:25         ` Dave Hansen
2017-11-23 15:25           ` Dave Hansen
2017-11-24 14:55           ` Florian Weimer
2017-11-24 14:55             ` Florian Weimer
     [not found] ` <0f006ef4-a7b5-c0cf-5f58-d0fd1f911a54-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2017-11-22  8:18   ` MPK: removing a pkey (was: pkey_free and key reuse) Vlastimil Babka
2017-11-22  8:18     ` Vlastimil Babka
2017-11-22  8:18     ` Vlastimil Babka
2017-11-22 12:15     ` MPK: removing a pkey Florian Weimer
2017-11-22 12:15       ` Florian Weimer
2017-11-22 12:15       ` Florian Weimer
2017-11-22 12:46       ` Vlastimil Babka
2017-11-22 12:46         ` Vlastimil Babka
2017-11-22 12:46         ` Vlastimil Babka
     [not found]         ` <f0495f01-9821-ec36-56b4-333f109eb761-AlSwsSmVLrQ@public.gmane.org>
2017-11-22 12:49           ` Florian Weimer
2017-11-22 12:49             ` Florian Weimer
2017-11-22 12:49             ` Florian Weimer
2017-11-22 16:10       ` Dave Hansen
2017-11-22 16:10         ` Dave Hansen
2017-11-22 16:21         ` Florian Weimer
2017-11-22 16:21           ` Florian Weimer
2017-11-22 16:21           ` Florian Weimer
     [not found]           ` <9ec19ff3-86f6-7cfe-1a07-1ab1c5d9882c-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2017-11-22 16:32             ` Dave Hansen
2017-11-22 16:32               ` Dave Hansen
2017-11-22 16:32               ` Dave Hansen
2017-11-23  8:11               ` Vlastimil Babka
2017-11-23  8:11                 ` Vlastimil Babka
     [not found]                 ` <de93997a-7802-96cf-62e2-e59416e745ca-AlSwsSmVLrQ@public.gmane.org>
2017-11-23 15:00                   ` Dave Hansen
2017-11-23 15:00                     ` Dave Hansen
2017-11-23 15:00                     ` Dave Hansen
2017-11-23 21:42                     ` Vlastimil Babka
2017-11-23 21:42                       ` Vlastimil Babka
     [not found]                       ` <2d12777f-615a-8101-2156-cf861ec13aa7-AlSwsSmVLrQ@public.gmane.org>
2017-11-23 23:29                         ` Dave Hansen
2017-11-23 23:29                           ` Dave Hansen
2017-11-23 23:29                           ` Dave Hansen
2017-11-24  8:35                           ` Florian Weimer
2017-11-24  8:35                             ` Florian Weimer
2017-11-24  8:38                             ` Vlastimil Babka
2017-11-24  8:38                               ` Vlastimil Babka
2017-11-23 12:38               ` Florian Weimer [this message]
2017-11-23 12:38                 ` Florian Weimer
2017-11-23 15:09                 ` Dave Hansen
2017-11-23 15:09                   ` Dave Hansen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=28f6e430-293d-4b30-dce6-018a2b3c03e8@redhat.com \
    --to=fweimer@redhat.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-x86_64@vger.kernel.org \
    --cc=vbabka@suse.cz \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.