linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
To: linuxppc-dev@lists.ozlabs.org, mpe@ellerman.id.au
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>,
	Ram Pai <linuxram@linux.ibm.com>,
	Sandipan Das <sandipan@linux.ibm.com>,
	npiggin@gmail.com
Subject: [PATCH v4 02/22] powerpc/pkeys: Check vma before returning key fault error to the user
Date: Tue,  5 May 2020 12:47:09 +0530	[thread overview]
Message-ID: <20200505071729.54912-3-aneesh.kumar@linux.ibm.com> (raw)
In-Reply-To: <20200505071729.54912-1-aneesh.kumar@linux.ibm.com>

If multiple threads in userspace keep changing the protection keys
mapping a range, there can be a scenario where kernel takes a key fault
but the pkey value found in the siginfo struct is a permissive one.

This can confuse the userspace as shown in the below test case.

/* use this to control the number of test iterations */

static void pkeyreg_set(int pkey, unsigned long rights)
{
	unsigned long reg, shift;

	shift = (NR_PKEYS - pkey - 1) * PKEY_BITS_PER_PKEY;
	asm volatile("mfspr	%0, 0xd" : "=r"(reg));
	reg &= ~(((unsigned long) PKEY_BITS_MASK) << shift);
	reg |= (rights & PKEY_BITS_MASK) << shift;
	asm volatile("mtspr	0xd, %0" : : "r"(reg));
}

static unsigned long pkeyreg_get(void)
{
	unsigned long reg;

	asm volatile("mfspr	%0, 0xd" : "=r"(reg));
	return reg;
}

static int sys_pkey_mprotect(void *addr, size_t len, int prot, int pkey)
{
	return syscall(SYS_pkey_mprotect, addr, len, prot, pkey);
}

static int sys_pkey_alloc(unsigned long flags, unsigned long access_rights)
{
	return syscall(SYS_pkey_alloc, flags, access_rights);
}

static int sys_pkey_free(int pkey)
{
	return syscall(SYS_pkey_free, pkey);
}

static int faulting_pkey;
static int permissive_pkey;
static pthread_barrier_t pkey_set_barrier;
static pthread_barrier_t mprotect_barrier;

static void pkey_handle_fault(int signum, siginfo_t *sinfo, void *ctx)
{
	unsigned long pkeyreg;

	/* FIXME: printf is not signal-safe but for the current purpose,
	          it gets the job done. */
	printf("pkey: exp = %d, got = %d\n", faulting_pkey, sinfo->si_pkey);
	fflush(stdout);

	assert(sinfo->si_code == SEGV_PKUERR);
	assert(sinfo->si_pkey == faulting_pkey);

	/* clear pkey permissions to let the faulting instruction continue */
	pkeyreg_set(faulting_pkey, 0x0);
}

static void *do_mprotect_fault(void *p)
{
	unsigned long rights, pkeyreg, pgsize;
	unsigned int i;
	void *region;
	int pkey;

	srand(time(NULL));
	pgsize = sysconf(_SC_PAGESIZE);
	rights = PKEY_DISABLE_WRITE;
	region = p;

	/* allocate key, no permissions */
	assert((pkey = sys_pkey_alloc(0, PKEY_DISABLE_ACCESS)) > 0);
	pkeyreg_set(4, 0x0);

	/* cache the pkey here as the faulting pkey for future reference
	   in the signal handler */
	faulting_pkey = pkey;
	printf("%s: faulting pkey = %d\n", __func__, faulting_pkey);

	/* try to allocate, mprotect and free pkeys repeatedly */
	for (i = 0; i < NUM_ITERATIONS; i++) {
		/* sync up with the other thread here */
		pthread_barrier_wait(&pkey_set_barrier);

		/* make sure that the pkey used by the non-faulting thread
		   is made permissive for this thread's context too so that
		   no faults are triggered because it still might have been
		   set to a restrictive value */
//		pkeyreg_set(permissive_pkey, 0x0);

		/* sync up with the other thread here */
		pthread_barrier_wait(&mprotect_barrier);

		/* perform mprotect */
		assert(!sys_pkey_mprotect(region, pgsize, PROT_READ | PROT_WRITE, pkey));

		/* choose a random byte from the protected region and
		   attempt to write to it, this will generate a fault */
		*((char *) region + (rand() % pgsize)) = rand();

		/* restore pkey permissions as the signal handler may have
		   cleared the bit out for the sake of continuing */
		pkeyreg_set(pkey, PKEY_DISABLE_WRITE);
	}

	/* free pkey */
	sys_pkey_free(pkey);

	return NULL;
}

static void *do_mprotect_nofault(void *p)
{
	unsigned long pgsize;
	unsigned int i, j;
	void *region;
	int pkey;

	pgsize = sysconf(_SC_PAGESIZE);
	region = p;

	/* try to allocate, mprotect and free pkeys repeatedly */
	for (i = 0; i < NUM_ITERATIONS; i++) {
		/* allocate pkey, all permissions */
		assert((pkey = sys_pkey_alloc(0, 0)) > 0);
		permissive_pkey = pkey;

		/* sync up with the other thread here */
		pthread_barrier_wait(&pkey_set_barrier);
		pthread_barrier_wait(&mprotect_barrier);

		/* perform mprotect on the common page, no faults will
		   be triggered as this is most permissive */
		assert(!sys_pkey_mprotect(region, pgsize, PROT_READ | PROT_WRITE, pkey));

		/* free pkey */
		assert(!sys_pkey_free(pkey));
	}

	return NULL;
}

int main(int argc, char **argv)
{
	pthread_t fault_thread, nofault_thread;
	unsigned long pgsize;
	struct sigaction act;
	pthread_attr_t attr;
	cpu_set_t fault_cpuset, nofault_cpuset;
	unsigned int i;
	void *region;

	/* allocate memory region to protect */
	pgsize = sysconf(_SC_PAGESIZE);
	assert(region = memalign(pgsize, pgsize));

	CPU_ZERO(&fault_cpuset);
	CPU_SET(0, &fault_cpuset);
	CPU_ZERO(&nofault_cpuset);
	CPU_SET(8, &nofault_cpuset);
	assert(!pthread_attr_init(&attr));

	/* setup sigsegv signal handler */
	act.sa_handler = 0;
	act.sa_sigaction = pkey_handle_fault;
	assert(!sigprocmask(SIG_SETMASK, 0, &act.sa_mask));
	act.sa_flags = SA_SIGINFO;
	act.sa_restorer = 0;
	assert(!sigaction(SIGSEGV, &act, NULL));

	/* setup barrier for the two threads */
	pthread_barrier_init(&pkey_set_barrier, NULL, 2);
	pthread_barrier_init(&mprotect_barrier, NULL, 2);

	/* setup and start threads */
	assert(!pthread_create(&fault_thread, &attr, &do_mprotect_fault, region));
	assert(!pthread_setaffinity_np(fault_thread, sizeof(cpu_set_t), &fault_cpuset));
	assert(!pthread_create(&nofault_thread, &attr, &do_mprotect_nofault, region));
	assert(!pthread_setaffinity_np(nofault_thread, sizeof(cpu_set_t), &nofault_cpuset));

	/* cleanup */
	assert(!pthread_attr_destroy(&attr));
	assert(!pthread_join(fault_thread, NULL));
	assert(!pthread_join(nofault_thread, NULL));
	assert(!pthread_barrier_destroy(&pkey_set_barrier));
	assert(!pthread_barrier_destroy(&mprotect_barrier));
	free(region);

	puts("PASS");

	return EXIT_SUCCESS;
}

The above test can result the below failure without this patch.

pkey: exp = 3, got = 3
pkey: exp = 3, got = 4
a.out: pkey-siginfo-race.c:100: pkey_handle_fault: Assertion `sinfo->si_pkey == faulting_pkey' failed.
Aborted

Check for vma access before considering this a key fault. If vma pkey allow
access retry the acess again.

Test case is written by Sandipan Das <sandipan@linux.ibm.com> hence added SOB
from him.

Cc: Ram Pai <linuxram@linux.ibm.com>
Signed-off-by: Sandipan Das <sandipan@linux.ibm.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/mm/fault.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 8e529e4708e1..44457bae77a0 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -319,14 +319,6 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
 static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey,
 			      struct vm_area_struct *vma)
 {
-	/*
-	 * Read or write was blocked by protection keys.  This is
-	 * always an unconditional error and can never result in
-	 * a follow-up action to resolve the fault, like a COW.
-	 */
-	if (is_pkey)
-		return true;
-
 	/*
 	 * Make sure to check the VMA so that we do not perform
 	 * faults just to hit a pkey fault as soon as we fill in a
-- 
2.26.2


  parent reply	other threads:[~2020-05-05  7:29 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-05  7:17 [PATCH v4 00/22] Avoid IPI while updating page table entries Aneesh Kumar K.V
2020-05-05  7:17 ` [PATCH v4 01/22] powerpc/pkeys: Avoid using lockless page table walk Aneesh Kumar K.V
2020-05-13  8:49   ` Michael Ellerman
2020-05-05  7:17 ` Aneesh Kumar K.V [this message]
2020-05-05  7:17 ` [PATCH v4 03/22] powerpc/mm/hash64: use _PAGE_PTE when checking for pte_present Aneesh Kumar K.V
2020-05-05  7:17 ` [PATCH v4 04/22] powerpc/hash64: Restrict page table lookup using init_mm with __flush_hash_table_range Aneesh Kumar K.V
2020-05-05  7:17 ` [PATCH v4 05/22] powerpc/book3s64/hash: Use the pte_t address from the caller Aneesh Kumar K.V
2020-05-05  7:17 ` [PATCH v4 06/22] powerpc/mce: Don't reload pte val in addr_to_pfn Aneesh Kumar K.V
2020-05-05  7:17 ` [PATCH v4 07/22] powerpc/perf/callchain: Use __get_user_pages_fast in read_user_stack_slow Aneesh Kumar K.V
2020-05-05  7:17 ` [PATCH v4 08/22] powerpc/kvm/book3s: switch from raw_spin_*lock to arch_spin_lock Aneesh Kumar K.V
2020-05-05  7:17 ` [PATCH v4 09/22] powerpc/kvm/book3s: Add helper to walk partition scoped linux page table Aneesh Kumar K.V
2020-05-28  1:43   ` Paul Mackerras
2020-05-28  6:01     ` Aneesh Kumar K.V
2020-05-28  7:25       ` Paul Mackerras
2020-05-05  7:17 ` [PATCH v4 10/22] powerpc/kvm/nested: Add helper to walk nested shadow " Aneesh Kumar K.V
2020-05-05  7:17 ` [PATCH v4 11/22] powerpc/kvm/book3s: Use kvm helpers to walk shadow or secondary table Aneesh Kumar K.V
2020-05-05  7:17 ` [PATCH v4 12/22] powerpc/kvm/book3s: Add helper for host page table walk Aneesh Kumar K.V
2020-05-05  7:17 ` [PATCH v4 13/22] powerpc/kvm/book3s: Use find_kvm_host_pte in page fault handler Aneesh Kumar K.V
2020-05-05  7:17 ` [PATCH v4 14/22] powerpc/kvm/book3s: Use find_kvm_host_pte in h_enter Aneesh Kumar K.V
2020-05-05  7:17 ` [PATCH v4 15/22] powerpc/kvm/book3s: use find_kvm_host_pte in pute_tce functions Aneesh Kumar K.V
2020-05-05  7:17 ` [PATCH v4 16/22] powerpc/kvm/book3s: Avoid using rmap to protect parallel page table update Aneesh Kumar K.V
2020-05-05  7:17 ` [PATCH v4 17/22] powerpc/kvm/book3s: use find_kvm_host_pte in kvmppc_book3s_instantiate_page Aneesh Kumar K.V
2020-05-05  7:17 ` [PATCH v4 18/22] powerpc/kvm/book3s: Use find_kvm_host_pte in kvmppc_get_hpa Aneesh Kumar K.V
2020-05-05  7:17 ` [PATCH v4 19/22] powerpc/kvm/book3s: Use pte_present instead of opencoding _PAGE_PRESENT check Aneesh Kumar K.V
2020-05-05  7:17 ` [PATCH v4 20/22] powerpc/mm/book3s64: Avoid sending IPI on clearing PMD Aneesh Kumar K.V
2020-05-05  7:17 ` [PATCH v4 21/22] mm: change pmdp_huge_get_and_clear_full take vm_area_struct as arg Aneesh Kumar K.V
2020-05-05  7:17 ` [PATCH v4 22/22] powerpc/mm/book3s64: Fix MADV_DONTNEED and parallel page fault race Aneesh Kumar K.V

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200505071729.54912-3-aneesh.kumar@linux.ibm.com \
    --to=aneesh.kumar@linux.ibm.com \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=linuxram@linux.ibm.com \
    --cc=mpe@ellerman.id.au \
    --cc=npiggin@gmail.com \
    --cc=sandipan@linux.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).