linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC, PATCH] fix verify_write for 80386 support
@ 2003-04-21 20:28 Manfred Spraul
  2003-04-21 20:36 ` Manfred Spraul
  0 siblings, 1 reply; 3+ messages in thread
From: Manfred Spraul @ 2003-04-21 20:28 UTC (permalink / raw)
  To: linux-kernel

Real 80386 cpus have a special feature: they ignore the write-protected 
bit in the page tables if the kernel runs at ring 0. This feature breaks 
COW, the software must manually check the write protected bit before 
writing.

Right now the check happens only in access_ok(VERIFY_WRITE).
This is broken, because:
- some codepaths call access_ok(VERIFY_READ), and then write.
- kswapd might change the page permissions between access_ok and the 
actual write access.



^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [RFC, PATCH] fix verify_write for 80386 support
  2003-04-21 20:28 [RFC, PATCH] fix verify_write for 80386 support Manfred Spraul
@ 2003-04-21 20:36 ` Manfred Spraul
  2003-04-22  2:13   ` Brian Gerst
  0 siblings, 1 reply; 3+ messages in thread
From: Manfred Spraul @ 2003-04-21 20:36 UTC (permalink / raw)
  To: linux-kernel

[-- Attachment #1: Type: text/plain, Size: 697 bytes --]

[ 2nd part of the mail, after hitting the send button in the middle of 
the sentence]

The solution is to perform the check for write protection in the actual 
access functions, not during access_ok.

The attached patch does that:
- remove the check from access_ok
- redirect all user space write access into __copy_to_user_ll
- within __copy_to_user_ll: use get_user_pages, and write directly to 
the obtained kernel address.

This fixes all swapout & data corruption bugs, and even removes 26 lines 
from the kernel.

What do you think? The alternative would be to drop support for real 
80386 cpus.

Not tested on a real 80386, I've just replace wp_works_ok with !wp_works_ok.

--
    Manfred


[-- Attachment #2: patch-uaccess-386 --]
[-- Type: text/plain, Size: 5698 bytes --]

 arch/i386/lib/usercopy.c   |   47 ++++++++++++++++++++++++++
 arch/i386/mm/fault.c       |   81 ---------------------------------------------
 include/asm-i386/uaccess.h |   30 +++++++---------
 3 files changed, 61 insertions(+), 97 deletions(-)
// $Header$
// Kernel Version:
//  VERSION = 2
//  PATCHLEVEL = 5
//  SUBLEVEL = 68
//  EXTRAVERSION =
--- 2.5/include/asm-i386/uaccess.h	2003-04-20 11:13:45.000000000 +0200
+++ build-2.5/include/asm-i386/uaccess.h	2003-04-20 18:43:38.000000000 +0200
@@ -62,8 +62,6 @@
 		:"1" (addr),"g" ((int)(size)),"g" (current_thread_info()->addr_limit.seg)); \
 	flag; })
 
-#ifdef CONFIG_X86_WP_WORKS_OK
-
 /**
  * access_ok: - Checks if a user space pointer is valid
  * @type: Type of access: %VERIFY_READ or %VERIFY_WRITE.  Note that
@@ -85,14 +83,6 @@
  */
 #define access_ok(type,addr,size) (__range_ok(addr,size) == 0)
 
-#else
-
-#define access_ok(type,addr,size) ( (__range_ok(addr,size) == 0) && \
-			 ((type) == VERIFY_READ || boot_cpu_data.wp_works_ok || \
-			  __verify_write((void *)(addr),(size))))
-
-#endif
-
 /**
  * verify_area: - Obsolete, use access_ok()
  * @type: Type of access: %VERIFY_READ or %VERIFY_WRITE
@@ -191,14 +181,8 @@
 	__ret_gu;							\
 })
 
-extern void __put_user_1(void);
-extern void __put_user_2(void);
-extern void __put_user_4(void);
-extern void __put_user_8(void);
-
 extern void __put_user_bad(void);
 
-
 /**
  * put_user: - Write a simple value into user space.
  * @x:   Value to copy to user space.
@@ -299,6 +283,8 @@
 		: "=r"(err)					\
 		: "A" (x), "r" (addr), "i"(-EFAULT), "0"(err))
 
+#ifdef CONFIG_X86_WP_WORKS_OK
+
 #define __put_user_size(x,ptr,size,retval,errret)			\
 do {									\
 	retval = 0;							\
@@ -311,6 +297,18 @@
 	}								\
 } while (0)
 
+#else
+
+#define __put_user_size(x,ptr,size,retval,errret)			\
+do {									\
+	__typeof__(*(ptr)) __pus_tmp = x;				\
+	retval = 0;							\
+									\
+	if(unlikely(__copy_to_user_ll(ptr, &__pus_tmp, size) != 0))	\
+		retval = errret;					\
+} while (0)
+
+#endif
 struct __large_struct { unsigned long buf[100]; };
 #define __m(x) (*(struct __large_struct *)(x))
 
--- 2.5/arch/i386/mm/fault.c	2003-04-20 11:19:13.000000000 +0200
+++ build-2.5/arch/i386/mm/fault.c	2003-04-20 16:18:16.000000000 +0200
@@ -29,87 +29,6 @@
 
 extern void die(const char *,struct pt_regs *,long);
 
-#ifndef CONFIG_X86_WP_WORKS_OK
-/*
- * Ugly, ugly, but the goto's result in better assembly..
- */
-int __verify_write(const void * addr, unsigned long size)
-{
-	struct mm_struct *mm = current->mm;
-	struct vm_area_struct * vma;
-	unsigned long start = (unsigned long) addr;
-
-	if (!size || segment_eq(get_fs(),KERNEL_DS))
-		return 1;
-
-	down_read(&mm->mmap_sem);
-	vma = find_vma(current->mm, start);
-	if (!vma)
-		goto bad_area;
-	if (vma->vm_start > start)
-		goto check_stack;
-
-good_area:
-	if (!(vma->vm_flags & VM_WRITE))
-		goto bad_area;
-	size--;
-	size += start & ~PAGE_MASK;
-	size >>= PAGE_SHIFT;
-	start &= PAGE_MASK;
-
-	for (;;) {
-	survive:
-		switch (handle_mm_fault(current->mm, vma, start, 1)) {
-			case VM_FAULT_SIGBUS:
-				goto bad_area;
-			case VM_FAULT_OOM:
-				goto out_of_memory;
-			case VM_FAULT_MINOR:
-			case VM_FAULT_MAJOR:
-				break;
-			default:
-				BUG();
-		}
-		if (!size)
-			break;
-		size--;
-		start += PAGE_SIZE;
-		if (start < vma->vm_end)
-			continue;
-		vma = vma->vm_next;
-		if (!vma || vma->vm_start != start)
-			goto bad_area;
-		if (!(vma->vm_flags & VM_WRITE))
-			goto bad_area;;
-	}
-	/*
-	 * We really need to hold mmap_sem over the whole access to
-	 * userspace, else another thread could change permissions.
-	 * This is unfixable, so don't use i386-class machines for
-	 * critical servers.
-	 */
-	up_read(&mm->mmap_sem);
-	return 1;
-
-check_stack:
-	if (!(vma->vm_flags & VM_GROWSDOWN))
-		goto bad_area;
-	if (expand_stack(vma, start) == 0)
-		goto good_area;
-
-bad_area:
-	up_read(&mm->mmap_sem);
-	return 0;
-
-out_of_memory:
-	if (current->pid == 1) {
-		yield();
-		goto survive;
-	}
-	goto bad_area;
-}
-#endif
-
 /*
  * Unlock any spinlocks which will prevent us from getting the
  * message out 
--- 2.5/arch/i386/lib/usercopy.c	2003-03-25 17:49:34.000000000 +0100
+++ build-2.5/arch/i386/lib/usercopy.c	2003-04-20 19:45:51.000000000 +0200
@@ -6,6 +6,8 @@
  * Copyright 1997 Linus Torvalds
  */
 #include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
 #include <asm/uaccess.h>
 #include <asm/mmx.h>
 
@@ -483,6 +485,51 @@
 
 unsigned long __copy_to_user_ll(void *to, const void *from, unsigned long n)
 {
+#ifndef CONFIG_X86_WP_WORKS_OK
+	if (unlikely(boot_cpu_data.wp_works_ok == 0) && ((unsigned long )to) < TASK_SIZE) {
+		/* 
+		 * CPU does not honor the WP bit when writing
+		 * from supervisory mode, and due to preemption or SMP,
+		 * the page tables can change at any time.
+		 * Do it manually.	Manfred <manfred@colorfullife.com>
+		 */
+		while (n) {
+		      	unsigned long offset = ((unsigned long)to)%PAGE_SIZE;
+			unsigned long len = PAGE_SIZE - offset;
+			int retval;
+			struct page *pg;
+			void *maddr;
+			
+			if (len > n)
+				len = n;
+
+survive:
+			down_read(&current->mm->mmap_sem);
+			retval = get_user_pages(current, current->mm,
+					(unsigned long )to, 1, 1, 0, &pg, NULL);
+			up_read(&current->mm->mmap_sem);
+
+			if (retval == -ENOMEM && current->pid == 1) {
+				yield();
+				goto survive;
+			}
+
+			if (retval != 1)
+		       		break;
+
+			maddr = kmap(pg);
+			memcpy(maddr + offset, from, len);
+			kunmap(pg);
+			set_page_dirty_lock(pg);
+			put_page(pg);
+
+			from += len;
+			to += len;
+			n -= len;
+		}
+		return n;
+	}
+#endif
 	if (movsl_is_ok(to, from, n))
 		__copy_user(to, from, n);
 	else

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [RFC, PATCH] fix verify_write for 80386 support
  2003-04-21 20:36 ` Manfred Spraul
@ 2003-04-22  2:13   ` Brian Gerst
  0 siblings, 0 replies; 3+ messages in thread
From: Brian Gerst @ 2003-04-22  2:13 UTC (permalink / raw)
  To: Manfred Spraul; +Cc: linux-kernel

Manfred Spraul wrote:
> [ 2nd part of the mail, after hitting the send button in the middle of 
> the sentence]
> 
> The solution is to perform the check for write protection in the actual 
> access functions, not during access_ok.
> 
> The attached patch does that:
> - remove the check from access_ok
> - redirect all user space write access into __copy_to_user_ll
> - within __copy_to_user_ll: use get_user_pages, and write directly to 
> the obtained kernel address.
> 
> This fixes all swapout & data corruption bugs, and even removes 26 lines 
> from the kernel.
> 
> What do you think? The alternative would be to drop support for real 
> 80386 cpus.
> 
> Not tested on a real 80386, I've just replace wp_works_ok with 
> !wp_works_ok.
> 
> -- 
>    Manfred
> 

Close, but there is still a race here.  You need to hold mmap_sem over 
the memcpy.  Another thread will expect the memory to not be written to 
after write-protecting it.

--
				Brian Gerst


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2003-04-22  2:02 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-04-21 20:28 [RFC, PATCH] fix verify_write for 80386 support Manfred Spraul
2003-04-21 20:36 ` Manfred Spraul
2003-04-22  2:13   ` Brian Gerst

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).