linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [patch -mm series] ia64 specific /dev/mem handlers
@ 2005-02-22  9:52 Jes Sorensen
  2005-02-22 10:03 ` Andrew Morton
                   ` (2 more replies)
  0 siblings, 3 replies; 26+ messages in thread
From: Jes Sorensen @ 2005-02-22  9:52 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-ia64, linux-kernel

Hi,

This patch introduces ia64 specific read/write handlers for /dev/mem
access which is needed to avoid uncached pages to be accessed through
the cached kernel window which can lead to random corruption. It also
introduces a new page-flag PG_uncached which will be used to mark the
uncached pages. I assume this may be useful to other architectures as
well where the CPU may use speculative reads which conflict with
uncached access. In addition I moved do_write_mem to be under
ARCH_HAS_DEV_MEM as it's only ever used if that is defined.

The patch is needed for the new ia64 special memory driver (mspec -
former fetchop).

Patch is relative to 2.6.11-rc3-mm2 and relies on the ARCH_HAS_DEV_MEM
flag which isn't in Linus' nor Tony's trees yet.

Cheers,
Jes

Signed-off-by: Jes Sorensen <jes@wildopensource.com>

diff -urN -X /usr/people/jes/exclude-linux linux-2.6.11-rc3-mm2-vanilla/arch/ia64/kernel/Makefile linux-2.6.11-rc3-mm2/arch/ia64/kernel/Makefile
--- linux-2.6.11-rc3-mm2-vanilla/arch/ia64/kernel/Makefile	2005-02-16 11:20:19 -08:00
+++ linux-2.6.11-rc3-mm2/arch/ia64/kernel/Makefile	2005-02-16 11:58:35 -08:00
@@ -7,7 +7,7 @@
 obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o	\
 	 irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o		\
 	 salinfo.o semaphore.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
-	 unwind.o mca.o mca_asm.o topology.o
+	 unwind.o mca.o mca_asm.o topology.o mem.o
 
 obj-$(CONFIG_IA64_BRL_EMU)	+= brl_emu.o
 obj-$(CONFIG_IA64_GENERIC)	+= acpi-ext.o
diff -urN -X /usr/people/jes/exclude-linux linux-2.6.11-rc3-mm2-vanilla/arch/ia64/kernel/mem.c linux-2.6.11-rc3-mm2/arch/ia64/kernel/mem.c
--- linux-2.6.11-rc3-mm2-vanilla/arch/ia64/kernel/mem.c	1969-12-31 16:00:00 -08:00
+++ linux-2.6.11-rc3-mm2/arch/ia64/kernel/mem.c	2005-02-16 12:49:01 -08:00
@@ -0,0 +1,151 @@
+/*
+ *  arch/ia64/kernel/mem.c
+ *
+ *  IA64 specific  portions of /dev/mem access, notably handling
+ *  read/write from uncached memory
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *  Copyright (C) 2005 Jes Sorensen <jes@wildopensource.com>
+ */
+
+
+#include <linux/mm.h>
+
+#include <asm/io.h>
+#include <asm/uaccess.h>
+
+
+extern loff_t memory_lseek(struct file * file, loff_t offset, int orig);
+extern int mmap_kmem(struct file * file, struct vm_area_struct * vma);
+extern int open_port(struct inode * inode, struct file * filp);
+
+
+static inline int range_is_allowed(unsigned long from, unsigned long to)
+{
+	unsigned long cursor;
+
+	cursor = from >> PAGE_SHIFT;
+	while ((cursor << PAGE_SHIFT) < to) {
+		if (!devmem_is_allowed(cursor))
+			return 0;
+		cursor++;
+	}
+	return 1;
+}
+
+
+/*
+ * This funcion reads the *physical* memory. The f_pos points directly
+ * to the memory location. 
+ */
+static ssize_t read_mem(struct file * file, char __user * buf,
+			size_t count, loff_t *ppos)
+{
+	unsigned long p = *ppos;
+	ssize_t read, sz;
+	struct page *page;
+	char *ptr;
+
+
+	if (!valid_phys_addr_range(p, &count))
+		return -EFAULT;
+	read = 0;
+
+	while (count > 0) {
+		/*
+		 * Handle first page in case it's not aligned
+		 */
+		if (-p & (PAGE_SIZE - 1))
+			sz = -p & (PAGE_SIZE - 1);
+		else
+			sz = min(PAGE_SIZE, count);
+
+		page = pfn_to_page(p >> PAGE_SHIFT);
+		/*
+		 * On ia64 if a page has been mapped somewhere as
+		 * uncached, then it must also be accessed uncached
+		 * by the kernel or data corruption may occur
+		 */
+		if (page->flags & PG_uncached)
+			ptr = (char *)p + __IA64_UNCACHED_OFFSET;
+		else
+			ptr = __va(p);
+		if (copy_to_user(buf, ptr, sz))
+			return -EFAULT;
+		buf += sz;
+		p += sz;
+		count -= sz;
+		read += sz;
+	}
+
+	*ppos += read;
+	return read;
+}
+
+
+static ssize_t write_mem(struct file * file, const char __user * buf, 
+			 size_t count, loff_t *ppos)
+{
+	unsigned long p = *ppos;
+	unsigned long copied;
+	ssize_t written, sz;
+	struct page *page;
+	char *ptr;
+
+	if (!valid_phys_addr_range(p, &count))
+		return -EFAULT;
+
+	written = 0;
+
+	if (!range_is_allowed(p, p + count))
+		return -EPERM;
+	/*
+	 * Need virtual p below here
+	 */
+	while (count > 0) {
+		/*
+		 * Handle first page in case it's not aligned
+		 */
+		if (-p & (PAGE_SIZE - 1))
+			sz = -p & (PAGE_SIZE - 1);
+		else
+			sz = min(PAGE_SIZE, count);
+
+		page = pfn_to_page(p >> PAGE_SHIFT);
+		/*
+		 * On ia64 if a page has been mapped somewhere as
+		 * uncached, then it must also be accessed uncached
+		 * by the kernel or data corruption may occur
+		 */
+		if (page->flags & PG_uncached)
+			ptr = (char *)p + __IA64_UNCACHED_OFFSET;
+		else
+			ptr = __va(p);
+
+		copied = copy_from_user(ptr, buf, sz);
+		if (copied) {
+			ssize_t ret;
+
+			ret = written + (sz - copied);
+			if (ret)
+				return ret;
+			return -EFAULT;
+		}
+		buf += sz;
+		p += sz;
+		count -= sz;
+		written += sz;
+	}
+
+	*ppos += written;
+	return written;
+}
+
+
+struct file_operations mem_fops = {
+	.llseek		= memory_lseek,
+	.read		= read_mem,
+	.write		= write_mem,
+	.mmap		= mmap_kmem,
+	.open		= open_port,
+};
diff -urN -X /usr/people/jes/exclude-linux linux-2.6.11-rc3-mm2-vanilla/drivers/char/mem.c linux-2.6.11-rc3-mm2/drivers/char/mem.c
--- linux-2.6.11-rc3-mm2-vanilla/drivers/char/mem.c	2005-02-16 11:20:55 -08:00
+++ linux-2.6.11-rc3-mm2/drivers/char/mem.c	2005-02-16 11:58:35 -08:00
@@ -125,39 +125,7 @@
 	}
 	return 1;
 }
-static ssize_t do_write_mem(void *p, unsigned long realp,
-			    const char __user * buf, size_t count, loff_t *ppos)
-{
-	ssize_t written;
-	unsigned long copied;
-
-	written = 0;
-#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
-	/* we don't have page 0 mapped on sparc and m68k.. */
-	if (realp < PAGE_SIZE) {
-		unsigned long sz = PAGE_SIZE-realp;
-		if (sz > count) sz = count; 
-		/* Hmm. Do something? */
-		buf+=sz;
-		p+=sz;
-		count-=sz;
-		written+=sz;
-	}
-#endif
-	if (!range_is_allowed(realp, realp+count))
-		return -EPERM;
-	copied = copy_from_user(p, buf, count);
-	if (copied) {
-		ssize_t ret = written + (count - copied);
 
-		if (ret)
-			return ret;
-		return -EFAULT;
-	}
-	written += count;
-	*ppos += written;
-	return written;
-}
 
 #ifndef ARCH_HAS_DEV_MEM
 /*
@@ -196,6 +164,40 @@
 	return read;
 }
 
+static ssize_t do_write_mem(void *p, unsigned long realp,
+			    const char __user * buf, size_t count, loff_t *ppos)
+{
+	ssize_t written;
+	unsigned long copied;
+
+	written = 0;
+#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
+	/* we don't have page 0 mapped on sparc and m68k.. */
+	if (realp < PAGE_SIZE) {
+		unsigned long sz = PAGE_SIZE-realp;
+		if (sz > count) sz = count; 
+		/* Hmm. Do something? */
+		buf+=sz;
+		p+=sz;
+		count-=sz;
+		written+=sz;
+	}
+#endif
+	if (!range_is_allowed(realp, realp+count))
+		return -EPERM;
+	copied = copy_from_user(p, buf, count);
+	if (copied) {
+		ssize_t ret = written + (count - copied);
+
+		if (ret)
+			return ret;
+		return -EFAULT;
+	}
+	written += count;
+	*ppos += written;
+	return written;
+}
+
 static ssize_t write_mem(struct file * file, const char __user * buf, 
 			 size_t count, loff_t *ppos)
 {
@@ -207,7 +209,8 @@
 }
 #endif
 
-static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
+
+int mmap_kmem(struct file * file, struct vm_area_struct * vma)
 {
 #ifdef pgprot_noncached
 	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
@@ -553,7 +556,7 @@
  * also note that seeking relative to the "end of file" isn't supported:
  * it has no meaning, so it returns -EINVAL.
  */
-static loff_t memory_lseek(struct file * file, loff_t offset, int orig)
+loff_t memory_lseek(struct file * file, loff_t offset, int orig)
 {
 	loff_t ret;
 
@@ -576,7 +579,7 @@
 	return ret;
 }
 
-static int open_port(struct inode * inode, struct file * filp)
+int open_port(struct inode * inode, struct file * filp)
 {
 	return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
 }
diff -urN -X /usr/people/jes/exclude-linux linux-2.6.11-rc3-mm2-vanilla/include/asm-ia64/io.h linux-2.6.11-rc3-mm2/include/asm-ia64/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-ia64/io.h	2005-02-16 11:20:23 -08:00
+++ linux-2.6.11-rc3-mm2/include/asm-ia64/io.h	2005-02-16 11:58:35 -08:00
@@ -481,4 +481,6 @@
 #define BIO_VMERGE_BOUNDARY	(ia64_max_iommu_merge_mask + 1)
 #endif
 
+#define ARCH_HAS_DEV_MEM
+
 #endif /* _ASM_IA64_IO_H */
diff -urN -X /usr/people/jes/exclude-linux linux-2.6.11-rc3-mm2-vanilla/include/linux/page-flags.h linux-2.6.11-rc3-mm2/include/linux/page-flags.h
--- linux-2.6.11-rc3-mm2-vanilla/include/linux/page-flags.h	2005-02-16 11:20:57 -08:00
+++ linux-2.6.11-rc3-mm2/include/linux/page-flags.h	2005-02-16 12:48:31 -08:00
@@ -76,7 +76,7 @@
 #define PG_mappedtodisk		17	/* Has blocks allocated on-disk */
 #define PG_reclaim		18	/* To be reclaimed asap */
 #define PG_nosave_free		19	/* Free, should not be written */
-
+#define PG_uncached		20	/* Page has been mapped as uncached */
 
 /*
  * Global page accounting.  One instance per CPU.  Only unsigned longs are

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch -mm series] ia64 specific /dev/mem handlers
  2005-02-22  9:52 [patch -mm series] ia64 specific /dev/mem handlers Jes Sorensen
@ 2005-02-22 10:03 ` Andrew Morton
  2005-02-22 12:08   ` Andi Kleen
  2005-02-22 14:41   ` Jes Sorensen
  2005-02-22 22:03 ` Arjan van de Ven
  2005-03-03 11:37 ` Andrew Morton
  2 siblings, 2 replies; 26+ messages in thread
From: Andrew Morton @ 2005-02-22 10:03 UTC (permalink / raw)
  To: Jes Sorensen; +Cc: linux-ia64, linux-kernel

jes@trained-monkey.org (Jes Sorensen) wrote:
>
> Hi,
> 
> This patch introduces ia64 specific read/write handlers for /dev/mem
> access which is needed to avoid uncached pages to be accessed through
> the cached kernel window which can lead to random corruption. It also
> introduces a new page-flag PG_uncached which will be used to mark the
> uncached pages. I assume this may be useful to other architectures as
> well where the CPU may use speculative reads which conflict with
> uncached access. In addition I moved do_write_mem to be under
> ARCH_HAS_DEV_MEM as it's only ever used if that is defined.
> 
> The patch is needed for the new ia64 special memory driver (mspec -
> former fetchop).
> 
> Patch is relative to 2.6.11-rc3-mm2 and relies on the ARCH_HAS_DEV_MEM
> flag which isn't in Linus' nor Tony's trees yet.

Is it possible to avoid consuming a page flag?

If this is an ia64-only (or 64-bit-only) thing I guess we could use bit 32.

> +		if (page->flags & PG_uncached)

dude.  That ain't gonna work ;)

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch -mm series] ia64 specific /dev/mem handlers
  2005-02-22 10:03 ` Andrew Morton
@ 2005-02-22 12:08   ` Andi Kleen
  2005-02-22 14:41   ` Jes Sorensen
  1 sibling, 0 replies; 26+ messages in thread
From: Andi Kleen @ 2005-02-22 12:08 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-ia64, linux-kernel

Andrew Morton <akpm@osdl.org> writes:
>
> Is it possible to avoid consuming a page flag?
>
> If this is an ia64-only (or 64-bit-only) thing I guess we could use bit 32.

It's not. i386 essentially has the same problem. Other architectures
likely too. We definitely need a generic solution, especially when
we're going to support PAT on i386/x86-64 better (which I hope will
happen soon)

-Andi

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch -mm series] ia64 specific /dev/mem handlers
  2005-02-22 10:03 ` Andrew Morton
  2005-02-22 12:08   ` Andi Kleen
@ 2005-02-22 14:41   ` Jes Sorensen
  2005-02-22 17:52     ` Matthew Wilcox
  1 sibling, 1 reply; 26+ messages in thread
From: Jes Sorensen @ 2005-02-22 14:41 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-ia64, linux-kernel

>>>>> "Andrew" == Andrew Morton <akpm@osdl.org> writes:

Andrew> jes@trained-monkey.org (Jes Sorensen) wrote:
>>  Hi,
>> 
>> This patch introduces ia64 specific read/write handlers for
>> /dev/mem access which is needed to avoid uncached pages to be
>> accessed through the cached kernel window which can lead to random
>> corruption. It also introduces a new page-flag PG_uncached which
>> will be used to mark the uncached pages. I assume this may be
>> useful to other architectures as well where the CPU may use
>> speculative reads which conflict with uncached access. In addition
>> I moved do_write_mem to be under ARCH_HAS_DEV_MEM as it's only ever
>> used if that is defined.

Andrew> Is it possible to avoid consuming a page flag?

Andrew> If this is an ia64-only (or 64-bit-only) thing I guess we
Andrew> could use bit 32.

Hi Andrew,

I don't think we can get away with it, I think most modern
architectures have this problem. I've been trying to avoid it for
several iterations but I kept getting pushed towards this.

>> + if (page->flags & PG_uncached)

Andrew> dude.  That ain't gonna work ;)

Pardon my lack of clue, but why not?

Maybe I should explain how I use it - in the mspec driver I pull a
chunk of pages out using alloc_pages and convert them all to uncached
(I need to do a chunk due to the speculation restrictions on ia64) and
then stick them into a special allocator and set page->PG_uncached
when doing so (the allocator is not tied to the uncached so I posted
the patch to Tony Luck). Ie. once the pages have been grabbed with
alloc_pages() and converted to uncached, they are never put back into
the generic pool.

It may be there are other places which needs to learn to respect the
flag?

Cheers,
Jes


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch -mm series] ia64 specific /dev/mem handlers
  2005-02-22 14:41   ` Jes Sorensen
@ 2005-02-22 17:52     ` Matthew Wilcox
  2005-02-22 19:25       ` Andrew Morton
  0 siblings, 1 reply; 26+ messages in thread
From: Matthew Wilcox @ 2005-02-22 17:52 UTC (permalink / raw)
  To: Jes Sorensen; +Cc: Andrew Morton, linux-ia64, linux-kernel

On Tue, Feb 22, 2005 at 09:41:04AM -0500, Jes Sorensen wrote:
> >> + if (page->flags & PG_uncached)
> 
> Andrew> dude.  That ain't gonna work ;)
> 
> Pardon my lack of clue, but why not?

I think you're supposed to always use test_bit() to check page flags

-- 
"Next the statesmen will invent cheap lies, putting the blame upon 
the nation that is attacked, and every man will be glad of those
conscience-soothing falsities, and will diligently study them, and refuse
to examine any refutations of them; and thus he will by and by convince 
himself that the war is just, and will thank God for the better sleep 
he enjoys after this process of grotesque self-deception." -- Mark Twain

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch -mm series] ia64 specific /dev/mem handlers
  2005-02-22 17:52     ` Matthew Wilcox
@ 2005-02-22 19:25       ` Andrew Morton
  2005-02-22 19:35         ` Dave Hansen
                           ` (2 more replies)
  0 siblings, 3 replies; 26+ messages in thread
From: Andrew Morton @ 2005-02-22 19:25 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: jes, linux-ia64, linux-kernel

Matthew Wilcox <matthew@wil.cx> wrote:
>
> On Tue, Feb 22, 2005 at 09:41:04AM -0500, Jes Sorensen wrote:
>  > >> + if (page->flags & PG_uncached)
>  > 
>  > Andrew> dude.  That ain't gonna work ;)
>  > 
>  > Pardon my lack of clue, but why not?

	if (page->flags & (1<<PG_uncached))

would have been correcter.

>  I think you're supposed to always use test_bit() to check page flags

Yup.  Add PageUncached macros to page-flags.h.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch -mm series] ia64 specific /dev/mem handlers
  2005-02-22 19:25       ` Andrew Morton
@ 2005-02-22 19:35         ` Dave Hansen
  2005-02-22 21:38           ` Jes Sorensen
  2005-02-22 21:34         ` Jes Sorensen
  2005-02-22 22:39         ` Jes Sorensen
  2 siblings, 1 reply; 26+ messages in thread
From: Dave Hansen @ 2005-02-22 19:35 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Matthew Wilcox, jes, linux-ia64, Linux Kernel Mailing List

I was talking with Nigel Cunningham about doing something a little
different from the classic page flag bits when the number of users is
restricted and performance isn't ultra-critical.  Would something like
this work for you, instead of using a real page->flags bit for
PG_cached?

http://marc.theaimsgroup.com/?l=linux-kernel&m=110878103926956&w=2

BTW, they're planning on using two of those dynamic flags for software
suspend, and I'll probably use at least another two in the memory
hotplug code.  

-- Dave


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch -mm series] ia64 specific /dev/mem handlers
  2005-02-22 19:25       ` Andrew Morton
  2005-02-22 19:35         ` Dave Hansen
@ 2005-02-22 21:34         ` Jes Sorensen
  2005-02-22 22:39         ` Jes Sorensen
  2 siblings, 0 replies; 26+ messages in thread
From: Jes Sorensen @ 2005-02-22 21:34 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Matthew Wilcox, linux-ia64, linux-kernel

>>>>> "Andrew" == Andrew Morton <akpm@osdl.org> writes:

Andrew> Matthew Wilcox <matthew@wil.cx> wrote:
>> 
>> On Tue, Feb 22, 2005 at 09:41:04AM -0500, Jes Sorensen wrote:
>> > >> + if (page->flags & PG_uncached)
>> > 
>> > Andrew> dude.  That ain't gonna work ;)
>> > 
>> > Pardon my lack of clue, but why not?

Andrew> 	if (page->flags & (1<<PG_uncached))

Andrew> would have been correcter.

DOH!

Desperately seeking a bulk supply of those brown paper bags!

>> I think you're supposed to always use test_bit() to check page
>> flags

Andrew> Yup.  Add PageUncached macros to page-flags.h.

Mmmmm another butt ugly StUdLyCaPs macro coming soon.

Thanks,
Jes

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch -mm series] ia64 specific /dev/mem handlers
  2005-02-22 19:35         ` Dave Hansen
@ 2005-02-22 21:38           ` Jes Sorensen
  2005-02-23  0:48             ` Dave Hansen
  0 siblings, 1 reply; 26+ messages in thread
From: Jes Sorensen @ 2005-02-22 21:38 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Andrew Morton, Matthew Wilcox, linux-ia64, Linux Kernel Mailing List

>>>>> "Dave" == Dave Hansen <haveblue@us.ibm.com> writes:

Dave> I was talking with Nigel Cunningham about doing something a
Dave> little different from the classic page flag bits when the number
Dave> of users is restricted and performance isn't ultra-critical.
Dave> Would something like this work for you, instead of using a real
Dave> page->flags bit for PG_cached?

Just took a quick look at this and it looks a bit heavy for our
use. We are only looking at a small number of pages. However I could
imagine future cases where performance may be more critical.

Cheers,
Jes

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch -mm series] ia64 specific /dev/mem handlers
  2005-02-22  9:52 [patch -mm series] ia64 specific /dev/mem handlers Jes Sorensen
  2005-02-22 10:03 ` Andrew Morton
@ 2005-02-22 22:03 ` Arjan van de Ven
  2005-02-22 22:30   ` Jes Sorensen
  2005-03-03 11:37 ` Andrew Morton
  2 siblings, 1 reply; 26+ messages in thread
From: Arjan van de Ven @ 2005-02-22 22:03 UTC (permalink / raw)
  To: Jes Sorensen; +Cc: Andrew Morton, linux-ia64, linux-kernel

On Tue, 2005-02-22 at 04:52 -0500, Jes Sorensen wrote:
> Hi,
> 
> This patch introduces ia64 specific read/write handlers for /dev/mem
> access which is needed to avoid uncached pages to be accessed through
> the cached kernel window which can lead to random corruption. It also
> introduces a new page-flag PG_uncached which will be used to mark the
> uncached pages. I assume this may be useful to other architectures as
> well where the CPU may use speculative reads which conflict with
> uncached access. In addition I moved do_write_mem to be under
> ARCH_HAS_DEV_MEM as it's only ever used if that is defined.
> 
> The patch is needed for the new ia64 special memory driver (mspec -
> former fetchop).


is there ANY valid reason to allow access to cached uses at all?
(eg kernel ram)

why not just disable any such ram access entirely...



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch -mm series] ia64 specific /dev/mem handlers
  2005-02-22 22:03 ` Arjan van de Ven
@ 2005-02-22 22:30   ` Jes Sorensen
  2005-02-22 22:42     ` Arjan van de Ven
  0 siblings, 1 reply; 26+ messages in thread
From: Jes Sorensen @ 2005-02-22 22:30 UTC (permalink / raw)
  To: Arjan van de Ven; +Cc: Andrew Morton, linux-ia64, linux-kernel

>>>>> "Arjan" == Arjan van de Ven <arjan@infradead.org> writes:

Arjan> On Tue, 2005-02-22 at 04:52 -0500, Jes Sorensen wrote:
>> Hi,
>> 
>> This patch introduces ia64 specific read/write handlers for
>> /dev/mem access which is needed to avoid uncached pages to be
>> accessed through the cached kernel window which can lead to random
>> corruption. It also introduces a new page-flag PG_uncached which
>> will be used to mark the uncached pages. I assume this may be
>> useful to other architectures as well where the CPU may use
>> speculative reads which conflict with uncached access. In addition
>> I moved do_write_mem to be under ARCH_HAS_DEV_MEM as it's only ever
>> used if that is defined.
>> 
>> The patch is needed for the new ia64 special memory driver (mspec -
>> former fetchop).

Arjan> is there ANY valid reason to allow access to cached uses at
Arjan> all?  (eg kernel ram)

Arjan> why not just disable any such ram access entirely...

You mean uncached?

For userspace it's used by some of the MPI type apps in userland.
Presumably there's cases where it gives better performance. For the
SN2 hardware there's also a special mode known as fetchop mode which
requires uncached memory, it's used quite heavily by the same types of
apps.

The problem is if you then have apps such as lcrash which may read
through all kernel memory. If a page is mapped uncached to userland
you can hit the memory corruption case if you access the same page
cached from within a kernel cached mapping. I suspect the suspend code
could hit similar problems, but I don't know that code well enough to
say if it's the case or not.

Cheers,
Jes

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch -mm series] ia64 specific /dev/mem handlers
  2005-02-22 19:25       ` Andrew Morton
  2005-02-22 19:35         ` Dave Hansen
  2005-02-22 21:34         ` Jes Sorensen
@ 2005-02-22 22:39         ` Jes Sorensen
  2005-02-22 23:34           ` Andrew Morton
  2 siblings, 1 reply; 26+ messages in thread
From: Jes Sorensen @ 2005-02-22 22:39 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Matthew Wilcox, linux-ia64, linux-kernel

>>>>> "Andrew" == Andrew Morton <akpm@osdl.org> writes:

Andrew> Matthew Wilcox <matthew@wil.cx> wrote:
>> 
>> On Tue, Feb 22, 2005 at 09:41:04AM -0500, Jes Sorensen wrote:
>> > >> + if (page->flags & PG_uncached)
>> > 
>> > Andrew> dude.  That ain't gonna work ;)
>> > 
>> > Pardon my lack of clue, but why not?

Andrew> 	if (page->flags & (1<<PG_uncached))

Andrew> would have been correcter.

Andrew,

After applying the clue 2x4 to my head a couple of times, I came up
with this patch. Hopefully it will work a bit better ;-)

Cheers,
Jes

Signed-off-by: Jes Sorensen <jes@wildopensource.com>

diff -urN -X /usr/people/jes/exclude-linux linux-2.6.11-rc3-mm2-vanilla/arch/ia64/kernel/Makefile linux-2.6.11-rc3-mm2/arch/ia64/kernel/Makefile
--- linux-2.6.11-rc3-mm2-vanilla/arch/ia64/kernel/Makefile	2005-02-16 11:20:19 -08:00
+++ linux-2.6.11-rc3-mm2/arch/ia64/kernel/Makefile	2005-02-16 11:58:35 -08:00
@@ -7,7 +7,7 @@
 obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o	\
 	 irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o		\
 	 salinfo.o semaphore.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
-	 unwind.o mca.o mca_asm.o topology.o
+	 unwind.o mca.o mca_asm.o topology.o mem.o
 
 obj-$(CONFIG_IA64_BRL_EMU)	+= brl_emu.o
 obj-$(CONFIG_IA64_GENERIC)	+= acpi-ext.o
diff -urN -X /usr/people/jes/exclude-linux linux-2.6.11-rc3-mm2-vanilla/arch/ia64/kernel/mem.c linux-2.6.11-rc3-mm2/arch/ia64/kernel/mem.c
--- linux-2.6.11-rc3-mm2-vanilla/arch/ia64/kernel/mem.c	1969-12-31 16:00:00 -08:00
+++ linux-2.6.11-rc3-mm2/arch/ia64/kernel/mem.c	2005-02-22 14:11:40 -08:00
@@ -0,0 +1,151 @@
+/*
+ *  arch/ia64/kernel/mem.c
+ *
+ *  IA64 specific  portions of /dev/mem access, notably handling
+ *  read/write from uncached memory
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *  Copyright (C) 2005 Jes Sorensen <jes@wildopensource.com>
+ */
+
+
+#include <linux/mm.h>
+
+#include <asm/io.h>
+#include <asm/uaccess.h>
+
+
+extern loff_t memory_lseek(struct file * file, loff_t offset, int orig);
+extern int mmap_kmem(struct file * file, struct vm_area_struct * vma);
+extern int open_port(struct inode * inode, struct file * filp);
+
+
+static inline int range_is_allowed(unsigned long from, unsigned long to)
+{
+	unsigned long cursor;
+
+	cursor = from >> PAGE_SHIFT;
+	while ((cursor << PAGE_SHIFT) < to) {
+		if (!devmem_is_allowed(cursor))
+			return 0;
+		cursor++;
+	}
+	return 1;
+}
+
+
+/*
+ * This funcion reads the *physical* memory. The f_pos points directly
+ * to the memory location. 
+ */
+static ssize_t read_mem(struct file * file, char __user * buf,
+			size_t count, loff_t *ppos)
+{
+	unsigned long p = *ppos;
+	ssize_t read, sz;
+	struct page *page;
+	char *ptr;
+
+
+	if (!valid_phys_addr_range(p, &count))
+		return -EFAULT;
+	read = 0;
+
+	while (count > 0) {
+		/*
+		 * Handle first page in case it's not aligned
+		 */
+		if (-p & (PAGE_SIZE - 1))
+			sz = -p & (PAGE_SIZE - 1);
+		else
+			sz = min(PAGE_SIZE, count);
+
+		page = pfn_to_page(p >> PAGE_SHIFT);
+		/*
+		 * On ia64 if a page has been mapped somewhere as
+		 * uncached, then it must also be accessed uncached
+		 * by the kernel or data corruption may occur
+		 */
+		if (PageUncached(page))
+			ptr = (char *)p + __IA64_UNCACHED_OFFSET;
+		else
+			ptr = __va(p);
+		if (copy_to_user(buf, ptr, sz))
+			return -EFAULT;
+		buf += sz;
+		p += sz;
+		count -= sz;
+		read += sz;
+	}
+
+	*ppos += read;
+	return read;
+}
+
+
+static ssize_t write_mem(struct file * file, const char __user * buf, 
+			 size_t count, loff_t *ppos)
+{
+	unsigned long p = *ppos;
+	unsigned long copied;
+	ssize_t written, sz;
+	struct page *page;
+	char *ptr;
+
+	if (!valid_phys_addr_range(p, &count))
+		return -EFAULT;
+
+	written = 0;
+
+	if (!range_is_allowed(p, p + count))
+		return -EPERM;
+	/*
+	 * Need virtual p below here
+	 */
+	while (count > 0) {
+		/*
+		 * Handle first page in case it's not aligned
+		 */
+		if (-p & (PAGE_SIZE - 1))
+			sz = -p & (PAGE_SIZE - 1);
+		else
+			sz = min(PAGE_SIZE, count);
+
+		page = pfn_to_page(p >> PAGE_SHIFT);
+		/*
+		 * On ia64 if a page has been mapped somewhere as
+		 * uncached, then it must also be accessed uncached
+		 * by the kernel or data corruption may occur
+		 */
+		if (PageUncached(page))
+			ptr = (char *)p + __IA64_UNCACHED_OFFSET;
+		else
+			ptr = __va(p);
+
+		copied = copy_from_user(ptr, buf, sz);
+		if (copied) {
+			ssize_t ret;
+
+			ret = written + (sz - copied);
+			if (ret)
+				return ret;
+			return -EFAULT;
+		}
+		buf += sz;
+		p += sz;
+		count -= sz;
+		written += sz;
+	}
+
+	*ppos += written;
+	return written;
+}
+
+
+struct file_operations mem_fops = {
+	.llseek		= memory_lseek,
+	.read		= read_mem,
+	.write		= write_mem,
+	.mmap		= mmap_kmem,
+	.open		= open_port,
+};
diff -urN -X /usr/people/jes/exclude-linux linux-2.6.11-rc3-mm2-vanilla/drivers/char/mem.c linux-2.6.11-rc3-mm2/drivers/char/mem.c
--- linux-2.6.11-rc3-mm2-vanilla/drivers/char/mem.c	2005-02-16 11:20:55 -08:00
+++ linux-2.6.11-rc3-mm2/drivers/char/mem.c	2005-02-16 11:58:35 -08:00
@@ -125,39 +125,7 @@
 	}
 	return 1;
 }
-static ssize_t do_write_mem(void *p, unsigned long realp,
-			    const char __user * buf, size_t count, loff_t *ppos)
-{
-	ssize_t written;
-	unsigned long copied;
-
-	written = 0;
-#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
-	/* we don't have page 0 mapped on sparc and m68k.. */
-	if (realp < PAGE_SIZE) {
-		unsigned long sz = PAGE_SIZE-realp;
-		if (sz > count) sz = count; 
-		/* Hmm. Do something? */
-		buf+=sz;
-		p+=sz;
-		count-=sz;
-		written+=sz;
-	}
-#endif
-	if (!range_is_allowed(realp, realp+count))
-		return -EPERM;
-	copied = copy_from_user(p, buf, count);
-	if (copied) {
-		ssize_t ret = written + (count - copied);
 
-		if (ret)
-			return ret;
-		return -EFAULT;
-	}
-	written += count;
-	*ppos += written;
-	return written;
-}
 
 #ifndef ARCH_HAS_DEV_MEM
 /*
@@ -196,6 +164,40 @@
 	return read;
 }
 
+static ssize_t do_write_mem(void *p, unsigned long realp,
+			    const char __user * buf, size_t count, loff_t *ppos)
+{
+	ssize_t written;
+	unsigned long copied;
+
+	written = 0;
+#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
+	/* we don't have page 0 mapped on sparc and m68k.. */
+	if (realp < PAGE_SIZE) {
+		unsigned long sz = PAGE_SIZE-realp;
+		if (sz > count) sz = count; 
+		/* Hmm. Do something? */
+		buf+=sz;
+		p+=sz;
+		count-=sz;
+		written+=sz;
+	}
+#endif
+	if (!range_is_allowed(realp, realp+count))
+		return -EPERM;
+	copied = copy_from_user(p, buf, count);
+	if (copied) {
+		ssize_t ret = written + (count - copied);
+
+		if (ret)
+			return ret;
+		return -EFAULT;
+	}
+	written += count;
+	*ppos += written;
+	return written;
+}
+
 static ssize_t write_mem(struct file * file, const char __user * buf, 
 			 size_t count, loff_t *ppos)
 {
@@ -207,7 +209,8 @@
 }
 #endif
 
-static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
+
+int mmap_kmem(struct file * file, struct vm_area_struct * vma)
 {
 #ifdef pgprot_noncached
 	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
@@ -553,7 +556,7 @@
  * also note that seeking relative to the "end of file" isn't supported:
  * it has no meaning, so it returns -EINVAL.
  */
-static loff_t memory_lseek(struct file * file, loff_t offset, int orig)
+loff_t memory_lseek(struct file * file, loff_t offset, int orig)
 {
 	loff_t ret;
 
@@ -576,7 +579,7 @@
 	return ret;
 }
 
-static int open_port(struct inode * inode, struct file * filp)
+int open_port(struct inode * inode, struct file * filp)
 {
 	return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
 }
diff -urN -X /usr/people/jes/exclude-linux linux-2.6.11-rc3-mm2-vanilla/include/asm-ia64/io.h linux-2.6.11-rc3-mm2/include/asm-ia64/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-ia64/io.h	2005-02-16 11:20:23 -08:00
+++ linux-2.6.11-rc3-mm2/include/asm-ia64/io.h	2005-02-16 11:58:35 -08:00
@@ -481,4 +481,6 @@
 #define BIO_VMERGE_BOUNDARY	(ia64_max_iommu_merge_mask + 1)
 #endif
 
+#define ARCH_HAS_DEV_MEM
+
 #endif /* _ASM_IA64_IO_H */
diff -urN -X /usr/people/jes/exclude-linux linux-2.6.11-rc3-mm2-vanilla/include/linux/page-flags.h linux-2.6.11-rc3-mm2/include/linux/page-flags.h
--- linux-2.6.11-rc3-mm2-vanilla/include/linux/page-flags.h	2005-02-16 11:20:57 -08:00
+++ linux-2.6.11-rc3-mm2/include/linux/page-flags.h	2005-02-22 13:40:22 -08:00
@@ -76,7 +76,7 @@
 #define PG_mappedtodisk		17	/* Has blocks allocated on-disk */
 #define PG_reclaim		18	/* To be reclaimed asap */
 #define PG_nosave_free		19	/* Free, should not be written */
-
+#define PG_uncached		20	/* Page has been mapped as uncached */
 
 /*
  * Global page accounting.  One instance per CPU.  Only unsigned longs are
@@ -301,6 +301,10 @@
 #else
 #define PageSwapCache(page)	0
 #endif
+
+#define PageUncached(page)	test_bit(PG_uncached, &(page)->flags)
+#define SetPageUncached(page)	set_bit(PG_uncached, &(page)->flags)
+#define ClearPageUncached(page)	clear_bit(PG_uncached, &(page)->flags)
 
 struct page;	/* forward declaration */
 

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch -mm series] ia64 specific /dev/mem handlers
  2005-02-22 22:30   ` Jes Sorensen
@ 2005-02-22 22:42     ` Arjan van de Ven
  2005-02-23  8:12       ` Jes Sorensen
  0 siblings, 1 reply; 26+ messages in thread
From: Arjan van de Ven @ 2005-02-22 22:42 UTC (permalink / raw)
  To: Jes Sorensen; +Cc: Andrew Morton, linux-ia64, linux-kernel

On Tue, 2005-02-22 at 17:30 -0500, Jes Sorensen wrote:
> 
> For userspace it's used by some of the MPI type apps in userland.

you got to be kidding. Why are these MPI apps accessing memory that the
kernel has mapped cached (eg ram) via /dev/mem?


(eg my proposal is to make /dev/mem to be just device memory not kernel
accessable ram; wouldn't that solve the entire issue cleanly ?)


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch -mm series] ia64 specific /dev/mem handlers
  2005-02-22 22:39         ` Jes Sorensen
@ 2005-02-22 23:34           ` Andrew Morton
  2005-02-23 10:01             ` Jes Sorensen
  0 siblings, 1 reply; 26+ messages in thread
From: Andrew Morton @ 2005-02-22 23:34 UTC (permalink / raw)
  To: Jes Sorensen; +Cc: matthew, linux-ia64, linux-kernel

Jes Sorensen <jes@wildopensource.com> wrote:
>
> After applying the clue 2x4 to my head a couple of times, I came up
> with this patch. Hopefully it will work a bit better ;-)
> 

I know it's repetitious, but it's nice to maintain a changelog entry along
with the patch.  Especially when seventy people have asked "wtf is this patch
for?".

Implementation-wise, do you really need to clone-and-own the mem.c
functions?  Would it not be sufficient to do

	ptr = arch_translate_mem_ptr(page, ptr);

inside mem.c?


> + *  arch/ia64/kernel/mem.c
> ...
> +extern loff_t memory_lseek(struct file * file, loff_t offset, int orig);
> +extern int mmap_kmem(struct file * file, struct vm_area_struct * vma);
> +extern int open_port(struct inode * inode, struct file * filp);
> +

Please find a .h file for the function prototypes.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch -mm series] ia64 specific /dev/mem handlers
  2005-02-22 21:38           ` Jes Sorensen
@ 2005-02-23  0:48             ` Dave Hansen
  0 siblings, 0 replies; 26+ messages in thread
From: Dave Hansen @ 2005-02-23  0:48 UTC (permalink / raw)
  To: Jes Sorensen
  Cc: Andrew Morton, Matthew Wilcox, linux-ia64, Linux Kernel Mailing List

On Tue, 2005-02-22 at 16:38 -0500, Jes Sorensen wrote:
> >>>>> "Dave" == Dave Hansen <haveblue@us.ibm.com> writes:
> 
> Dave> I was talking with Nigel Cunningham about doing something a
> Dave> little different from the classic page flag bits when the number
> Dave> of users is restricted and performance isn't ultra-critical.
> Dave> Would something like this work for you, instead of using a real
> Dave> page->flags bit for PG_cached?
> 
> Just took a quick look at this and it looks a bit heavy for our
> use. We are only looking at a small number of pages. However I could
> imagine future cases where performance may be more critical.

If it's a quite small number (or range) of pages, perhaps a short
list_head list would suffice.  It would sure beat consuming a page flag.

-- Dave


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch -mm series] ia64 specific /dev/mem handlers
  2005-02-22 22:42     ` Arjan van de Ven
@ 2005-02-23  8:12       ` Jes Sorensen
  2005-02-23  8:24         ` Arjan van de Ven
  0 siblings, 1 reply; 26+ messages in thread
From: Jes Sorensen @ 2005-02-23  8:12 UTC (permalink / raw)
  To: Arjan van de Ven; +Cc: Andrew Morton, linux-ia64, linux-kernel

>>>>> "Arjan" == Arjan van de Ven <arjan@infradead.org> writes:

Arjan> On Tue, 2005-02-22 at 17:30 -0500, Jes Sorensen wrote:
>>  For userspace it's used by some of the MPI type apps in userland.

Arjan> you got to be kidding. Why are these MPI apps accessing memory
Arjan> that the kernel has mapped cached (eg ram) via /dev/mem?

Oh sorry, I think  we're misunderstanding eachother. /dev/mem is used
by lcrash.

Arjan> (eg my proposal is to make /dev/mem to be just device memory
Arjan> not kernel accessable ram; wouldn't that solve the entire issue
Arjan> cleanly ?)

It would kill lcrash support, but sure it would solve this specific
problem. However what happens if someone wants to share say some
texture ram between the kernel and a video card and that has to be
mapped uncached? Though up example here though.

Cheers,
Jes


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch -mm series] ia64 specific /dev/mem handlers
  2005-02-23  8:12       ` Jes Sorensen
@ 2005-02-23  8:24         ` Arjan van de Ven
  0 siblings, 0 replies; 26+ messages in thread
From: Arjan van de Ven @ 2005-02-23  8:24 UTC (permalink / raw)
  To: Jes Sorensen; +Cc: Andrew Morton, linux-ia64, linux-kernel

>  However what happens if someone wants to share say some
> texture ram between the kernel and a video card and that has to be
> mapped uncached? Though up example here though.

also that's surely supposed to be controlled by some kernel driver,
which in turn can and should export it's own mmap instead with all the
proper attributes...



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch -mm series] ia64 specific /dev/mem handlers
  2005-02-22 23:34           ` Andrew Morton
@ 2005-02-23 10:01             ` Jes Sorensen
  2005-02-23 22:34               ` Christoph Hellwig
  0 siblings, 1 reply; 26+ messages in thread
From: Jes Sorensen @ 2005-02-23 10:01 UTC (permalink / raw)
  To: Andrew Morton; +Cc: matthew, linux-ia64, linux-kernel

>>>>> "Andrew" == Andrew Morton <akpm@osdl.org> writes:

Andrew> Jes Sorensen <jes@wildopensource.com> wrote:
>>  After applying the clue 2x4 to my head a couple of times, I came
>> up with this patch. Hopefully it will work a bit better ;-)
>> 

Andrew> I know it's repetitious, but it's nice to maintain a changelog
Andrew> entry along with the patch.  Especially when seventy people
Andrew> have asked "wtf is this patch for?".

True, that got lost in the multi-iteration game. Added one now.

Andrew> Implementation-wise, do you really need to clone-and-own the
Andrew> mem.c functions?  Would it not be sufficient to do

Andrew> 	ptr = arch_translate_mem_ptr(page, ptr);

Andrew> inside mem.c?

I tried to avoid making too many changes to drivers/char/mem.c but on
the long run this is probably cleaner. It makes it slightly more
complex on all architectures to do large read/write calls on /dev/mem
but it's not exactly a performance path.

>> + * arch/ia64/kernel/mem.c ...  +extern loff_t memory_lseek(struct
>> file * file, loff_t offset, int orig); +extern int mmap_kmem(struct
>> file * file, struct vm_area_struct * vma); +extern int
>> open_port(struct inode * inode, struct file * filp); +

Andrew> Please find a .h file for the function prototypes.

All gone with the merge into drivers/char/mem.c

Note, I put the macros in include/asm-ia64/uaccess.h to avoid all hell
breaking lose in the ia64 build process which generates offsets.h. We
can introduce a new header file but it seems a bit overkill imho.

Cheers,
Jes

Convert /dev/mem read/write calls to use arch_translate_mem_ptr if
available. Needed on ia64 for pages converted fo uncached mappings to
avoid it being accessed in cached mode after the conversion which can
lead to memory corruption. Introduces PG_uncached page flag for
marking pages uncached.

Also folds do_write_mem into write_mem as it was it's only user.

Signed-off-by: Jes Sorensen <jes@wildopensource.com>


diff -urN -X /usr/people/jes/exclude-linux --exclude=shubio.h linux-2.6.11-rc3-mm2-vanilla/drivers/char/mem.c linux-2.6.11-rc3-mm2-2/drivers/char/mem.c
--- linux-2.6.11-rc3-mm2-vanilla/drivers/char/mem.c	2005-02-16 11:20:55 -08:00
+++ linux-2.6.11-rc3-mm2-2/drivers/char/mem.c	2005-02-23 01:51:54 -08:00
@@ -125,39 +125,6 @@
 	}
 	return 1;
 }
-static ssize_t do_write_mem(void *p, unsigned long realp,
-			    const char __user * buf, size_t count, loff_t *ppos)
-{
-	ssize_t written;
-	unsigned long copied;
-
-	written = 0;
-#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
-	/* we don't have page 0 mapped on sparc and m68k.. */
-	if (realp < PAGE_SIZE) {
-		unsigned long sz = PAGE_SIZE-realp;
-		if (sz > count) sz = count; 
-		/* Hmm. Do something? */
-		buf+=sz;
-		p+=sz;
-		count-=sz;
-		written+=sz;
-	}
-#endif
-	if (!range_is_allowed(realp, realp+count))
-		return -EPERM;
-	copied = copy_from_user(p, buf, count);
-	if (copied) {
-		ssize_t ret = written + (count - copied);
-
-		if (ret)
-			return ret;
-		return -EFAULT;
-	}
-	written += count;
-	*ppos += written;
-	return written;
-}
 
 #ifndef ARCH_HAS_DEV_MEM
 /*
@@ -168,7 +135,9 @@
 			size_t count, loff_t *ppos)
 {
 	unsigned long p = *ppos;
-	ssize_t read;
+	ssize_t read, sz;
+	struct page *page;
+	char *ptr;
 
 	if (!valid_phys_addr_range(p, &count))
 		return -EFAULT;
@@ -176,7 +145,7 @@
 #if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
 	/* we don't have page 0 mapped on sparc and m68k.. */
 	if (p < PAGE_SIZE) {
-		unsigned long sz = PAGE_SIZE-p;
+		sz = PAGE_SIZE - p;
 		if (sz > count) 
 			sz = count; 
 		if (sz > 0) {
@@ -189,9 +158,35 @@
 		}
 	}
 #endif
-	if (copy_to_user(buf, __va(p), count))
-		return -EFAULT;
-	read += count;
+
+	while (count > 0) {
+		/*
+		 * Handle first page in case it's not aligned
+		 */
+		if (-p & (PAGE_SIZE - 1))
+			sz = -p & (PAGE_SIZE - 1);
+		else
+			sz = min(PAGE_SIZE, count);
+
+		page = pfn_to_page(p >> PAGE_SHIFT);
+		/*
+		 * On ia64 if a page has been mapped somewhere as
+		 * uncached, then it must also be accessed uncached
+		 * by the kernel or data corruption may occur
+		 */
+#ifdef ARCH_HAS_TRANSLATE_MEM_PTR
+		ptr = arch_translate_mem_ptr(page, p);
+#else
+		ptr = __va(p);
+#endif
+		if (copy_to_user(buf, ptr, sz))
+			return -EFAULT;
+		buf += sz;
+		p += sz;
+		count -= sz;
+		read += sz;
+	}
+
 	*ppos += read;
 	return read;
 }
@@ -200,10 +195,70 @@
 			 size_t count, loff_t *ppos)
 {
 	unsigned long p = *ppos;
+	ssize_t written, sz;
+	unsigned long copied;
+	struct page *page;
+	void *ptr;
 
 	if (!valid_phys_addr_range(p, &count))
 		return -EFAULT;
-	return do_write_mem(__va(p), p, buf, count, ppos);
+
+	if (!range_is_allowed(p, p + count))
+		return -EPERM;
+
+	written = 0;
+
+#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
+	/* we don't have page 0 mapped on sparc and m68k.. */
+	if (p < PAGE_SIZE) {
+		unsigned long sz = PAGE_SIZE - p;
+		if (sz > count)
+			sz = count; 
+		/* Hmm. Do something? */
+		buf += sz;
+		p += sz;
+		count -= sz;
+		written += sz;
+	}
+#endif
+
+	while (count > 0) {
+		/*
+		 * Handle first page in case it's not aligned
+		 */
+		if (-p & (PAGE_SIZE - 1))
+			sz = -p & (PAGE_SIZE - 1);
+		else
+			sz = min(PAGE_SIZE, count);
+
+		page = pfn_to_page(p >> PAGE_SHIFT);
+		/*
+		 * On ia64 if a page has been mapped somewhere as
+		 * uncached, then it must also be accessed uncached
+		 * by the kernel or data corruption may occur
+		 */
+#ifdef ARCH_HAS_TRANSLATE_MEM_PTR
+		ptr = arch_translate_mem_ptr(page, p);
+#else
+		ptr = __va(p);
+#endif
+		copied = copy_from_user(ptr, buf, sz);
+		if (copied) {
+			ssize_t ret;
+
+			ret = written + (sz - copied);
+			if (ret)
+				return ret;
+			return -EFAULT;
+		}
+		buf += sz;
+		p += sz;
+		count -= sz;
+		written += sz;
+	}
+
+	*ppos += written;
+	return written;
 }
 #endif
 
diff -urN -X /usr/people/jes/exclude-linux --exclude=shubio.h linux-2.6.11-rc3-mm2-vanilla/include/asm-ia64/uaccess.h linux-2.6.11-rc3-mm2-2/include/asm-ia64/uaccess.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-ia64/uaccess.h	2005-02-16 11:20:23 -08:00
+++ linux-2.6.11-rc3-mm2-2/include/asm-ia64/uaccess.h	2005-02-23 00:42:12 -08:00
@@ -35,6 +35,8 @@
 #include <linux/compiler.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
+#include <linux/page-flags.h>
+#include <linux/mm.h>
 
 #include <asm/intrinsics.h>
 #include <asm/pgtable.h>
@@ -365,6 +367,20 @@
 		return 1;
 	}
 	return 0;
+}
+
+#define ARCH_HAS_TRANSLATE_MEM_PTR	1
+static __inline__ char *
+arch_translate_mem_ptr (struct page *page, unsigned long p)
+{
+	char * ptr;
+
+	if (PageUncached(page))
+		ptr = (char *)p + __IA64_UNCACHED_OFFSET;
+	else
+		ptr = __va(p);
+
+	return ptr;
 }
 
 #endif /* _ASM_IA64_UACCESS_H */
diff -urN -X /usr/people/jes/exclude-linux --exclude=shubio.h linux-2.6.11-rc3-mm2-vanilla/include/linux/page-flags.h linux-2.6.11-rc3-mm2-2/include/linux/page-flags.h
--- linux-2.6.11-rc3-mm2-vanilla/include/linux/page-flags.h	2005-02-16 11:20:57 -08:00
+++ linux-2.6.11-rc3-mm2-2/include/linux/page-flags.h	2005-02-22 13:40:22 -08:00
@@ -76,7 +76,7 @@
 #define PG_mappedtodisk		17	/* Has blocks allocated on-disk */
 #define PG_reclaim		18	/* To be reclaimed asap */
 #define PG_nosave_free		19	/* Free, should not be written */
-
+#define PG_uncached		20	/* Page has been mapped as uncached */
 
 /*
  * Global page accounting.  One instance per CPU.  Only unsigned longs are
@@ -301,6 +301,10 @@
 #else
 #define PageSwapCache(page)	0
 #endif
+
+#define PageUncached(page)	test_bit(PG_uncached, &(page)->flags)
+#define SetPageUncached(page)	set_bit(PG_uncached, &(page)->flags)
+#define ClearPageUncached(page)	clear_bit(PG_uncached, &(page)->flags)
 
 struct page;	/* forward declaration */
 

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch -mm series] ia64 specific /dev/mem handlers
  2005-02-23 10:01             ` Jes Sorensen
@ 2005-02-23 22:34               ` Christoph Hellwig
  2005-02-24 16:11                 ` Jes Sorensen
  0 siblings, 1 reply; 26+ messages in thread
From: Christoph Hellwig @ 2005-02-23 22:34 UTC (permalink / raw)
  To: Jes Sorensen; +Cc: Andrew Morton, matthew, linux-ia64, linux-kernel

> +		page = pfn_to_page(p >> PAGE_SHIFT);
> +		/*
> +		 * On ia64 if a page has been mapped somewhere as
> +		 * uncached, then it must also be accessed uncached
> +		 * by the kernel or data corruption may occur
> +		 */
> +#ifdef ARCH_HAS_TRANSLATE_MEM_PTR
> +		ptr = arch_translate_mem_ptr(page, p);
> +#else
> +		ptr = __va(p);
> +#endif

Please remove the ifdef by letting every architecture implement a
arch_translate_mem_ptr (and give it a saner name while you're at it).

Also shouldn't the pfn_to_page be done inside arch_translate_mem_ptr?
The struct page * isn't used anywhere else.

> +	if (!range_is_allowed(p, p + count))

isn't the name a little too generic?

> +
> +	written = 0;
> +
> +#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
> +	/* we don't have page 0 mapped on sparc and m68k.. */
> +	if (p < PAGE_SIZE) {
> +		unsigned long sz = PAGE_SIZE - p;
> +		if (sz > count)
> +			sz = count; 
> +		/* Hmm. Do something? */
> +		buf += sz;
> +		p += sz;
> +		count -= sz;
> +		written += sz;
> +	}
> +#endif

While you're at it replace the ifdef mania with a #ifdef
__HAVE_ARCH_PAGE_ZERO_MAPPED or something similar.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch -mm series] ia64 specific /dev/mem handlers
  2005-02-23 22:34               ` Christoph Hellwig
@ 2005-02-24 16:11                 ` Jes Sorensen
  2005-03-10  6:55                   ` Andrew Morton
  0 siblings, 1 reply; 26+ messages in thread
From: Jes Sorensen @ 2005-02-24 16:11 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Andrew Morton, linux-ia64, linux-kernel

>>>>> "Christoph" == Christoph Hellwig <hch@infradead.org> writes:

Christoph> Please remove the ifdef by letting every architecture
Christoph> implement a arch_translate_mem_ptr (and give it a saner
Christoph> name while you're at it).

Ok, I thought it was tidier the other way, but I am not biased.

Christoph> Also shouldn't the pfn_to_page be done inside
Christoph> arch_translate_mem_ptr?  The struct page * isn't used
Christoph> anywhere else.

Done!

>> + if (!range_is_allowed(p, p + count))

Christoph> isn't the name a little too generic?

Actually, thats an old function, nothing I added - but I renamed it.

Christoph> While you're at it replace the ifdef mania with a #ifdef
Christoph> __HAVE_ARCH_PAGE_ZERO_MAPPED or something similar.

Using __ARCH_HAS_NO_PAGE_ZERO_MAPPED for it now.

Cheers,
Jes

Convert /dev/mem read/write calls to use arch_translate_mem_ptr if
available. Needed on ia64 for pages converted fo uncached mappings to
avoid it being accessed in cached mode after the conversion which can
lead to memory corruption. Introduces PG_uncached page flag for
marking pages uncached.

Also folds do_write_mem into write_mem as it was it's only user.

Use __ARCH_HAS_NO_PAGE_ZERO_MAPPED for architectures to indicate they
require magic handling of the zero page (Sparc and m68k).

Signed-off-by: Jes Sorensen <jes@wildopensource.com>


diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/drivers/char/mem.c linux-2.6.11-rc3-mm2-3/drivers/char/mem.c
--- linux-2.6.11-rc3-mm2-vanilla/drivers/char/mem.c	2005-02-16 11:20:55 -08:00
+++ linux-2.6.11-rc3-mm2-3/drivers/char/mem.c	2005-02-24 04:45:34 -08:00
@@ -125,39 +125,6 @@
 	}
 	return 1;
 }
-static ssize_t do_write_mem(void *p, unsigned long realp,
-			    const char __user * buf, size_t count, loff_t *ppos)
-{
-	ssize_t written;
-	unsigned long copied;
-
-	written = 0;
-#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
-	/* we don't have page 0 mapped on sparc and m68k.. */
-	if (realp < PAGE_SIZE) {
-		unsigned long sz = PAGE_SIZE-realp;
-		if (sz > count) sz = count; 
-		/* Hmm. Do something? */
-		buf+=sz;
-		p+=sz;
-		count-=sz;
-		written+=sz;
-	}
-#endif
-	if (!range_is_allowed(realp, realp+count))
-		return -EPERM;
-	copied = copy_from_user(p, buf, count);
-	if (copied) {
-		ssize_t ret = written + (count - copied);
-
-		if (ret)
-			return ret;
-		return -EFAULT;
-	}
-	written += count;
-	*ppos += written;
-	return written;
-}
 
 #ifndef ARCH_HAS_DEV_MEM
 /*
@@ -168,15 +135,16 @@
 			size_t count, loff_t *ppos)
 {
 	unsigned long p = *ppos;
-	ssize_t read;
+	ssize_t read, sz;
+	char *ptr;
 
 	if (!valid_phys_addr_range(p, &count))
 		return -EFAULT;
 	read = 0;
-#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
+#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
 	/* we don't have page 0 mapped on sparc and m68k.. */
 	if (p < PAGE_SIZE) {
-		unsigned long sz = PAGE_SIZE-p;
+		sz = PAGE_SIZE - p;
 		if (sz > count) 
 			sz = count; 
 		if (sz > 0) {
@@ -189,9 +157,31 @@
 		}
 	}
 #endif
-	if (copy_to_user(buf, __va(p), count))
-		return -EFAULT;
-	read += count;
+
+	while (count > 0) {
+		/*
+		 * Handle first page in case it's not aligned
+		 */
+		if (-p & (PAGE_SIZE - 1))
+			sz = -p & (PAGE_SIZE - 1);
+		else
+			sz = min(PAGE_SIZE, count);
+
+		/*
+		 * On ia64 if a page has been mapped somewhere as
+		 * uncached, then it must also be accessed uncached
+		 * by the kernel or data corruption may occur
+		 */
+		ptr = xlate_dev_mem_ptr(p);
+
+		if (copy_to_user(buf, ptr, sz))
+			return -EFAULT;
+		buf += sz;
+		p += sz;
+		count -= sz;
+		read += sz;
+	}
+
 	*ppos += read;
 	return read;
 }
@@ -200,10 +190,65 @@
 			 size_t count, loff_t *ppos)
 {
 	unsigned long p = *ppos;
+	ssize_t written, sz;
+	unsigned long copied;
+	void *ptr;
 
 	if (!valid_phys_addr_range(p, &count))
 		return -EFAULT;
-	return do_write_mem(__va(p), p, buf, count, ppos);
+
+	if (!range_is_allowed(p, p + count))
+		return -EPERM;
+
+	written = 0;
+
+#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
+	/* we don't have page 0 mapped on sparc and m68k.. */
+	if (p < PAGE_SIZE) {
+		unsigned long sz = PAGE_SIZE - p;
+		if (sz > count)
+			sz = count; 
+		/* Hmm. Do something? */
+		buf += sz;
+		p += sz;
+		count -= sz;
+		written += sz;
+	}
+#endif
+
+	while (count > 0) {
+		/*
+		 * Handle first page in case it's not aligned
+		 */
+		if (-p & (PAGE_SIZE - 1))
+			sz = -p & (PAGE_SIZE - 1);
+		else
+			sz = min(PAGE_SIZE, count);
+
+		/*
+		 * On ia64 if a page has been mapped somewhere as
+		 * uncached, then it must also be accessed uncached
+		 * by the kernel or data corruption may occur
+		 */
+		ptr = xlate_dev_mem_ptr(p);
+
+		copied = copy_from_user(ptr, buf, sz);
+		if (copied) {
+			ssize_t ret;
+
+			ret = written + (sz - copied);
+			if (ret)
+				return ret;
+			return -EFAULT;
+		}
+		buf += sz;
+		p += sz;
+		count -= sz;
+		written += sz;
+	}
+
+	*ppos += written;
+	return written;
 }
 #endif
 
@@ -303,7 +348,7 @@
 		if (count > (unsigned long) high_memory - p)
 			read = (unsigned long) high_memory - p;
 
-#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
+#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
 		/* we don't have page 0 mapped on sparc and m68k.. */
 		if (p < PAGE_SIZE && read > 0) {
 			size_t tmp = PAGE_SIZE - p;
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-alpha/io.h linux-2.6.11-rc3-mm2-3/include/asm-alpha/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-alpha/io.h	2004-12-24 13:35:28 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-alpha/io.h	2005-02-24 04:41:25 -08:00
@@ -666,6 +666,12 @@
 #define writeq writeq
 #define readq readq
 
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p)	__va(p)
+
 #endif /* __KERNEL__ */
 
 #endif /* __ALPHA_IO_H */
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-arm/io.h linux-2.6.11-rc3-mm2-3/include/asm-arm/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-arm/io.h	2004-12-24 13:34:30 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-arm/io.h	2005-02-24 04:41:48 -08:00
@@ -271,5 +271,11 @@
 #define BIOVEC_MERGEABLE(vec1, vec2)	\
 	((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2)))
 
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p)	__va(p)
+
 #endif	/* __KERNEL__ */
 #endif	/* __ASM_ARM_IO_H */
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-arm26/io.h linux-2.6.11-rc3-mm2-3/include/asm-arm26/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-arm26/io.h	2004-12-24 13:35:00 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-arm26/io.h	2005-02-24 04:41:56 -08:00
@@ -420,5 +420,11 @@
 #define BIOVEC_MERGEABLE(vec1, vec2)	\
 	((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2)))
 
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p)	__va(p)
+
 #endif	/* __KERNEL__ */
 #endif	/* __ASM_ARM_IO_H */
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-cris/io.h linux-2.6.11-rc3-mm2-3/include/asm-cris/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-cris/io.h	2004-12-24 13:35:23 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-cris/io.h	2005-02-24 04:42:07 -08:00
@@ -86,4 +86,10 @@
 #define outsw(x,y,z)
 #define outsl(x,y,z)
 
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p)	__va(p)
+
 #endif
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-frv/io.h linux-2.6.11-rc3-mm2-3/include/asm-frv/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-frv/io.h	2005-02-16 11:20:23 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-frv/io.h	2005-02-24 04:42:19 -08:00
@@ -273,6 +273,13 @@
 	__asm__ __volatile__ ("membar" : : :"memory");
 }
 
+
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p)	__va(p)
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_IO_H */
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-h8300/io.h linux-2.6.11-rc3-mm2-3/include/asm-h8300/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-h8300/io.h	2004-12-24 13:34:58 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-h8300/io.h	2005-02-24 04:42:32 -08:00
@@ -317,6 +317,12 @@
 #define virt_to_bus virt_to_phys
 #define bus_to_virt phys_to_virt
 
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p)	__va(p)
+
 #endif /* __KERNEL__ */
 
 #endif /* _H8300_IO_H */
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-i386/io.h linux-2.6.11-rc3-mm2-3/include/asm-i386/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-i386/io.h	2004-12-24 13:35:40 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-i386/io.h	2005-02-24 04:41:07 -08:00
@@ -49,6 +49,12 @@
 
 #include <linux/vmalloc.h>
 
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p)	__va(p)
+
 /**
  *	virt_to_phys	-	map virtual addresses to physical
  *	@address: address to remap
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-ia64/uaccess.h linux-2.6.11-rc3-mm2-3/include/asm-ia64/uaccess.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-ia64/uaccess.h	2005-02-16 11:20:23 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-ia64/uaccess.h	2005-02-24 04:47:01 -08:00
@@ -35,6 +35,8 @@
 #include <linux/compiler.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
+#include <linux/page-flags.h>
+#include <linux/mm.h>
 
 #include <asm/intrinsics.h>
 #include <asm/pgtable.h>
@@ -365,6 +367,22 @@
 		return 1;
 	}
 	return 0;
+}
+
+#define ARCH_HAS_TRANSLATE_MEM_PTR	1
+static __inline__ char *
+xlate_dev_mem_ptr (unsigned long p)
+{
+	struct page *page;
+	char * ptr;
+
+	page = pfn_to_page(p >> PAGE_SHIFT);
+	if (PageUncached(page))
+		ptr = (char *)p + __IA64_UNCACHED_OFFSET;
+	else
+		ptr = __va(p);
+
+	return ptr;
 }
 
 #endif /* _ASM_IA64_UACCESS_H */
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-m32r/io.h linux-2.6.11-rc3-mm2-3/include/asm-m32r/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-m32r/io.h	2004-12-24 13:34:45 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-m32r/io.h	2005-02-24 04:43:02 -08:00
@@ -216,6 +216,12 @@
 	memcpy((void __force *) dst, src, count);
 }
 
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p)	__va(p)
+
 #endif  /* __KERNEL__ */
 
 #endif  /* _ASM_M32R_IO_H */
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-m68k/io.h linux-2.6.11-rc3-mm2-3/include/asm-m68k/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-m68k/io.h	2004-12-24 13:35:00 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-m68k/io.h	2005-02-24 04:43:18 -08:00
@@ -359,4 +359,13 @@
 #endif
 
 #endif /* __KERNEL__ */
+
+#define __ARCH_HAS_NO_PAGE_ZERO_MAPPED		1
+
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p)	__va(p)
+
 #endif /* _IO_H */
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-m68knommu/io.h linux-2.6.11-rc3-mm2-3/include/asm-m68knommu/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-m68knommu/io.h	2004-12-24 13:34:32 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-m68knommu/io.h	2005-02-24 04:43:31 -08:00
@@ -187,6 +187,12 @@
 #define virt_to_bus virt_to_phys
 #define bus_to_virt phys_to_virt
 
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p)	__va(p)
+
 #endif /* __KERNEL__ */
 
 #endif /* _M68KNOMMU_IO_H */
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-mips/io.h linux-2.6.11-rc3-mm2-3/include/asm-mips/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-mips/io.h	2005-02-16 11:20:23 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-mips/io.h	2005-02-24 04:43:42 -08:00
@@ -616,4 +616,10 @@
 #define csr_out32(v,a) (*(volatile u32 *)((unsigned long)(a) + __CSR_32_ADJUST) = (v))
 #define csr_in32(a)    (*(volatile u32 *)((unsigned long)(a) + __CSR_32_ADJUST))
 
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p)	__va(p)
+
 #endif /* _ASM_IO_H */
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-parisc/io.h linux-2.6.11-rc3-mm2-3/include/asm-parisc/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-parisc/io.h	2005-02-16 11:20:23 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-parisc/io.h	2005-02-24 04:43:53 -08:00
@@ -403,4 +403,10 @@
 
 #include <asm-generic/iomap.h>
 
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p)	__va(p)
+
 #endif
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-ppc/io.h linux-2.6.11-rc3-mm2-3/include/asm-ppc/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-ppc/io.h	2005-02-16 11:20:57 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-ppc/io.h	2005-02-24 04:44:04 -08:00
@@ -549,4 +549,10 @@
 #include <asm/mpc8260_pci9.h>
 #endif
 
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p)	__va(p)
+
 #endif /* __KERNEL__ */
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-ppc64/io.h linux-2.6.11-rc3-mm2-3/include/asm-ppc64/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-ppc64/io.h	2005-02-16 11:20:23 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-ppc64/io.h	2005-02-24 04:44:11 -08:00
@@ -442,6 +442,12 @@
 extern int check_legacy_ioport(unsigned long base_port);
 
 
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p)	__va(p)
+
 #endif /* __KERNEL__ */
 
 #endif /* _PPC64_IO_H */
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-s390/io.h linux-2.6.11-rc3-mm2-3/include/asm-s390/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-s390/io.h	2004-12-24 13:35:01 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-s390/io.h	2005-02-24 04:44:21 -08:00
@@ -107,6 +107,12 @@
 
 #define mmiowb()
 
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p)	__va(p)
+
 #endif /* __KERNEL__ */
 
 #endif
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-sparc/io.h linux-2.6.11-rc3-mm2-3/include/asm-sparc/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-sparc/io.h	2005-02-16 11:20:23 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-sparc/io.h	2005-02-24 04:44:28 -08:00
@@ -274,4 +274,12 @@
 
 #endif
 
+#define __ARCH_HAS_NO_PAGE_ZERO_MAPPED		1
+
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p)	__va(p)
+
 #endif /* !(__SPARC_IO_H) */
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-sparc64/io.h linux-2.6.11-rc3-mm2-3/include/asm-sparc64/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-sparc64/io.h	2004-12-24 13:35:25 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-sparc64/io.h	2005-02-24 04:44:38 -08:00
@@ -485,6 +485,12 @@
 #define dma_cache_wback(_start,_size)		do { } while (0)
 #define dma_cache_wback_inv(_start,_size)	do { } while (0)
 
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p)	__va(p)
+
 #endif
 
 #endif /* !(__SPARC64_IO_H) */
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-um/io.h linux-2.6.11-rc3-mm2-3/include/asm-um/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-um/io.h	2004-12-24 13:35:28 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-um/io.h	2005-02-24 04:44:50 -08:00
@@ -22,4 +22,10 @@
 	return __va(address);
 }
 
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p)	__va(p)
+
 #endif
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-v850/io.h linux-2.6.11-rc3-mm2-3/include/asm-v850/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-v850/io.h	2004-12-24 13:34:00 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-v850/io.h	2005-02-24 04:44:59 -08:00
@@ -119,4 +119,10 @@
 #define memcpy_fromio(dst, src, len) memcpy (dst, (void *)src, len)
 #define memcpy_toio(dst, src, len) memcpy ((void *)dst, src, len)
 
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p)	__va(p)
+
 #endif /* __V850_IO_H__ */
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/asm-x86_64/io.h linux-2.6.11-rc3-mm2-3/include/asm-x86_64/io.h
--- linux-2.6.11-rc3-mm2-vanilla/include/asm-x86_64/io.h	2005-02-16 11:20:24 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/asm-x86_64/io.h	2005-02-24 04:45:07 -08:00
@@ -329,6 +329,12 @@
 extern int iommu_bio_merge;
 #define BIO_VMERGE_BOUNDARY iommu_bio_merge
 
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p)	__va(p)
+
 #endif /* __KERNEL__ */
 
 #endif
diff -X /usr/people/jes/exclude-linux -urN linux-2.6.11-rc3-mm2-vanilla/include/linux/page-flags.h linux-2.6.11-rc3-mm2-3/include/linux/page-flags.h
--- linux-2.6.11-rc3-mm2-vanilla/include/linux/page-flags.h	2005-02-16 11:20:57 -08:00
+++ linux-2.6.11-rc3-mm2-3/include/linux/page-flags.h	2005-02-24 02:08:57 -08:00
@@ -76,7 +76,7 @@
 #define PG_mappedtodisk		17	/* Has blocks allocated on-disk */
 #define PG_reclaim		18	/* To be reclaimed asap */
 #define PG_nosave_free		19	/* Free, should not be written */
-
+#define PG_uncached		20	/* Page has been mapped as uncached */
 
 /*
  * Global page accounting.  One instance per CPU.  Only unsigned longs are
@@ -301,6 +301,10 @@
 #else
 #define PageSwapCache(page)	0
 #endif
+
+#define PageUncached(page)	test_bit(PG_uncached, &(page)->flags)
+#define SetPageUncached(page)	set_bit(PG_uncached, &(page)->flags)
+#define ClearPageUncached(page)	clear_bit(PG_uncached, &(page)->flags)
 
 struct page;	/* forward declaration */
 

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch -mm series] ia64 specific /dev/mem handlers
  2005-02-22  9:52 [patch -mm series] ia64 specific /dev/mem handlers Jes Sorensen
  2005-02-22 10:03 ` Andrew Morton
  2005-02-22 22:03 ` Arjan van de Ven
@ 2005-03-03 11:37 ` Andrew Morton
  2005-03-03 12:53   ` Jes Sorensen
  2 siblings, 1 reply; 26+ messages in thread
From: Andrew Morton @ 2005-03-03 11:37 UTC (permalink / raw)
  To: Jes Sorensen; +Cc: linux-ia64, linux-kernel

jes@trained-monkey.org (Jes Sorensen) wrote:
>
> This patch introduces ia64 specific read/write handlers for /dev/mem
>  access which is needed to avoid uncached pages to be accessed through
>  the cached kernel window which can lead to random corruption.

This patch causes hiccups on my ia32e box.

linux:/home/akpm#  /usr/sbin/hwscan --isapnp                       
zsh: 7528 segmentation fault  /usr/sbin/hwscan --isapnp
linux:/home/akpm#  /usr/sbin/hwscan --pci   
zsh: 7529 segmentation fault  /usr/sbin/hwscan --pci
linux:/home/akpm#  /usr/sbin/hwscan --block
zsh: 7530 segmentation fault  /usr/sbin/hwscan --block
linux:/home/akpm#  /usr/sbin/hwscan --floppy
zsh: 7533 segmentation fault  /usr/sbin/hwscan --floppy

strace ends with:

open("/proc/apm", O_RDONLY)             = -1 ENOENT (No such file or directory)
open("/dev/mem", O_RDONLY)              = 3
lseek(3, 1024, SEEK_SET)                = 1024
read(3, 0x50a080, 256)                  = -1 EFAULT (Bad address)
close(3)                                = 0
--- SIGSEGV (Segmentation fault) @ 0 (0) ---
+++ killed by SIGSEGV +++




^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch -mm series] ia64 specific /dev/mem handlers
  2005-03-03 11:37 ` Andrew Morton
@ 2005-03-03 12:53   ` Jes Sorensen
  2005-03-04 12:26     ` Jes Sorensen
  0 siblings, 1 reply; 26+ messages in thread
From: Jes Sorensen @ 2005-03-03 12:53 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-ia64, linux-kernel

>>>>> "Andrew" == Andrew Morton <akpm@osdl.org> writes:

Andrew> jes@trained-monkey.org (Jes Sorensen) wrote:
>>  This patch introduces ia64 specific read/write handlers for
>> /dev/mem access which is needed to avoid uncached pages to be
>> accessed through the cached kernel window which can lead to random
>> corruption.

Andrew> This patch causes hiccups on my ia32e box.

Andrew> linux:/home/akpm# /usr/sbin/hwscan --isapnp zsh: 7528
Andrew> segmentation fault

Weird, I'll take a look.

Thanks,
Jes


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch -mm series] ia64 specific /dev/mem handlers
  2005-03-03 12:53   ` Jes Sorensen
@ 2005-03-04 12:26     ` Jes Sorensen
  0 siblings, 0 replies; 26+ messages in thread
From: Jes Sorensen @ 2005-03-04 12:26 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-ia64, linux-kernel

>>>>> "Jes" == Jes Sorensen <jes@wildopensource.com> writes:

>>>>> "Andrew" == Andrew Morton <akpm@osdl.org> writes:
Andrew> This patch causes hiccups on my ia32e box.

Andrew> linux:/home/akpm# /usr/sbin/hwscan --isapnp zsh: 7528
Andrew> segmentation fault

Jes> Weird, I'll take a look.

-EPROGRAMMERISANIDIOT ... try this.

Cheers,
Jes

/dev/mem handle read/write case where ppos+count < PAGE_SIZE

Signed-off-by: Jes Sorensen <jes@wildopensource.com>


--- ../old/linux-2.6.11-rc5-mm1/drivers/char/mem.c	2005-03-04 07:20:51.000000000 -0500
+++ drivers/char/mem.c	2005-03-04 07:22:20.000000000 -0500
@@ -152,7 +152,9 @@
 		if (-p & (PAGE_SIZE - 1))
 			sz = -p & (PAGE_SIZE - 1);
 		else
-			sz = min_t(unsigned long, PAGE_SIZE, count);
+			sz = PAGE_SIZE;
+
+		sz = min_t(unsigned long, sz, count);
 
 		/*
 		 * On ia64 if a page has been mapped somewhere as
@@ -207,7 +209,9 @@
 		if (-p & (PAGE_SIZE - 1))
 			sz = -p & (PAGE_SIZE - 1);
 		else
-			sz = min_t(unsigned long, PAGE_SIZE, count);
+			sz = PAGE_SIZE;
+
+		sz = min_t(unsigned long, sz, count);
 
 		/*
 		 * On ia64 if a page has been mapped somewhere as
@@ -353,7 +357,9 @@
 			if (-p & (PAGE_SIZE - 1))
 				sz = -p & (PAGE_SIZE - 1);
 			else
-				sz = min_t(unsigned long, PAGE_SIZE, count);
+				sz = PAGE_SIZE;
+
+			sz = min_t(unsigned long, sz, count);
 
 			/*
 			 * On ia64 if a page has been mapped somewhere as
@@ -430,7 +436,9 @@
 		if (-realp & (PAGE_SIZE - 1))
 			sz = -realp & (PAGE_SIZE - 1);
 		else
-			sz = min_t(unsigned long, PAGE_SIZE, count);
+			sz = PAGE_SIZE;
+
+		sz = min_t(unsigned long, sz, count);
 
 		/*
 		 * On ia64 if a page has been mapped somewhere as

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch -mm series] ia64 specific /dev/mem handlers
  2005-02-24 16:11                 ` Jes Sorensen
@ 2005-03-10  6:55                   ` Andrew Morton
  2005-03-10 13:49                     ` Jes Sorensen
  0 siblings, 1 reply; 26+ messages in thread
From: Andrew Morton @ 2005-03-10  6:55 UTC (permalink / raw)
  To: Jes Sorensen; +Cc: hch, linux-ia64, linux-kernel

Jes Sorensen <jes@wildopensource.com> wrote:
>
> Convert /dev/mem read/write calls to use arch_translate_mem_ptr if
>  available. Needed on ia64 for pages converted fo uncached mappings to
>  avoid it being accessed in cached mode after the conversion which can
>  lead to memory corruption. Introduces PG_uncached page flag for
>  marking pages uncached.

For some reason this patch still gives me the creeps.  Maybe it's because
we lose a page flag for something so obscure.

Nothing ever clears PG_uncached.  We'll end up with every page in the
machine marked as being uncached.

But then, nothing ever sets PG_uncached, either.  Is there some patch which
you're hiding from me?

If a page is marked uncached then it'll remain marked as uncached even
after it's unmapped.  Or will it?  Would like to see the other patch, please.

We should add PG_uncached checks to the page allocator.  Is this OK?


--- 25/mm/page_alloc.c~ia64-specific-dev-mem-handlers-checks	2005-03-09 22:53:12.000000000 -0800
+++ 25-akpm/mm/page_alloc.c	2005-03-09 22:53:44.000000000 -0800
@@ -108,6 +108,7 @@ static void bad_page(const char *functio
 			1 << PG_active	|
 			1 << PG_dirty	|
 			1 << PG_swapcache |
+			1 << PG_uncached |
 			1 << PG_writeback);
 	set_page_count(page, 0);
 	reset_page_mapcount(page);
@@ -321,6 +322,7 @@ static inline void free_pages_check(cons
 			1 << PG_reclaim	|
 			1 << PG_slab	|
 			1 << PG_swapcache |
+			1 << PG_uncached |
 			1 << PG_writeback )))
 		bad_page(function, page);
 	if (PageDirty(page))
@@ -446,6 +448,7 @@ static void prep_new_page(struct page *p
 			1 << PG_dirty	|
 			1 << PG_reclaim	|
 			1 << PG_swapcache |
+			1 << PG_uncached |
 			1 << PG_writeback )))
 		bad_page(__FUNCTION__, page);
 
_


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch -mm series] ia64 specific /dev/mem handlers
  2005-03-10  6:55                   ` Andrew Morton
@ 2005-03-10 13:49                     ` Jes Sorensen
  0 siblings, 0 replies; 26+ messages in thread
From: Jes Sorensen @ 2005-03-10 13:49 UTC (permalink / raw)
  To: Andrew Morton; +Cc: hch, linux-ia64, linux-kernel

>>>>> "Andrew" == Andrew Morton <akpm@osdl.org> writes:

Andrew> Jes Sorensen <jes@wildopensource.com> wrote:
>>  Convert /dev/mem read/write calls to use arch_translate_mem_ptr if
>> available. Needed on ia64 for pages converted fo uncached mappings
>> to avoid it being accessed in cached mode after the conversion
>> which can lead to memory corruption. Introduces PG_uncached page
>> flag for marking pages uncached.

Andrew> For some reason this patch still gives me the creeps.  Maybe
Andrew> it's because we lose a page flag for something so obscure.

Andrew> Nothing ever clears PG_uncached.  We'll end up with every page
Andrew> in the machine marked as being uncached.

Actually there's restrictions to how many pages are getting converted
as converting pages over from cached to uncached isn't trivial on ia64.

Andrew> But then, nothing ever sets PG_uncached, either.  Is there
Andrew> some patch which you're hiding from me?

Actually I posted that earlier, but it must have gotten lost in the
noise. It's part of the genalloc/mspec patchset. I'll send it to you
directly.

Andrew> If a page is marked uncached then it'll remain marked as
Andrew> uncached even after it's unmapped.  Or will it?  Would like to
Andrew> see the other patch, please.

Coming your way in a jiffy.

Andrew> We should add PG_uncached checks to the page allocator.  Is
Andrew> this OK?

I don't see any problems with that. The way it's meant to be used is
that once pages are converted over, they don't go back into the
allocator.

Cheers,
Jes

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [patch -mm series] ia64 specific /dev/mem handlers
@ 2005-02-22 18:05 Luck, Tony
  0 siblings, 0 replies; 26+ messages in thread
From: Luck, Tony @ 2005-02-22 18:05 UTC (permalink / raw)
  To: Jes Sorensen; +Cc: Andrew Morton, linux-ia64, linux-kernel, Matthew Wilcox

>On Tue, Feb 22, 2005 at 09:41:04AM -0500, Jes Sorensen wrote:
>> >> + if (page->flags & PG_uncached)
>> 
>> Andrew> dude.  That ain't gonna work ;)
>> 
>> Pardon my lack of clue, but why not?
>
>I think you're supposed to always use test_bit() to check page flags

You defined PG_uncached as "20" ... so either:

	if (page->flags & (1<<PG_uncached))

or (much better) to use:

	test_bit(PG_uncached, &page->flags).

-Tony

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2005-03-10 13:49 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-02-22  9:52 [patch -mm series] ia64 specific /dev/mem handlers Jes Sorensen
2005-02-22 10:03 ` Andrew Morton
2005-02-22 12:08   ` Andi Kleen
2005-02-22 14:41   ` Jes Sorensen
2005-02-22 17:52     ` Matthew Wilcox
2005-02-22 19:25       ` Andrew Morton
2005-02-22 19:35         ` Dave Hansen
2005-02-22 21:38           ` Jes Sorensen
2005-02-23  0:48             ` Dave Hansen
2005-02-22 21:34         ` Jes Sorensen
2005-02-22 22:39         ` Jes Sorensen
2005-02-22 23:34           ` Andrew Morton
2005-02-23 10:01             ` Jes Sorensen
2005-02-23 22:34               ` Christoph Hellwig
2005-02-24 16:11                 ` Jes Sorensen
2005-03-10  6:55                   ` Andrew Morton
2005-03-10 13:49                     ` Jes Sorensen
2005-02-22 22:03 ` Arjan van de Ven
2005-02-22 22:30   ` Jes Sorensen
2005-02-22 22:42     ` Arjan van de Ven
2005-02-23  8:12       ` Jes Sorensen
2005-02-23  8:24         ` Arjan van de Ven
2005-03-03 11:37 ` Andrew Morton
2005-03-03 12:53   ` Jes Sorensen
2005-03-04 12:26     ` Jes Sorensen
2005-02-22 18:05 Luck, Tony

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).