linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [patch 0/2] PAT fix/optimization
@ 2008-09-13  0:00 Venkatesh Pallipadi
  2008-09-13  0:00 ` [patch 1/2] x86: track memtype for RAM in page struct Venkatesh Pallipadi
  2008-09-13  0:00 ` [patch 2/2] x86: Handle error returns in set_memory_* Venkatesh Pallipadi
  0 siblings, 2 replies; 24+ messages in thread
From: Venkatesh Pallipadi @ 2008-09-13  0:00 UTC (permalink / raw)
  To: arjan, mingo, tglx, hpa, andi
  Cc: linux-kernel, Suresh Siddha, Venkatesh Pallipadi

This week's PAT fix/optimization patches...

-- 


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [patch 1/2] x86: track memtype for RAM in page struct
  2008-09-13  0:00 [patch 0/2] PAT fix/optimization Venkatesh Pallipadi
@ 2008-09-13  0:00 ` Venkatesh Pallipadi
  2008-09-13 17:03   ` Jeremy Fitzhardinge
                     ` (2 more replies)
  2008-09-13  0:00 ` [patch 2/2] x86: Handle error returns in set_memory_* Venkatesh Pallipadi
  1 sibling, 3 replies; 24+ messages in thread
From: Venkatesh Pallipadi @ 2008-09-13  0:00 UTC (permalink / raw)
  To: arjan, mingo, tglx, hpa, andi
  Cc: linux-kernel, Suresh Siddha, Venkatesh Pallipadi

[-- Attachment #1: use_pg_arch_1_for_ram_tracking.patch --]
[-- Type: text/plain, Size: 5650 bytes --]

From: Suresh Siddha <suresh.b.siddha@intel.com>
Subject: [patch 1/2] x86: track memtype for RAM in page struct

Track the memtype for RAM pages in page struct instead of using the memtype
list. This avoids the explosion in the number of entries in memtype list
(of the order of 20,000 with AGP) and makes the PAT tracking simpler. We are
using PG_arch_1 bit in page->flags.

We still use the memtype list for non RAM pages.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>

---
 arch/x86/mm/ioremap.c  |   19 +++++++++++
 arch/x86/mm/pat.c      |   83 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/asm-x86/page.h |    1 
 3 files changed, 103 insertions(+)

Index: tip/arch/x86/mm/ioremap.c
===================================================================
--- tip.orig/arch/x86/mm/ioremap.c	2008-09-12 16:03:33.000000000 -0700
+++ tip/arch/x86/mm/ioremap.c	2008-09-12 16:23:33.000000000 -0700
@@ -102,6 +102,25 @@ int page_is_ram(unsigned long pagenr)
 	return 0;
 }
 
+int pagerange_is_ram(unsigned long start, unsigned long end)
+{
+	int ram_page = 0, not_rampage = 0;
+	unsigned long page_nr;
+
+	for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
+	     ++page_nr) {
+		if (page_is_ram(page_nr))
+			ram_page = 1;
+		else
+			not_rampage = 1;
+
+		if (ram_page == not_rampage)
+			return -1;
+	}
+
+	return ram_page;
+}
+
 /*
  * Fix up the linear direct mapping of the kernel to avoid cache attribute
  * conflicts.
Index: tip/arch/x86/mm/pat.c
===================================================================
--- tip.orig/arch/x86/mm/pat.c	2008-09-12 16:03:33.000000000 -0700
+++ tip/arch/x86/mm/pat.c	2008-09-12 16:23:33.000000000 -0700
@@ -211,6 +211,75 @@ static struct memtype *cached_entry;
 static u64 cached_start;
 
 /*
+ * RED-PEN:  TODO: Add PageReserved() check aswell here,
+ * once we add SetPageReserved() to all the drivers using
+ * set_memory_* or set_pages_*
+ *
+ * This will help prevent accidentally freeing pages
+ * before setting the attribute back to WB.
+ */
+
+/*
+ * For RAM pages, mark the pages with non WB attribute using
+ * PG_arch_1. We allow only one set_memory_uc() or set_memory_wc()
+ * on a RAM page at a time before marking it as WB again. This is ok,
+ * because only one(driver) will be owning the page and doing set_memory_*()
+ * calls.
+ *
+ * For now, we use PG_arch_1 to track that the RAM page is being mapped
+ * as non WB. In future, based on the need we can use one or more flags
+ * (or some other mechanism in page_struct) to keep track of the actual
+ * attribute that the page is mapped to.
+ */
+static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type,
+		       unsigned long *new_type)
+{
+	struct page *page;
+	u64 pfn, end_pfn;
+
+	for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
+		page = pfn_to_page(pfn);
+		if (page_mapped(page) || test_bit(PG_arch_1, &page->flags))
+			goto out;
+
+		set_bit(PG_arch_1, &page->flags);
+	}
+	return 0;
+
+out:
+	end_pfn = pfn;
+	for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) {
+		page = pfn_to_page(pfn);
+		clear_bit(PG_arch_1, &page->flags);
+	}
+
+	return -EINVAL;
+}
+
+static int free_ram_pages_type(u64 start, u64 end)
+{
+	struct page *page;
+	u64 pfn, end_pfn;
+
+	for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
+		page = pfn_to_page(pfn);
+		if (page_mapped(page) || !test_bit(PG_arch_1, &page->flags))
+			goto out;
+
+		clear_bit(PG_arch_1, &page->flags);
+	}
+	return 0;
+
+out:
+	end_pfn = pfn;
+	for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) {
+		page = pfn_to_page(pfn);
+		set_bit(PG_arch_1, &page->flags);
+	}
+	return -EINVAL;
+}
+
+/*
  * req_type typically has one of the:
  * - _PAGE_CACHE_WB
  * - _PAGE_CACHE_WC
@@ -232,6 +301,7 @@ int reserve_memtype(u64 start, u64 end, 
 	unsigned long actual_type;
 	struct list_head *where;
 	int err = 0;
+	int is_range_ram;
 
  	BUG_ON(start >= end); /* end is exclusive */
 
@@ -270,6 +340,12 @@ int reserve_memtype(u64 start, u64 end, 
 		actual_type = pat_x_mtrr_type(start, end,
 					      req_type & _PAGE_CACHE_MASK);
 
+	is_range_ram = pagerange_is_ram(start,end);
+	if (is_range_ram == 1)
+		return reserve_ram_pages_type(start, end, req_type, new_type);
+	else if (is_range_ram < 0)
+		return -EINVAL;
+
 	new  = kmalloc(sizeof(struct memtype), GFP_KERNEL);
 	if (!new)
 		return -ENOMEM;
@@ -358,6 +434,7 @@ int free_memtype(u64 start, u64 end)
 {
 	struct memtype *entry;
 	int err = -EINVAL;
+	int is_range_ram;
 
 	if (!pat_enabled)
 		return 0;
@@ -366,6 +443,12 @@ int free_memtype(u64 start, u64 end)
 	if (is_ISA_range(start, end - 1))
 		return 0;
 
+	is_range_ram = pagerange_is_ram(start,end);
+	if (is_range_ram == 1)
+		return free_ram_pages_type(start, end);
+	else if (is_range_ram < 0)
+		return -EINVAL;
+
 	spin_lock(&memtype_lock);
 	list_for_each_entry(entry, &memtype_list, nd) {
 		if (entry->start == start && entry->end == end) {
Index: tip/include/asm-x86/page.h
===================================================================
--- tip.orig/include/asm-x86/page.h	2008-09-12 16:03:33.000000000 -0700
+++ tip/include/asm-x86/page.h	2008-09-12 16:23:33.000000000 -0700
@@ -57,6 +57,7 @@ typedef struct { pgdval_t pgd; } pgd_t;
 typedef struct { pgprotval_t pgprot; } pgprot_t;
 
 extern int page_is_ram(unsigned long pagenr);
+extern int pagerange_is_ram(unsigned long start, unsigned long end);
 extern int devmem_is_allowed(unsigned long pagenr);
 extern void map_devmem(unsigned long pfn, unsigned long size,
 		       pgprot_t vma_prot);

-- 


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [patch 2/2] x86: Handle error returns in set_memory_*
  2008-09-13  0:00 [patch 0/2] PAT fix/optimization Venkatesh Pallipadi
  2008-09-13  0:00 ` [patch 1/2] x86: track memtype for RAM in page struct Venkatesh Pallipadi
@ 2008-09-13  0:00 ` Venkatesh Pallipadi
  2008-09-14 13:27   ` Ingo Molnar
  2008-09-14 14:10   ` Ingo Molnar
  1 sibling, 2 replies; 24+ messages in thread
From: Venkatesh Pallipadi @ 2008-09-13  0:00 UTC (permalink / raw)
  To: arjan, mingo, tglx, hpa, andi
  Cc: linux-kernel, Venkatesh Pallipadi, Suresh Siddha

[-- Attachment #1: set_memory_errors.patch --]
[-- Type: text/plain, Size: 2676 bytes --]

Correctly handle the error returns from set_memory_*. We have to free memtype
on error return path.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>

---
 arch/x86/mm/pageattr.c |   35 ++++++++++++++++++++++++++++-------
 1 file changed, 28 insertions(+), 7 deletions(-)

Index: tip/arch/x86/mm/pageattr.c
===================================================================
--- tip.orig/arch/x86/mm/pageattr.c	2008-09-12 16:03:36.000000000 -0700
+++ tip/arch/x86/mm/pageattr.c	2008-09-12 16:31:12.000000000 -0700
@@ -929,6 +929,8 @@ int _set_memory_uc(unsigned long addr, i
 
 int set_memory_uc(unsigned long addr, int numpages)
 {
+	int ret;
+
 	/*
 	 * for now UC MINUS. see comments in ioremap_nocache()
 	 */
@@ -936,13 +938,19 @@ int set_memory_uc(unsigned long addr, in
 			    _PAGE_CACHE_UC_MINUS, NULL))
 		return -EINVAL;
 
-	return _set_memory_uc(addr, numpages);
+	ret = _set_memory_uc(addr, numpages);
+	if (ret)
+		free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+
+	return ret;
 }
 EXPORT_SYMBOL(set_memory_uc);
 
 int set_memory_array_uc(unsigned long *addr, int addrinarray)
 {
-	unsigned long start;
+	int ret;
+	int reserve_fail = 0;
+	unsigned long start = 0;
 	unsigned long end;
 	int i;
 	/*
@@ -955,17 +963,24 @@ int set_memory_array_uc(unsigned long *a
 				break;
 			i++;
 		}
-		if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL))
+		if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL)) {
+			ret = -EINVAL;
+			reserve_fail = 1;
 			goto out;
+		}
 	}
 
-	return change_page_attr_set(addr, addrinarray,
+	ret = change_page_attr_set(addr, addrinarray,
 				    __pgprot(_PAGE_CACHE_UC_MINUS), 1);
+
+	if (!ret)
+		return ret;
+
 out:
 	for (i = 0; i < addrinarray; i++) {
 		unsigned long tmp = __pa(addr[i]);
 
-		if (tmp == start)
+		if (reserve_fail && tmp == start)
 			break;
 		for (end = tmp + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
 			if (end != __pa(addr[i + 1]))
@@ -974,7 +989,7 @@ out:
 		}
 		free_memtype(tmp, end);
 	}
-	return -EINVAL;
+	return ret;
 }
 EXPORT_SYMBOL(set_memory_array_uc);
 
@@ -986,6 +1001,8 @@ int _set_memory_wc(unsigned long addr, i
 
 int set_memory_wc(unsigned long addr, int numpages)
 {
+	int ret;
+
 	if (!pat_enabled)
 		return set_memory_uc(addr, numpages);
 
@@ -993,7 +1010,11 @@ int set_memory_wc(unsigned long addr, in
 		_PAGE_CACHE_WC, NULL))
 		return -EINVAL;
 
-	return _set_memory_wc(addr, numpages);
+	ret = _set_memory_wc(addr, numpages);
+	if (ret)
+		free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+
+	return ret;
 }
 EXPORT_SYMBOL(set_memory_wc);
 

-- 


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [patch 1/2] x86: track memtype for RAM in page struct
  2008-09-13  0:00 ` [patch 1/2] x86: track memtype for RAM in page struct Venkatesh Pallipadi
@ 2008-09-13 17:03   ` Jeremy Fitzhardinge
  2008-09-14 13:29     ` Ingo Molnar
  2008-09-13 17:24   ` [patch 1/2] x86: track memtype for RAM in page struct Frans Pop
  2008-09-14 14:12   ` Ingo Molnar
  2 siblings, 1 reply; 24+ messages in thread
From: Jeremy Fitzhardinge @ 2008-09-13 17:03 UTC (permalink / raw)
  To: Venkatesh Pallipadi
  Cc: arjan, mingo, tglx, hpa, andi, linux-kernel, Suresh Siddha

Venkatesh Pallipadi wrote:
> From: Suresh Siddha <suresh.b.siddha@intel.com>
> Subject: [patch 1/2] x86: track memtype for RAM in page struct
>
> Track the memtype for RAM pages in page struct instead of using the memtype
> list. This avoids the explosion in the number of entries in memtype list
> (of the order of 20,000 with AGP) and makes the PAT tracking simpler. We are
> using PG_arch_1 bit in page->flags.
>   

Please define PG_arch_1 a proper name so that its easy to tell its being
used just by looking at page-flags.h.

    J

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [patch 1/2] x86: track memtype for RAM in page struct
  2008-09-13  0:00 ` [patch 1/2] x86: track memtype for RAM in page struct Venkatesh Pallipadi
  2008-09-13 17:03   ` Jeremy Fitzhardinge
@ 2008-09-13 17:24   ` Frans Pop
  2008-09-14 14:12   ` Ingo Molnar
  2 siblings, 0 replies; 24+ messages in thread
From: Frans Pop @ 2008-09-13 17:24 UTC (permalink / raw)
  To: Venkatesh Pallipadi
  Cc: arjan, mingo, tglx, hpa, andi, linux-kernel, suresh.b.siddha,
	venkatesh.pallipadi

Corrections for a few typos and some suggestions for minor improvements
in the comment.

Cheers,
FJP

Venkatesh Pallipadi wrote:
> --- tip.orig/arch/x86/mm/pat.c  2008-09-12 16:03:33.000000000 -0700
> +++ tip/arch/x86/mm/pat.c       2008-09-12 16:23:33.000000000 -0700
> @@ -211,6 +211,75 @@ static struct memtype *cached_entry;
> static u64 cached_start;
> 
> /*
> + * RED-PEN:  TODO: Add PageReserved() check aswell here,

"as well"

> + * once we add SetPageReserved() to all the drivers using
> + * set_memory_* or set_pages_*
> + *
> + * This will help prevent accidentally freeing pages
> + * before setting the attribute back to WB.
> + */
> +
> +/*
> + * For RAM pages, mark the pages with non WB attribute using

s/non WB/without the WB/ ?

> + * PG_arch_1. We allow only one set_memory_uc() or set_memory_wc()
> + * on a RAM page at a time before marking it as WB again. This is ok,
> + * because only one(driver) will be owning the page and doing

"one (driver)"
Maybe move "set_memory_*()" to next line (looks better).

> + * set_memory_*() calls.
> + *
> + * For now, we use PG_arch_1 to track that the RAM page is being mapped
> + * as non WB. In future, based on the need we can use one or more flags

s/based on the need/if needed,/

> + * (or some other mechanism in page_struct) to keep track of the actual
> + * attribute that the page is mapped to.
> + */

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [patch 2/2] x86: Handle error returns in set_memory_*
  2008-09-13  0:00 ` [patch 2/2] x86: Handle error returns in set_memory_* Venkatesh Pallipadi
@ 2008-09-14 13:27   ` Ingo Molnar
  2008-09-14 14:35     ` Frans Pop
  2008-09-14 14:10   ` Ingo Molnar
  1 sibling, 1 reply; 24+ messages in thread
From: Ingo Molnar @ 2008-09-14 13:27 UTC (permalink / raw)
  To: Venkatesh Pallipadi
  Cc: arjan, tglx, hpa, andi, linux-kernel, Suresh Siddha, Frans Pop


* Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> wrote:

> Correctly handle the error returns from set_memory_*. We have to free 
> memtype on error return path.

ah, so this is what caused the display artifacts reported by Frans Pop?

	Ingo

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [patch 1/2] x86: track memtype for RAM in page struct
  2008-09-13 17:03   ` Jeremy Fitzhardinge
@ 2008-09-14 13:29     ` Ingo Molnar
  2008-09-14 14:18       ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 24+ messages in thread
From: Ingo Molnar @ 2008-09-14 13:29 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Venkatesh Pallipadi, arjan, tglx, hpa, andi, linux-kernel, Suresh Siddha


* Jeremy Fitzhardinge <jeremy@goop.org> wrote:

> Venkatesh Pallipadi wrote:
> > From: Suresh Siddha <suresh.b.siddha@intel.com>
> > Subject: [patch 1/2] x86: track memtype for RAM in page struct
> >
> > Track the memtype for RAM pages in page struct instead of using the memtype
> > list. This avoids the explosion in the number of entries in memtype list
> > (of the order of 20,000 with AGP) and makes the PAT tracking simpler. We are
> > using PG_arch_1 bit in page->flags.
> >   
> 
> Please define PG_arch_1 a proper name so that its easy to tell its 
> being used just by looking at page-flags.h.

it should be defined in include/asm-x86/page.h though, not in 
page-flags.h - other architectures are using this flag for other 
purposes.

	Ingo

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [patch 2/2] x86: Handle error returns in set_memory_*
  2008-09-13  0:00 ` [patch 2/2] x86: Handle error returns in set_memory_* Venkatesh Pallipadi
  2008-09-14 13:27   ` Ingo Molnar
@ 2008-09-14 14:10   ` Ingo Molnar
  2008-09-30  8:36     ` Frans Pop
  1 sibling, 1 reply; 24+ messages in thread
From: Ingo Molnar @ 2008-09-14 14:10 UTC (permalink / raw)
  To: Venkatesh Pallipadi; +Cc: arjan, tglx, hpa, andi, linux-kernel, Suresh Siddha


* Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> wrote:

> Correctly handle the error returns from set_memory_*. We have to free 
> memtype on error return path.

we should emit a kernel warning as well if any of those calls fail. 
Drivers should not be randomly poking on RAM ranges.

i'm also missing some background on this, could you please explain in a 
bit more detail about what errors there were triggered, and how they 
caused the display artifacts?

	Ingo

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [patch 1/2] x86: track memtype for RAM in page struct
  2008-09-13  0:00 ` [patch 1/2] x86: track memtype for RAM in page struct Venkatesh Pallipadi
  2008-09-13 17:03   ` Jeremy Fitzhardinge
  2008-09-13 17:24   ` [patch 1/2] x86: track memtype for RAM in page struct Frans Pop
@ 2008-09-14 14:12   ` Ingo Molnar
  2008-09-23 21:48     ` Pallipadi, Venkatesh
  2 siblings, 1 reply; 24+ messages in thread
From: Ingo Molnar @ 2008-09-14 14:12 UTC (permalink / raw)
  To: Venkatesh Pallipadi
  Cc: arjan, tglx, hpa, andi, linux-kernel, Suresh Siddha, Arjan van de Ven


* Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> wrote:

> From: Suresh Siddha <suresh.b.siddha@intel.com>
> Subject: [patch 1/2] x86: track memtype for RAM in page struct
> 
> Track the memtype for RAM pages in page struct instead of using the 
> memtype list. This avoids the explosion in the number of entries in 
> memtype list (of the order of 20,000 with AGP) and makes the PAT 
> tracking simpler. We are using PG_arch_1 bit in page->flags.

this is in contradiction to this comment:

>  /*
> + * RED-PEN:  TODO: Add PageReserved() check aswell here,
> + * once we add SetPageReserved() to all the drivers using
> + * set_memory_* or set_pages_*

if it's marked PG_arch_1, why is there any need for PageReserved?

> + *
> + * This will help prevent accidentally freeing pages
> + * before setting the attribute back to WB.

setting it PageReserved is a hack. We should set it PG_arch_1 and extend 
the page allocator to emit a kernel warning if a PG_arch_1 page is 
freed.

	Ingo

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [patch 1/2] x86: track memtype for RAM in page struct
  2008-09-14 13:29     ` Ingo Molnar
@ 2008-09-14 14:18       ` Jeremy Fitzhardinge
  2008-09-14 14:22         ` Ingo Molnar
  0 siblings, 1 reply; 24+ messages in thread
From: Jeremy Fitzhardinge @ 2008-09-14 14:18 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Venkatesh Pallipadi, arjan, tglx, hpa, andi, linux-kernel,
	Suresh Siddha, Nick Piggin, Christoph Lameter

Ingo Molnar wrote:
> * Jeremy Fitzhardinge <jeremy@goop.org> wrote:
>
>   
>> Venkatesh Pallipadi wrote:
>>     
>>> From: Suresh Siddha <suresh.b.siddha@intel.com>
>>> Subject: [patch 1/2] x86: track memtype for RAM in page struct
>>>
>>> Track the memtype for RAM pages in page struct instead of using the memtype
>>> list. This avoids the explosion in the number of entries in memtype list
>>> (of the order of 20,000 with AGP) and makes the PAT tracking simpler. We are
>>> using PG_arch_1 bit in page->flags.
>>>   
>>>       
>> Please define PG_arch_1 a proper name so that its easy to tell its 
>> being used just by looking at page-flags.h.
>>     
>
> it should be defined in include/asm-x86/page.h though, not in 
> page-flags.h - other architectures are using this flag for other 
> purposes.
>   

No, other shared-use flags are all defined in page-flags.h:

enum pageflags {
[...]
	__NR_PAGEFLAGS,

	/* Filesystems */
	PG_checked = PG_owner_priv_1,

	/* XEN */
	PG_pinned = PG_owner_priv_1,
	PG_savepinned = PG_dirty,

	/* SLOB */
	PG_slob_page = PG_active,
	PG_slob_free = PG_private,

	/* SLUB */
	PG_slub_frozen = PG_active,
	PG_slub_debug = PG_error,
};


We could #ifdef CONFIG_X86 just to make it clear we're talking about a
specific X86 usage.  But page-flags.h does seem to have become the
central authority on all struct page flags usage.

    J

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [patch 1/2] x86: track memtype for RAM in page struct
  2008-09-14 14:18       ` Jeremy Fitzhardinge
@ 2008-09-14 14:22         ` Ingo Molnar
  2008-09-23 21:46           ` Venki Pallipadi
  0 siblings, 1 reply; 24+ messages in thread
From: Ingo Molnar @ 2008-09-14 14:22 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Venkatesh Pallipadi, arjan, tglx, hpa, andi, linux-kernel,
	Suresh Siddha, Nick Piggin, Christoph Lameter


* Jeremy Fitzhardinge <jeremy@goop.org> wrote:

> > it should be defined in include/asm-x86/page.h though, not in 
> > page-flags.h - other architectures are using this flag for other 
> > purposes.
> 
> No, other shared-use flags are all defined in page-flags.h:
> 
> enum pageflags {
> [...]
> 	__NR_PAGEFLAGS,
> 
> 	/* Filesystems */
> 	PG_checked = PG_owner_priv_1,
> 
> 	/* XEN */
> 	PG_pinned = PG_owner_priv_1,
> 	PG_savepinned = PG_dirty,
> 
> 	/* SLOB */
> 	PG_slob_page = PG_active,
> 	PG_slob_free = PG_private,
> 
> 	/* SLUB */
> 	PG_slub_frozen = PG_active,
> 	PG_slub_debug = PG_error,
> };
> 
> 
> We could #ifdef CONFIG_X86 just to make it clear we're talking about a 
> specific X86 usage.  But page-flags.h does seem to have become the 
> central authority on all struct page flags usage.

well, in case of the PG_arch_* flags, they are all defined in 
architecture files:

./include/asm-parisc/pgtable.h:#define PG_dcache_dirty         PG_arch_1
./include/asm-mips/cacheflush.h:#define PG_dcache_dirty			PG_arch_1
./arch/sparc64/mm/init.c:#define PG_dcache_dirty		PG_arch_1
./arch/sh/include/cpu-sh4/cpu/cacheflush.h:#define PG_mapped	PG_arch_1
./arch/sh/include/cpu-sh3/cpu/cacheflush.h:#define PG_mapped	PG_arch_1
./arch/arm/include/asm/cacheflush.h:#define PG_dcache_dirty PG_arch_1

they are explicitly reserved for per architecture details.

	Ingo

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [patch 2/2] x86: Handle error returns in set_memory_*
  2008-09-14 13:27   ` Ingo Molnar
@ 2008-09-14 14:35     ` Frans Pop
  0 siblings, 0 replies; 24+ messages in thread
From: Frans Pop @ 2008-09-14 14:35 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Venkatesh Pallipadi, arjan, tglx, hpa, andi, linux-kernel, Suresh Siddha

On Sunday 14 September 2008, Ingo Molnar wrote:
> * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> wrote:
> > Correctly handle the error returns from set_memory_*. We have to free
> > memtype on error return path.
>
> ah, so this is what caused the display artifacts reported by Frans Pop?

No, I have only tested with the first patch applied, not this second one.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [patch 1/2] x86: track memtype for RAM in page struct
  2008-09-14 14:22         ` Ingo Molnar
@ 2008-09-23 21:46           ` Venki Pallipadi
  2008-09-23 21:59             ` Christoph Lameter
  0 siblings, 1 reply; 24+ messages in thread
From: Venki Pallipadi @ 2008-09-23 21:46 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Jeremy Fitzhardinge, Pallipadi, Venkatesh, arjan, tglx, hpa,
	andi, linux-kernel, Siddha, Suresh B, Nick Piggin,
	Christoph Lameter

On Sun, Sep 14, 2008 at 07:22:13AM -0700, Ingo Molnar wrote:
> 
> * Jeremy Fitzhardinge <jeremy@goop.org> wrote:
> 
> > > it should be defined in include/asm-x86/page.h though, not in
> > > page-flags.h - other architectures are using this flag for other
> > > purposes.
> >
> > No, other shared-use flags are all defined in page-flags.h:
> >
> > enum pageflags {
> > [...]
> >       __NR_PAGEFLAGS,
> >
> >       /* Filesystems */
> >       PG_checked = PG_owner_priv_1,
> >
> >       /* XEN */
> >       PG_pinned = PG_owner_priv_1,
> >       PG_savepinned = PG_dirty,
> >
> >       /* SLOB */
> >       PG_slob_page = PG_active,
> >       PG_slob_free = PG_private,
> >
> >       /* SLUB */
> >       PG_slub_frozen = PG_active,
> >       PG_slub_debug = PG_error,
> > };
> >
> >
> > We could #ifdef CONFIG_X86 just to make it clear we're talking about a
> > specific X86 usage.  But page-flags.h does seem to have become the
> > central authority on all struct page flags usage.
> 
> well, in case of the PG_arch_* flags, they are all defined in
> architecture files:
> 
> ./include/asm-parisc/pgtable.h:#define PG_dcache_dirty         PG_arch_1
> ./include/asm-mips/cacheflush.h:#define PG_dcache_dirty                 PG_arch_1
> ./arch/sparc64/mm/init.c:#define PG_dcache_dirty                PG_arch_1
> ./arch/sh/include/cpu-sh4/cpu/cacheflush.h:#define PG_mapped    PG_arch_1
> ./arch/sh/include/cpu-sh3/cpu/cacheflush.h:#define PG_mapped    PG_arch_1
> ./arch/arm/include/asm/cacheflush.h:#define PG_dcache_dirty PG_arch_1
> 
> they are explicitly reserved for per architecture details.
> 

Below is the updated patch.

Thanks,
Venki

x86: track memtype for RAM in page struct

From: Suresh Siddha <suresh.b.siddha@intel.com>
Subject: x86: track memtype for RAM in page struct

Track the memtype for RAM pages in page struct instead of using the memtype
list. This avoids the explosion in the number of entries in memtype list
(of the order of 20,000 with AGP) and makes the PAT tracking simpler. We are
using PG_arch_1 bit in page->flags.

We still use the memtype list for non RAM pages.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>

---
 arch/x86/mm/ioremap.c        |   19 +++++++++
 arch/x86/mm/pat.c            |   83 +++++++++++++++++++++++++++++++++++++++++++
 include/asm-x86/cacheflush.h |    1 
 include/asm-x86/page.h       |    1 
 4 files changed, 104 insertions(+)

Index: tip/arch/x86/mm/ioremap.c
===================================================================
--- tip.orig/arch/x86/mm/ioremap.c	2008-09-23 13:31:31.000000000 -0700
+++ tip/arch/x86/mm/ioremap.c	2008-09-23 13:32:26.000000000 -0700
@@ -102,6 +102,25 @@ int page_is_ram(unsigned long pagenr)
 	return 0;
 }
 
+int pagerange_is_ram(unsigned long start, unsigned long end)
+{
+	int ram_page = 0, not_rampage = 0;
+	unsigned long page_nr;
+
+	for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
+	     ++page_nr) {
+		if (page_is_ram(page_nr))
+			ram_page = 1;
+		else
+			not_rampage = 1;
+
+		if (ram_page == not_rampage)
+			return -1;
+	}
+
+	return ram_page;
+}
+
 /*
  * Fix up the linear direct mapping of the kernel to avoid cache attribute
  * conflicts.
Index: tip/arch/x86/mm/pat.c
===================================================================
--- tip.orig/arch/x86/mm/pat.c	2008-09-23 13:31:31.000000000 -0700
+++ tip/arch/x86/mm/pat.c	2008-09-23 13:32:26.000000000 -0700
@@ -211,6 +211,75 @@ static struct memtype *cached_entry;
 static u64 cached_start;
 
 /*
+ * RED-PEN:  TODO: Add PageReserved() check as well here,
+ * once we add SetPageReserved() to all the drivers using
+ * set_memory_* or set_pages_*.
+ *
+ * This will help prevent accidentally freeing pages
+ * before setting the attribute back to WB.
+ */
+
+/*
+ * For RAM pages, mark the pages as non WB memory type using
+ * PG_non_WB (PG_arch_1). We allow only one set_memory_uc() or
+ * set_memory_wc() on a RAM page at a time before marking it as WB again.
+ * This is ok, because only one driver will be owning the page and
+ * doing set_memory_*() calls.
+ *
+ * For now, we use PG_non_WB to track that the RAM page is being mapped
+ * as non WB. In future, we will have to use one more flag
+ * (or some other mechanism in page_struct) to distinguish between
+ * UC and WC mapping.
+ */
+static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type,
+		       unsigned long *new_type)
+{
+	struct page *page;
+	u64 pfn, end_pfn;
+
+	for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
+		page = pfn_to_page(pfn);
+		if (page_mapped(page) || test_bit(PG_non_WB, &page->flags))
+			goto out;
+
+		set_bit(PG_non_WB, &page->flags);
+	}
+	return 0;
+
+out:
+	end_pfn = pfn;
+	for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) {
+		page = pfn_to_page(pfn);
+		clear_bit(PG_non_WB, &page->flags);
+	}
+
+	return -EINVAL;
+}
+
+static int free_ram_pages_type(u64 start, u64 end)
+{
+	struct page *page;
+	u64 pfn, end_pfn;
+
+	for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
+		page = pfn_to_page(pfn);
+		if (page_mapped(page) || !test_bit(PG_non_WB, &page->flags))
+			goto out;
+
+		clear_bit(PG_non_WB, &page->flags);
+	}
+	return 0;
+
+out:
+	end_pfn = pfn;
+	for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) {
+		page = pfn_to_page(pfn);
+		set_bit(PG_non_WB, &page->flags);
+	}
+	return -EINVAL;
+}
+
+/*
  * req_type typically has one of the:
  * - _PAGE_CACHE_WB
  * - _PAGE_CACHE_WC
@@ -232,6 +301,7 @@ int reserve_memtype(u64 start, u64 end, 
 	unsigned long actual_type;
 	struct list_head *where;
 	int err = 0;
+	int is_range_ram;
 
  	BUG_ON(start >= end); /* end is exclusive */
 
@@ -270,6 +340,12 @@ int reserve_memtype(u64 start, u64 end, 
 		actual_type = pat_x_mtrr_type(start, end,
 					      req_type & _PAGE_CACHE_MASK);
 
+	is_range_ram = pagerange_is_ram(start,end);
+	if (is_range_ram == 1)
+		return reserve_ram_pages_type(start, end, req_type, new_type);
+	else if (is_range_ram < 0)
+		return -EINVAL;
+
 	new  = kmalloc(sizeof(struct memtype), GFP_KERNEL);
 	if (!new)
 		return -ENOMEM;
@@ -358,6 +434,7 @@ int free_memtype(u64 start, u64 end)
 {
 	struct memtype *entry;
 	int err = -EINVAL;
+	int is_range_ram;
 
 	if (!pat_enabled)
 		return 0;
@@ -366,6 +443,12 @@ int free_memtype(u64 start, u64 end)
 	if (is_ISA_range(start, end - 1))
 		return 0;
 
+	is_range_ram = pagerange_is_ram(start,end);
+	if (is_range_ram == 1)
+		return free_ram_pages_type(start, end);
+	else if (is_range_ram < 0)
+		return -EINVAL;
+
 	spin_lock(&memtype_lock);
 	list_for_each_entry(entry, &memtype_list, nd) {
 		if (entry->start == start && entry->end == end) {
Index: tip/include/asm-x86/page.h
===================================================================
--- tip.orig/include/asm-x86/page.h	2008-09-23 13:31:31.000000000 -0700
+++ tip/include/asm-x86/page.h	2008-09-23 13:32:26.000000000 -0700
@@ -57,6 +57,7 @@ typedef struct { pgdval_t pgd; } pgd_t;
 typedef struct { pgprotval_t pgprot; } pgprot_t;
 
 extern int page_is_ram(unsigned long pagenr);
+extern int pagerange_is_ram(unsigned long start, unsigned long end);
 extern int devmem_is_allowed(unsigned long pagenr);
 extern void map_devmem(unsigned long pfn, unsigned long size,
 		       pgprot_t vma_prot);
Index: tip/include/asm-x86/cacheflush.h
===================================================================
--- tip.orig/include/asm-x86/cacheflush.h	2008-09-23 13:32:10.000000000 -0700
+++ tip/include/asm-x86/cacheflush.h	2008-09-23 13:33:06.000000000 -0700
@@ -24,6 +24,7 @@
 #define copy_from_user_page(vma, page, vaddr, dst, src, len)	\
 	memcpy((dst), (src), (len))
 
+#define PG_non_WB				PG_arch_1
 
 /*
  * The set_memory_* API can be used to change various attributes of a virtual

^ permalink raw reply	[flat|nested] 24+ messages in thread

* RE: [patch 1/2] x86: track memtype for RAM in page struct
  2008-09-14 14:12   ` Ingo Molnar
@ 2008-09-23 21:48     ` Pallipadi, Venkatesh
  0 siblings, 0 replies; 24+ messages in thread
From: Pallipadi, Venkatesh @ 2008-09-23 21:48 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: arjan, tglx, hpa, andi, linux-kernel, Siddha, Suresh B, Arjan van de Ven



>-----Original Message-----
>From: Ingo Molnar [mailto:mingo@elte.hu]
>Sent: Sunday, September 14, 2008 7:13 AM
>To: Pallipadi, Venkatesh
>Cc: arjan@linux.intel.com; tglx@linutronix.de; hpa@zytor.com;
>andi@firstfloor.org; linux-kernel@vger.kernel.org; Siddha,
>Suresh B; Arjan van de Ven
>Subject: Re: [patch 1/2] x86: track memtype for RAM in page struct
>
>
>* Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> wrote:
>
>> From: Suresh Siddha <suresh.b.siddha@intel.com>
>> Subject: [patch 1/2] x86: track memtype for RAM in page struct
>>
>> Track the memtype for RAM pages in page struct instead of using the
>> memtype list. This avoids the explosion in the number of entries in
>> memtype list (of the order of 20,000 with AGP) and makes the PAT
>> tracking simpler. We are using PG_arch_1 bit in page->flags.
>
>this is in contradiction to this comment:
>
>>  /*
>> + * RED-PEN:  TODO: Add PageReserved() check aswell here,
>> + * once we add SetPageReserved() to all the drivers using
>> + * set_memory_* or set_pages_*
>
>if it's marked PG_arch_1, why is there any need for PageReserved?
>

Marking PageReserved will take care of all the places where Reserved is being checked,
like swap etc. And we don't have to add checks for extra bit in architecture independent
code.

Thanks,
Venki

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [patch 1/2] x86: track memtype for RAM in page struct
  2008-09-23 21:46           ` Venki Pallipadi
@ 2008-09-23 21:59             ` Christoph Lameter
  2008-09-24 15:53               ` [patch 1/2] x86: track memtype for RAM in page struct - v3 Venki Pallipadi
  0 siblings, 1 reply; 24+ messages in thread
From: Christoph Lameter @ 2008-09-23 21:59 UTC (permalink / raw)
  To: Venki Pallipadi
  Cc: Ingo Molnar, Jeremy Fitzhardinge, arjan, tglx, hpa, andi,
	linux-kernel, Siddha, Suresh B, Nick Piggin

I'd suggest to avoid the open coding of page flag handling using set bit etc.

It may be best to slowly migrate the arch specific definitions into
page-flags.h. Otherwise we wont have a central authority on page flags.


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [patch 1/2] x86: track memtype for RAM in page struct - v3
  2008-09-23 21:59             ` Christoph Lameter
@ 2008-09-24 15:53               ` Venki Pallipadi
  2008-09-27 17:59                 ` Ingo Molnar
  2008-09-30  7:28                 ` Nick Piggin
  0 siblings, 2 replies; 24+ messages in thread
From: Venki Pallipadi @ 2008-09-24 15:53 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Pallipadi, Venkatesh, Ingo Molnar, Jeremy Fitzhardinge, arjan,
	tglx, hpa, andi, linux-kernel, Siddha, Suresh B, Nick Piggin

On Tue, Sep 23, 2008 at 02:59:10PM -0700, Christoph Lameter wrote:
> I'd suggest to avoid the open coding of page flag handling using set bit etc.
> 
> It may be best to slowly migrate the arch specific definitions into
> page-flags.h. Otherwise we wont have a central authority on page flags.
> 

OK. Below is the updated patch that does not use set/clear directly on page
flag.

x86: track memtype for RAM in page struct

From: Suresh Siddha <suresh.b.siddha@intel.com>
Subject: x86: track memtype for RAM in page struct

Track the memtype for RAM pages in page struct instead of using the memtype
list. This avoids the explosion in the number of entries in memtype list
(of the order of 20,000 with AGP) and makes the PAT tracking simpler. We are
using PG_arch_1 bit in page->flags.

We still use the memtype list for non RAM pages.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>

---
 arch/x86/mm/ioremap.c        |   19 +++++++++
 arch/x86/mm/pat.c            |   83 +++++++++++++++++++++++++++++++++++++++++++
 include/asm-x86/cacheflush.h |    2 +
 include/asm-x86/page.h       |    1 
 4 files changed, 105 insertions(+)

Index: tip/arch/x86/mm/ioremap.c
===================================================================
--- tip.orig/arch/x86/mm/ioremap.c	2008-09-23 13:31:31.000000000 -0700
+++ tip/arch/x86/mm/ioremap.c	2008-09-24 08:39:29.000000000 -0700
@@ -102,6 +102,25 @@ int page_is_ram(unsigned long pagenr)
 	return 0;
 }
 
+int pagerange_is_ram(unsigned long start, unsigned long end)
+{
+	int ram_page = 0, not_rampage = 0;
+	unsigned long page_nr;
+
+	for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
+	     ++page_nr) {
+		if (page_is_ram(page_nr))
+			ram_page = 1;
+		else
+			not_rampage = 1;
+
+		if (ram_page == not_rampage)
+			return -1;
+	}
+
+	return ram_page;
+}
+
 /*
  * Fix up the linear direct mapping of the kernel to avoid cache attribute
  * conflicts.
Index: tip/arch/x86/mm/pat.c
===================================================================
--- tip.orig/arch/x86/mm/pat.c	2008-09-23 13:31:31.000000000 -0700
+++ tip/arch/x86/mm/pat.c	2008-09-24 08:39:27.000000000 -0700
@@ -211,6 +211,75 @@ static struct memtype *cached_entry;
 static u64 cached_start;
 
 /*
+ * RED-PEN:  TODO: Add PageReserved() check as well here,
+ * once we add SetPageReserved() to all the drivers using
+ * set_memory_* or set_pages_*.
+ *
+ * This will help prevent accidentally freeing pages
+ * before setting the attribute back to WB.
+ */
+
+/*
+ * For RAM pages, mark the pages as non WB memory type using
+ * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or
+ * set_memory_wc() on a RAM page at a time before marking it as WB again.
+ * This is ok, because only one driver will be owning the page and
+ * doing set_memory_*() calls.
+ *
+ * For now, we use PageNonWB to track that the RAM page is being mapped
+ * as non WB. In future, we will have to use one more flag
+ * (or some other mechanism in page_struct) to distinguish between
+ * UC and WC mapping.
+ */
+static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type,
+		       unsigned long *new_type)
+{
+	struct page *page;
+	u64 pfn, end_pfn;
+
+	for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
+		page = pfn_to_page(pfn);
+		if (page_mapped(page) || PageNonWB(page))
+			goto out;
+
+		SetPageNonWB(page);
+	}
+	return 0;
+
+out:
+	end_pfn = pfn;
+	for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) {
+		page = pfn_to_page(pfn);
+		ClearPageNonWB(page);
+	}
+
+	return -EINVAL;
+}
+
+static int free_ram_pages_type(u64 start, u64 end)
+{
+	struct page *page;
+	u64 pfn, end_pfn;
+
+	for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
+		page = pfn_to_page(pfn);
+		if (page_mapped(page) || !PageNonWB(page))
+			goto out;
+
+		ClearPageNonWB(page);
+	}
+	return 0;
+
+out:
+	end_pfn = pfn;
+	for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) {
+		page = pfn_to_page(pfn);
+		SetPageNonWB(page);
+	}
+	return -EINVAL;
+}
+
+/*
  * req_type typically has one of the:
  * - _PAGE_CACHE_WB
  * - _PAGE_CACHE_WC
@@ -232,6 +301,7 @@ int reserve_memtype(u64 start, u64 end, 
 	unsigned long actual_type;
 	struct list_head *where;
 	int err = 0;
+	int is_range_ram;
 
  	BUG_ON(start >= end); /* end is exclusive */
 
@@ -270,6 +340,12 @@ int reserve_memtype(u64 start, u64 end, 
 		actual_type = pat_x_mtrr_type(start, end,
 					      req_type & _PAGE_CACHE_MASK);
 
+	is_range_ram = pagerange_is_ram(start,end);
+	if (is_range_ram == 1)
+		return reserve_ram_pages_type(start, end, req_type, new_type);
+	else if (is_range_ram < 0)
+		return -EINVAL;
+
 	new  = kmalloc(sizeof(struct memtype), GFP_KERNEL);
 	if (!new)
 		return -ENOMEM;
@@ -358,6 +434,7 @@ int free_memtype(u64 start, u64 end)
 {
 	struct memtype *entry;
 	int err = -EINVAL;
+	int is_range_ram;
 
 	if (!pat_enabled)
 		return 0;
@@ -366,6 +443,12 @@ int free_memtype(u64 start, u64 end)
 	if (is_ISA_range(start, end - 1))
 		return 0;
 
+	is_range_ram = pagerange_is_ram(start,end);
+	if (is_range_ram == 1)
+		return free_ram_pages_type(start, end);
+	else if (is_range_ram < 0)
+		return -EINVAL;
+
 	spin_lock(&memtype_lock);
 	list_for_each_entry(entry, &memtype_list, nd) {
 		if (entry->start == start && entry->end == end) {
Index: tip/include/asm-x86/page.h
===================================================================
--- tip.orig/include/asm-x86/page.h	2008-09-23 13:31:31.000000000 -0700
+++ tip/include/asm-x86/page.h	2008-09-23 13:32:26.000000000 -0700
@@ -57,6 +57,7 @@ typedef struct { pgdval_t pgd; } pgd_t;
 typedef struct { pgprotval_t pgprot; } pgprot_t;
 
 extern int page_is_ram(unsigned long pagenr);
+extern int pagerange_is_ram(unsigned long start, unsigned long end);
 extern int devmem_is_allowed(unsigned long pagenr);
 extern void map_devmem(unsigned long pfn, unsigned long size,
 		       pgprot_t vma_prot);
Index: tip/include/asm-x86/cacheflush.h
===================================================================
--- tip.orig/include/asm-x86/cacheflush.h	2008-09-23 13:32:10.000000000 -0700
+++ tip/include/asm-x86/cacheflush.h	2008-09-24 08:36:02.000000000 -0700
@@ -24,6 +24,8 @@
 #define copy_from_user_page(vma, page, vaddr, dst, src, len)	\
 	memcpy((dst), (src), (len))
 
+#define PG_non_WB				PG_arch_1
+PAGEFLAG(NonWB, non_WB)
 
 /*
  * The set_memory_* API can be used to change various attributes of a virtual

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [patch 1/2] x86: track memtype for RAM in page struct - v3
  2008-09-24 15:53               ` [patch 1/2] x86: track memtype for RAM in page struct - v3 Venki Pallipadi
@ 2008-09-27 17:59                 ` Ingo Molnar
  2008-09-30  7:28                 ` Nick Piggin
  1 sibling, 0 replies; 24+ messages in thread
From: Ingo Molnar @ 2008-09-27 17:59 UTC (permalink / raw)
  To: Venki Pallipadi
  Cc: Christoph Lameter, Jeremy Fitzhardinge, arjan, tglx, hpa, andi,
	linux-kernel, Siddha, Suresh B, Nick Piggin


* Venki Pallipadi <venkatesh.pallipadi@intel.com> wrote:

> On Tue, Sep 23, 2008 at 02:59:10PM -0700, Christoph Lameter wrote:
> > I'd suggest to avoid the open coding of page flag handling using set bit etc.
> > 
> > It may be best to slowly migrate the arch specific definitions into
> > page-flags.h. Otherwise we wont have a central authority on page flags.
> > 
> 
> OK. Below is the updated patch that does not use set/clear directly on page
> flag.
> 
> x86: track memtype for RAM in page struct
> 
> From: Suresh Siddha <suresh.b.siddha@intel.com>
> Subject: x86: track memtype for RAM in page struct

applied to tip/x86/pat, thanks! Nice patch.

	Ingo

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [patch 1/2] x86: track memtype for RAM in page struct - v3
  2008-09-24 15:53               ` [patch 1/2] x86: track memtype for RAM in page struct - v3 Venki Pallipadi
  2008-09-27 17:59                 ` Ingo Molnar
@ 2008-09-30  7:28                 ` Nick Piggin
  2008-09-30 11:21                   ` Ingo Molnar
  1 sibling, 1 reply; 24+ messages in thread
From: Nick Piggin @ 2008-09-30  7:28 UTC (permalink / raw)
  To: Venki Pallipadi
  Cc: Christoph Lameter, Ingo Molnar, Jeremy Fitzhardinge, arjan, tglx,
	hpa, andi, linux-kernel, Siddha, Suresh B

On Thursday 25 September 2008 01:53, Venki Pallipadi wrote:

>  /*
> + * RED-PEN:  TODO: Add PageReserved() check as well here,
> + * once we add SetPageReserved() to all the drivers using
> + * set_memory_* or set_pages_*.
> + *
> + * This will help prevent accidentally freeing pages
> + * before setting the attribute back to WB.
> + */

I'd rather we didn't add any more uses of PageReserved without a
really good reason.

At this point in time (or at least last time I looked, a year or
two ago), it isn't a whole lot of work to remove PG_reserved
completely. If the waters get muddied up again, it could require
another significant rework to remove it in future...

Thanks,
Nick

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [patch 2/2] x86: Handle error returns in set_memory_*
  2008-09-14 14:10   ` Ingo Molnar
@ 2008-09-30  8:36     ` Frans Pop
  2008-09-30 21:29       ` Suresh Siddha
  0 siblings, 1 reply; 24+ messages in thread
From: Frans Pop @ 2008-09-30  8:36 UTC (permalink / raw)
  To: venkatesh.pallipadi
  Cc: Ingo Molnar, arjan, tglx, hpa, andi, linux-kernel, suresh.b.siddha

Ingo Molnar wrote:
> * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> wrote:
>> Correctly handle the error returns from set_memory_*. We have to free
>> memtype on error return path.
> 
> we should emit a kernel warning as well if any of those calls fail.
> Drivers should not be randomly poking on RAM ranges.

Has this comment from Ingo been addressed already?
 
> i'm also missing some background on this, could you please explain in a
> bit more detail about what errors there were triggered, and how they
> caused the display artifacts?

I would be interested in this as well.

As I mentioned before, I only tested with the first of the two patches  
(as you asked me to) and that solved the artifacts for me. What's the 
explanation behind that?

What is the relationship between the first and second patch?

Cheers,
FJP

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [patch 1/2] x86: track memtype for RAM in page struct - v3
  2008-09-30  7:28                 ` Nick Piggin
@ 2008-09-30 11:21                   ` Ingo Molnar
  2008-09-30 21:14                     ` Suresh Siddha
  0 siblings, 1 reply; 24+ messages in thread
From: Ingo Molnar @ 2008-09-30 11:21 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Venki Pallipadi, Christoph Lameter, Jeremy Fitzhardinge, arjan,
	tglx, hpa, andi, linux-kernel, Siddha, Suresh B


* Nick Piggin <nickpiggin@yahoo.com.au> wrote:

> On Thursday 25 September 2008 01:53, Venki Pallipadi wrote:
> 
> >  /*
> > + * RED-PEN:  TODO: Add PageReserved() check as well here,
> > + * once we add SetPageReserved() to all the drivers using
> > + * set_memory_* or set_pages_*.
> > + *
> > + * This will help prevent accidentally freeing pages
> > + * before setting the attribute back to WB.
> > + */
> 
> I'd rather we didn't add any more uses of PageReserved without a
> really good reason.
> 
> At this point in time (or at least last time I looked, a year or
> two ago), it isn't a whole lot of work to remove PG_reserved
> completely. If the waters get muddied up again, it could require
> another significant rework to remove it in future...

agreed.

	Ingo

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [patch 1/2] x86: track memtype for RAM in page struct - v3
  2008-09-30 11:21                   ` Ingo Molnar
@ 2008-09-30 21:14                     ` Suresh Siddha
  2008-10-01  9:29                       ` Ingo Molnar
  2008-10-02  2:27                       ` Nick Piggin
  0 siblings, 2 replies; 24+ messages in thread
From: Suresh Siddha @ 2008-09-30 21:14 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Nick Piggin, Pallipadi, Venkatesh, Christoph Lameter,
	Jeremy Fitzhardinge, arjan, tglx, hpa, andi, linux-kernel,
	Siddha, Suresh B

On Tue, Sep 30, 2008 at 04:21:28AM -0700, Ingo Molnar wrote:
> 
> * Nick Piggin <nickpiggin@yahoo.com.au> wrote:
> 
> > On Thursday 25 September 2008 01:53, Venki Pallipadi wrote:
> >
> > >  /*
> > > + * RED-PEN:  TODO: Add PageReserved() check as well here,
> > > + * once we add SetPageReserved() to all the drivers using
> > > + * set_memory_* or set_pages_*.
> > > + *
> > > + * This will help prevent accidentally freeing pages
> > > + * before setting the attribute back to WB.
> > > + */
> >
> > I'd rather we didn't add any more uses of PageReserved without a
> > really good reason.
> >
> > At this point in time (or at least last time I looked, a year or
> > two ago), it isn't a whole lot of work to remove PG_reserved
> > completely. If the waters get muddied up again, it could require
> > another significant rework to remove it in future...
> 
> agreed.

If a driver by mistake free's a RAM page before changing its memory attribute
back to WB, we want the generic -mm to catch it.

Today, free_pages_check() prevents freeing the page with PageReserved set. We
want to use this and make sure that either set_page_uc/wc() or the driver
calling these API's set the PageReserved bit. There are already some drivers
which do SetPageReserved() before changing the attribute.

I don't know the history behind PageReserved. But is there a recommended
way to achieve what we want? Either we need to use PageReserved bit or  add
some arch specific checks (in the x86 case, check arch_1) in free_pages_check().
Right?

thanks,
suresh

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [patch 2/2] x86: Handle error returns in set_memory_*
  2008-09-30  8:36     ` Frans Pop
@ 2008-09-30 21:29       ` Suresh Siddha
  0 siblings, 0 replies; 24+ messages in thread
From: Suresh Siddha @ 2008-09-30 21:29 UTC (permalink / raw)
  To: Frans Pop
  Cc: Pallipadi, Venkatesh, Ingo Molnar, arjan, tglx, hpa, andi,
	linux-kernel, Siddha, Suresh B

On Tue, Sep 30, 2008 at 01:36:04AM -0700, Frans Pop wrote:
> As I mentioned before, I only tested with the first of the two patches
> (as you asked me to) and that solved the artifacts for me. What's the
> explanation behind that?
> 
> What is the relationship between the first and second patch?

Frans, There is no relationship between the two patches. First patch
(track memtype for RAM in page struct) will fix the behavior you
observed and the second patch addresses some error conditions which
were noticed by Venki.

Both should have been pushed independently.

thanks,
suresh

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [patch 1/2] x86: track memtype for RAM in page struct - v3
  2008-09-30 21:14                     ` Suresh Siddha
@ 2008-10-01  9:29                       ` Ingo Molnar
  2008-10-02  2:27                       ` Nick Piggin
  1 sibling, 0 replies; 24+ messages in thread
From: Ingo Molnar @ 2008-10-01  9:29 UTC (permalink / raw)
  To: Suresh Siddha
  Cc: Nick Piggin, Pallipadi, Venkatesh, Christoph Lameter,
	Jeremy Fitzhardinge, arjan, tglx, hpa, andi, linux-kernel,
	Andrew Morton


* Suresh Siddha <suresh.b.siddha@intel.com> wrote:

> On Tue, Sep 30, 2008 at 04:21:28AM -0700, Ingo Molnar wrote:
> > 
> > * Nick Piggin <nickpiggin@yahoo.com.au> wrote:
> > 
> > > On Thursday 25 September 2008 01:53, Venki Pallipadi wrote:
> > >
> > > >  /*
> > > > + * RED-PEN:  TODO: Add PageReserved() check as well here,
> > > > + * once we add SetPageReserved() to all the drivers using
> > > > + * set_memory_* or set_pages_*.
> > > > + *
> > > > + * This will help prevent accidentally freeing pages
> > > > + * before setting the attribute back to WB.
> > > > + */
> > >
> > > I'd rather we didn't add any more uses of PageReserved without a
> > > really good reason.
> > >
> > > At this point in time (or at least last time I looked, a year or
> > > two ago), it isn't a whole lot of work to remove PG_reserved
> > > completely. If the waters get muddied up again, it could require
> > > another significant rework to remove it in future...
> > 
> > agreed.
> 
> If a driver by mistake free's a RAM page before changing its memory 
> attribute back to WB, we want the generic -mm to catch it.
>
> Today, free_pages_check() prevents freeing the page with PageReserved 
> set. We want to use this and make sure that either set_page_uc/wc() or 
> the driver calling these API's set the PageReserved bit. There are 
> already some drivers which do SetPageReserved() before changing the 
> attribute.

no, the generic -mm does not 'catch' PageReserved, it simply _ignores_ 
it. So i agree about having a debug check there, it's just that what you 
propose does not achieve that.

PageReserved is a legacy thing that should not be used in new code.  
Using it will only hide bugs and adds quirkiness to the PAT code.

> I don't know the history behind PageReserved. But is there a 
> recommended way to achieve what we want? Either we need to use 
> PageReserved bit or add some arch specific checks (in the x86 case, 
> check arch_1) in free_pages_check(). Right?

the best way i think would be to add arch_1 to PAGE_FLAGS_CHECK_AT_FREE. 
That way the mm becomes very noisy if this is ever freed.

Nick, Andrew, any preferences?

	Ingo

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [patch 1/2] x86: track memtype for RAM in page struct - v3
  2008-09-30 21:14                     ` Suresh Siddha
  2008-10-01  9:29                       ` Ingo Molnar
@ 2008-10-02  2:27                       ` Nick Piggin
  1 sibling, 0 replies; 24+ messages in thread
From: Nick Piggin @ 2008-10-02  2:27 UTC (permalink / raw)
  To: Suresh Siddha
  Cc: Ingo Molnar, Pallipadi, Venkatesh, Christoph Lameter,
	Jeremy Fitzhardinge, arjan, tglx, hpa, andi, linux-kernel

On Wednesday 01 October 2008 07:14, Suresh Siddha wrote:
> On Tue, Sep 30, 2008 at 04:21:28AM -0700, Ingo Molnar wrote:
> > * Nick Piggin <nickpiggin@yahoo.com.au> wrote:
> > > On Thursday 25 September 2008 01:53, Venki Pallipadi wrote:
> > > >  /*
> > > > + * RED-PEN:  TODO: Add PageReserved() check as well here,
> > > > + * once we add SetPageReserved() to all the drivers using
> > > > + * set_memory_* or set_pages_*.
> > > > + *
> > > > + * This will help prevent accidentally freeing pages
> > > > + * before setting the attribute back to WB.
> > > > + */
> > >
> > > I'd rather we didn't add any more uses of PageReserved without a
> > > really good reason.
> > >
> > > At this point in time (or at least last time I looked, a year or
> > > two ago), it isn't a whole lot of work to remove PG_reserved
> > > completely. If the waters get muddied up again, it could require
> > > another significant rework to remove it in future...
> >
> > agreed.
>
> If a driver by mistake free's a RAM page before changing its memory
> attribute back to WB, we want the generic -mm to catch it.
>
> Today, free_pages_check() prevents freeing the page with PageReserved set.

That's gone away in -mm.


> We want to use this and make sure that either set_page_uc/wc() or the
> driver calling these API's set the PageReserved bit. There are already some
> drivers which do SetPageReserved() before changing the attribute.
>
> I don't know the history behind PageReserved. But is there a recommended
> way to achieve what we want? Either we need to use PageReserved bit or  add
> some arch specific checks (in the x86 case, check arch_1) in
> free_pages_check(). Right?

It would be better if another flag is used, yes.

Setting PageReserved used to prevent put_page from decrementing the page's
refcount. Most drivers still do it just because we kept that code in there
to catch any bugs caused by the removal of PageReserved check from the
refcounting.

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2008-10-02  2:27 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-09-13  0:00 [patch 0/2] PAT fix/optimization Venkatesh Pallipadi
2008-09-13  0:00 ` [patch 1/2] x86: track memtype for RAM in page struct Venkatesh Pallipadi
2008-09-13 17:03   ` Jeremy Fitzhardinge
2008-09-14 13:29     ` Ingo Molnar
2008-09-14 14:18       ` Jeremy Fitzhardinge
2008-09-14 14:22         ` Ingo Molnar
2008-09-23 21:46           ` Venki Pallipadi
2008-09-23 21:59             ` Christoph Lameter
2008-09-24 15:53               ` [patch 1/2] x86: track memtype for RAM in page struct - v3 Venki Pallipadi
2008-09-27 17:59                 ` Ingo Molnar
2008-09-30  7:28                 ` Nick Piggin
2008-09-30 11:21                   ` Ingo Molnar
2008-09-30 21:14                     ` Suresh Siddha
2008-10-01  9:29                       ` Ingo Molnar
2008-10-02  2:27                       ` Nick Piggin
2008-09-13 17:24   ` [patch 1/2] x86: track memtype for RAM in page struct Frans Pop
2008-09-14 14:12   ` Ingo Molnar
2008-09-23 21:48     ` Pallipadi, Venkatesh
2008-09-13  0:00 ` [patch 2/2] x86: Handle error returns in set_memory_* Venkatesh Pallipadi
2008-09-14 13:27   ` Ingo Molnar
2008-09-14 14:35     ` Frans Pop
2008-09-14 14:10   ` Ingo Molnar
2008-09-30  8:36     ` Frans Pop
2008-09-30 21:29       ` Suresh Siddha

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).