linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] x86: create array based interface to change page attribute
@ 2008-03-31  5:19 Dave Airlie
  2008-03-31  6:54 ` Thomas Hellström
                   ` (2 more replies)
  0 siblings, 3 replies; 39+ messages in thread
From: Dave Airlie @ 2008-03-31  5:19 UTC (permalink / raw)
  To: linux-kernel; +Cc: tglx, mingo, arjan, thomas


When cpa was refactored to the new set_memory_ interfaces, it removed
a special case fast path for AGP systems, which did a lot of page by page
attribute changing and held the flushes until they were finished. The
DRM memory manager also required this to get useable speeds.

This introduces a new interface, which accepts an array of memory addresses
to have attributes changed on and to flush once finished.

Further changes to the AGP stack to actually use this interface will be
published later.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 arch/x86/mm/pageattr-test.c  |   12 ++-
 arch/x86/mm/pageattr.c       |  164 +++++++++++++++++++++++++++++++-----------
 include/asm-x86/cacheflush.h |    3 +
 3 files changed, 132 insertions(+), 47 deletions(-)

diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index 75f1b10..22c1496 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -111,6 +111,7 @@ static int pageattr_test(void)
 	unsigned int level;
 	int i, k;
 	int err;
+	unsigned long test_addr;
 
 	if (print)
 		printk(KERN_INFO "CPA self-test:\n");
@@ -165,8 +166,10 @@ static int pageattr_test(void)
 			continue;
 		}
 
-		err = change_page_attr_clear(addr[i], len[i],
-					       __pgprot(_PAGE_GLOBAL));
+
+		test_addr = addr[i];
+		err = change_page_attr_clear(&test_addr, len[i],
+					     __pgprot(_PAGE_GLOBAL), 0);
 		if (err < 0) {
 			printk(KERN_ERR "CPA %d failed %d\n", i, err);
 			failed++;
@@ -198,8 +201,9 @@ static int pageattr_test(void)
 			failed++;
 			continue;
 		}
-		err = change_page_attr_set(addr[i], len[i],
-					     __pgprot(_PAGE_GLOBAL));
+		test_addr = addr[i];
+		err = change_page_attr_set(&test_addr, len[i],
+					   __pgprot(_PAGE_GLOBAL), 0);
 		if (err < 0) {
 			printk(KERN_ERR "CPA reverting failed: %d\n", err);
 			failed++;
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 7b79f6b..6124726 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -22,14 +22,18 @@
  * The current flushing context - we pass it instead of 5 arguments:
  */
 struct cpa_data {
-	unsigned long	vaddr;
+	unsigned long	*vaddr;
 	pgprot_t	mask_set;
 	pgprot_t	mask_clr;
 	int		numpages;
-	int		flushtlb;
+	int		flags;
 	unsigned long	pfn;
+	int             curaddr;
 };
 
+#define CPA_FLUSHTLB 1
+#define CPA_ARRAY 2
+
 #ifdef CONFIG_X86_64
 
 static inline unsigned long highmap_start_pfn(void)
@@ -145,6 +149,35 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache)
 	}
 }
 
+static void cpa_flush_array(unsigned long *start, int numpages, int cache)
+{
+	unsigned int i, level;
+	unsigned long *addr;
+
+	BUG_ON(irqs_disabled());
+
+	on_each_cpu(__cpa_flush_range, NULL, 1, 1);
+
+	if (!cache)
+		return;
+
+	/*
+	 * We only need to flush on one CPU,
+	 * clflush is a MESI-coherent instruction that
+	 * will cause all other CPUs to flush the same
+	 * cachelines:
+	 */
+	for (i = 0, addr = start; i < numpages; i++, addr++) {
+		pte_t *pte = lookup_address(*addr, &level);
+
+		/*
+		 * Only flush present addresses:
+		 */
+		if (pte && (pte_val(*pte) & _PAGE_PRESENT))
+			clflush_cache_range((void *) *addr, PAGE_SIZE);
+	}
+}
+
 /*
  * Certain areas of memory on x86 require very specific protection flags,
  * for example the BIOS area or kernel text. Callers don't always get this
@@ -349,7 +382,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 		 */
 		new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
 		__set_pmd_pte(kpte, address, new_pte);
-		cpa->flushtlb = 1;
+		cpa->flags |= CPA_FLUSHTLB;
 		do_split = 0;
 	}
 
@@ -527,11 +560,13 @@ out_unlock:
 
 static int __change_page_attr(struct cpa_data *cpa, int primary)
 {
-	unsigned long address = cpa->vaddr;
+	unsigned long address;
 	int do_split, err;
 	unsigned int level;
 	pte_t *kpte, old_pte;
 
+	address = cpa->vaddr[cpa->curaddr];
+
 repeat:
 	kpte = lookup_address(address, &level);
 	if (!kpte)
@@ -543,7 +578,7 @@ repeat:
 			return 0;
 		printk(KERN_WARNING "CPA: called for zero pte. "
 		       "vaddr = %lx cpa->vaddr = %lx\n", address,
-		       cpa->vaddr);
+		       *cpa->vaddr);
 		WARN_ON(1);
 		return -EINVAL;
 	}
@@ -570,7 +605,7 @@ repeat:
 		 */
 		if (pte_val(old_pte) != pte_val(new_pte)) {
 			set_pte_atomic(kpte, new_pte);
-			cpa->flushtlb = 1;
+			cpa->flags |= CPA_FLUSHTLB;
 		}
 		cpa->numpages = 1;
 		return 0;
@@ -594,7 +629,7 @@ repeat:
 	 */
 	err = split_large_page(kpte, address);
 	if (!err) {
-		cpa->flushtlb = 1;
+		cpa->flags |= CPA_FLUSHTLB;
 		goto repeat;
 	}
 
@@ -607,6 +642,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
 {
 	struct cpa_data alias_cpa;
 	int ret = 0;
+	unsigned long temp_cpa_vaddr, vaddr;
 
 	if (cpa->pfn > max_pfn_mapped)
 		return 0;
@@ -615,11 +651,16 @@ static int cpa_process_alias(struct cpa_data *cpa)
 	 * No need to redo, when the primary call touched the direct
 	 * mapping already:
 	 */
-	if (!within(cpa->vaddr, PAGE_OFFSET,
+	vaddr = cpa->vaddr[cpa->curaddr];
+
+	if (!within(vaddr, PAGE_OFFSET,
 		    PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) {
 
 		alias_cpa = *cpa;
-		alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
+		temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
+		alias_cpa.vaddr = &temp_cpa_vaddr;
+		alias_cpa.curaddr = 0;
+		alias_cpa.flags &= ~CPA_ARRAY;
 
 		ret = __change_page_attr_set_clr(&alias_cpa, 0);
 	}
@@ -631,7 +672,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
 	 * No need to redo, when the primary call touched the high
 	 * mapping already:
 	 */
-	if (within(cpa->vaddr, (unsigned long) _text, (unsigned long) _end))
+	if (within(vaddr, (unsigned long) _text, (unsigned long) _end))
 		return 0;
 
 	/*
@@ -642,8 +683,11 @@ static int cpa_process_alias(struct cpa_data *cpa)
 		return 0;
 
 	alias_cpa = *cpa;
-	alias_cpa.vaddr =
-		(cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base;
+	temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) +
+			 __START_KERNEL_map - phys_base;
+	alias_cpa.vaddr = &temp_cpa_vaddr;
+	alias_cpa.curaddr = 0;
+	alias_cpa.flags &= ~CPA_ARRAY;
 
 	/*
 	 * The high mapping range is imprecise, so ignore the return value.
@@ -681,7 +725,10 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
 		 */
 		BUG_ON(cpa->numpages > numpages);
 		numpages -= cpa->numpages;
-		cpa->vaddr += cpa->numpages * PAGE_SIZE;
+		if (cpa->flags & CPA_ARRAY)
+			cpa->curaddr++;
+		else
+			*cpa->vaddr += cpa->numpages * PAGE_SIZE;
 	}
 	return 0;
 }
@@ -692,8 +739,9 @@ static inline int cache_attr(pgprot_t attr)
 		(_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD);
 }
 
-static int change_page_attr_set_clr(unsigned long addr, int numpages,
-				    pgprot_t mask_set, pgprot_t mask_clr)
+static int change_page_attr_set_clr(unsigned long *addr, int numpages,
+				    pgprot_t mask_set, pgprot_t mask_clr,
+				    int array)
 {
 	struct cpa_data cpa;
 	int ret, cache, checkalias;
@@ -708,19 +756,25 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
 		return 0;
 
 	/* Ensure we are PAGE_SIZE aligned */
-	if (addr & ~PAGE_MASK) {
-		addr &= PAGE_MASK;
-		/*
-		 * People should not be passing in unaligned addresses:
-		 */
-		WARN_ON_ONCE(1);
+	if (!array) {
+		if (*addr & ~PAGE_MASK) {
+			*addr &= PAGE_MASK;
+			/*
+			 * People should not be passing in unaligned addresses:
+			 */
+			WARN_ON_ONCE(1);
+		}
 	}
 
 	cpa.vaddr = addr;
 	cpa.numpages = numpages;
 	cpa.mask_set = mask_set;
 	cpa.mask_clr = mask_clr;
-	cpa.flushtlb = 0;
+	cpa.flags = 0;
+	cpa.curaddr = 0;
+
+	if (array)
+		cpa.flags |= CPA_ARRAY;
 
 	/* No alias checking for _NX bit modifications */
 	checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
@@ -730,7 +784,7 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
 	/*
 	 * Check whether we really changed something:
 	 */
-	if (!cpa.flushtlb)
+	if (!(cpa.flags & CPA_FLUSHTLB))
 		goto out;
 
 	/*
@@ -746,7 +800,10 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
 	 * wbindv):
 	 */
 	if (!ret && cpu_has_clflush)
-		cpa_flush_range(addr, numpages, cache);
+		if (cpa.flags & CPA_ARRAY)
+			cpa_flush_array(addr, numpages, cache);
+		else
+			cpa_flush_range(*addr, numpages, cache);
 	else
 		cpa_flush_all(cache);
 
@@ -756,57 +813,74 @@ out:
 	return ret;
 }
 
-static inline int change_page_attr_set(unsigned long addr, int numpages,
-				       pgprot_t mask)
+static inline int change_page_attr_set(unsigned long *addr, int numpages,
+				       pgprot_t mask, int array)
 {
-	return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0));
+	return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0),
+					array);
 }
 
-static inline int change_page_attr_clear(unsigned long addr, int numpages,
-					 pgprot_t mask)
+static inline int change_page_attr_clear(unsigned long *addr, int numpages,
+					 pgprot_t mask, int array)
 {
-	return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask);
+	return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask,
+					array);
 }
 
 int set_memory_uc(unsigned long addr, int numpages)
 {
-	return change_page_attr_set(addr, numpages,
-				    __pgprot(_PAGE_PCD));
+	return change_page_attr_set(&addr, numpages,
+				    __pgprot(_PAGE_PCD), 0);
 }
 EXPORT_SYMBOL(set_memory_uc);
 
+int set_memory_array_uc(unsigned long *addr, int addrinarray)
+{
+	return change_page_attr_set(addr, addrinarray,
+				    __pgprot(_PAGE_PCD), 1);
+}
+EXPORT_SYMBOL(set_memory_array_uc);
+
 int set_memory_wb(unsigned long addr, int numpages)
 {
-	return change_page_attr_clear(addr, numpages,
-				      __pgprot(_PAGE_PCD | _PAGE_PWT));
+	return change_page_attr_clear(&addr, numpages,
+				      __pgprot(_PAGE_PCD | _PAGE_PWT), 0);
 }
 EXPORT_SYMBOL(set_memory_wb);
 
+int set_memory_array_wb(unsigned long *addr, int addrinarray)
+{
+	return change_page_attr_clear(addr, addrinarray,
+				      __pgprot(_PAGE_PCD | _PAGE_PWT), 1);
+}
+EXPORT_SYMBOL(set_memory_array_wb);
+
 int set_memory_x(unsigned long addr, int numpages)
 {
-	return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_NX));
+	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0);
 }
 EXPORT_SYMBOL(set_memory_x);
 
 int set_memory_nx(unsigned long addr, int numpages)
 {
-	return change_page_attr_set(addr, numpages, __pgprot(_PAGE_NX));
+	return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0);
 }
 EXPORT_SYMBOL(set_memory_nx);
 
 int set_memory_ro(unsigned long addr, int numpages)
 {
-	return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_RW));
+	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
 }
 
 int set_memory_rw(unsigned long addr, int numpages)
 {
-	return change_page_attr_set(addr, numpages, __pgprot(_PAGE_RW));
+	return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0);
 }
 
 int set_memory_np(unsigned long addr, int numpages)
 {
-	return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT));
+	return change_page_attr_clear(&addr, numpages,
+				      __pgprot(_PAGE_PRESENT), 0);
 }
 
 int set_pages_uc(struct page *page, int numpages)
@@ -859,20 +933,24 @@ int set_pages_rw(struct page *page, int numpages)
 
 static int __set_pages_p(struct page *page, int numpages)
 {
-	struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
+	unsigned long tempaddr = (unsigned long)page_address(page);
+	struct cpa_data cpa = { .vaddr = &tempaddr,
 				.numpages = numpages,
 				.mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
-				.mask_clr = __pgprot(0)};
+				.mask_clr = __pgprot(0),
+				.flags = 0};
 
 	return __change_page_attr_set_clr(&cpa, 1);
 }
 
 static int __set_pages_np(struct page *page, int numpages)
 {
-	struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
+	unsigned long tempaddr = (unsigned long)page_address(page);
+	struct cpa_data cpa = { .vaddr = &tempaddr,
 				.numpages = numpages,
 				.mask_set = __pgprot(0),
-				.mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)};
+				.mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
+				.flags = 0};
 
 	return __change_page_attr_set_clr(&cpa, 1);
 }
diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h
index 5396c21..5646584 100644
--- a/include/asm-x86/cacheflush.h
+++ b/include/asm-x86/cacheflush.h
@@ -42,6 +42,9 @@ int set_memory_ro(unsigned long addr, int numpages);
 int set_memory_rw(unsigned long addr, int numpages);
 int set_memory_np(unsigned long addr, int numpages);
 
+int set_memory_array_uc(unsigned long *addr, int addrinarray);
+int set_memory_array_wb(unsigned long *addr, int addrinarray);
+
 void clflush_cache_range(void *addr, unsigned int size);
 
 void cpa_init(void);





^ permalink raw reply related	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-03-31  5:19 [PATCH] x86: create array based interface to change page attribute Dave Airlie
@ 2008-03-31  6:54 ` Thomas Hellström
  2008-03-31  9:33   ` Arjan van de Ven
  2008-03-31  7:25 ` Andi Kleen
  2008-04-01 18:20 ` Arjan van de Ven
  2 siblings, 1 reply; 39+ messages in thread
From: Thomas Hellström @ 2008-03-31  6:54 UTC (permalink / raw)
  To: Dave Airlie; +Cc: linux-kernel, tglx, mingo, arjan

Dave Airlie wrote:
> When cpa was refactored to the new set_memory_ interfaces, it removed
> a special case fast path for AGP systems, which did a lot of page by page
> attribute changing and held the flushes until they were finished. The
> DRM memory manager also required this to get useable speeds.
>
> This introduces a new interface, which accepts an array of memory addresses
> to have attributes changed on and to flush once finished.
>
> Further changes to the AGP stack to actually use this interface will be
> published later.
>
> Signed-off-by: Dave Airlie <airlied@redhat.com>
> ---
>  arch/x86/mm/pageattr-test.c  |   12 ++-
>  arch/x86/mm/pageattr.c       |  164 +++++++++++++++++++++++++++++++-----------
>  include/asm-x86/cacheflush.h |    3 +
>  3 files changed, 132 insertions(+), 47 deletions(-)
>   
...
Dave,
Nice work, but how do we handle highmem pages? I know that they don't 
need any attribute change since they're not in the kernel map, but both 
the AGP module and the DRM memory manager typically hold highmem 
addresses in their arrays, so the code should presumably detect those 
and avoid them?

Since this is an AGPgart and DRM fastpath, the interface should ideally 
be adapted to match the data structures used by those callers. The 
AGPgart module uses an array of addresses, which effectively stops it 
from using pages beyond the DMA32 limit. The DRM memory manager uses an 
array of struct page pointers, but using that was, If I understand 
things correctly, rejected.

So, if we, at some point, want to have an AGPgart module capable of 
using anything beyond the DMA32 limit we will end up with an interface 
that doesn't match neither AGPgart nor DRM, for which users the fastpath 
was originally intended.

/Thomas




^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-03-31  5:19 [PATCH] x86: create array based interface to change page attribute Dave Airlie
  2008-03-31  6:54 ` Thomas Hellström
@ 2008-03-31  7:25 ` Andi Kleen
  2008-03-31  7:55   ` Thomas Hellström
                     ` (2 more replies)
  2008-04-01 18:20 ` Arjan van de Ven
  2 siblings, 3 replies; 39+ messages in thread
From: Andi Kleen @ 2008-03-31  7:25 UTC (permalink / raw)
  To: Dave Airlie; +Cc: linux-kernel, tglx, mingo, arjan, thomas

Dave Airlie <airlied@redhat.com> writes:
>  
> +#define CPA_FLUSHTLB 1
> +#define CPA_ARRAY 2

I don't think CPA_ARRAY should be a separate case. Rather single
page flushing should be an array with only a single entry. pageattr
is already very complex, no need to make add more special cases.
> +
> +		/*
> +		 * Only flush present addresses:
> +		 */
> +		if (pte && (pte_val(*pte) & _PAGE_PRESENT))
> +			clflush_cache_range((void *) *addr, PAGE_SIZE);

Also it is doubtful clflush really makes sense on a large array. Just
doing wbinvd might be faster then. Or perhaps better supporting Self-Snoop
should be revisited, that would at least eliminate it on most Intel 
CPUs.

-Andi

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-03-31  7:25 ` Andi Kleen
@ 2008-03-31  7:55   ` Thomas Hellström
  2008-03-31  8:38     ` Andi Kleen
  2008-03-31  9:56   ` Arjan van de Ven
  2008-03-31 11:21   ` Dave Airlie
  2 siblings, 1 reply; 39+ messages in thread
From: Thomas Hellström @ 2008-03-31  7:55 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Dave Airlie, linux-kernel, tglx, mingo, arjan

Andi Kleen wrote:
> Dave Airlie <airlied@redhat.com> writes:
>   
>>  
>> +#define CPA_FLUSHTLB 1
>> +#define CPA_ARRAY 2
>>     
>
> I don't think CPA_ARRAY should be a separate case. Rather single
> page flushing should be an array with only a single entry. pageattr
> is already very complex, no need to make add more special cases.
>   
>> +
>> +		/*
>> +		 * Only flush present addresses:
>> +		 */
>> +		if (pte && (pte_val(*pte) & _PAGE_PRESENT))
>> +			clflush_cache_range((void *) *addr, PAGE_SIZE);
>>     
>
> Also it is doubtful clflush really makes sense on a large array. Just
> doing wbinvd might be faster then. Or perhaps better supporting Self-Snoop
> should be revisited, that would at least eliminate it on most Intel 
> CPUs.
>
>   
I agree that wbinvd() seems to be faster on large arrays on the 
processors I've tested. But isn't there a severe latency problem with 
that instruction, that makes people really want to avoid it in all 
possible cases?

Also I think we need to clarify the semantics of the c_p_a 
functionality. Right now both AGP and DRM relies on c_p_a  doing an 
explicit cache flush. Otherwise the data won't appear on the device side 
of the aperture.
If we use self-snoop, the AGP and DRM drivers can't rely on this flush 
being performed, and they have to do the flush themselves, and for 
non-self-snooping processors, the flush needs to be done twice?

/Thomas

> -Andi
>   




^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-03-31  7:55   ` Thomas Hellström
@ 2008-03-31  8:38     ` Andi Kleen
  2008-03-31  9:06       ` Thomas Hellström
  0 siblings, 1 reply; 39+ messages in thread
From: Andi Kleen @ 2008-03-31  8:38 UTC (permalink / raw)
  To: Thomas Hellström
  Cc: Andi Kleen, Dave Airlie, linux-kernel, tglx, mingo, arjan

> Also I think we need to clarify the semantics of the c_p_a 
> functionality. Right now both AGP and DRM relies on c_p_a  doing an 
> explicit cache flush. Otherwise the data won't appear on the device side 
> of the aperture.

But surely not in cpa I hope ? Or are you saying you first write data
and then do cpa? If true that would be quite an abuse of CPA 
I would say and you should fix it ASAP.

> If we use self-snoop, the AGP and DRM drivers can't rely on this flush 
> being performed, and they have to do the flush themselves, and for 

They definitely should flush themselves if they want data to reach
the device. That is obviously required any time they reuse a page
too.

-Andi

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-03-31  8:38     ` Andi Kleen
@ 2008-03-31  9:06       ` Thomas Hellström
  2008-03-31  9:18         ` Andi Kleen
  0 siblings, 1 reply; 39+ messages in thread
From: Thomas Hellström @ 2008-03-31  9:06 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Dave Airlie, linux-kernel, tglx, mingo, arjan

Andi Kleen wrote:
>> Also I think we need to clarify the semantics of the c_p_a 
>> functionality. Right now both AGP and DRM relies on c_p_a  doing an 
>> explicit cache flush. Otherwise the data won't appear on the device side 
>> of the aperture.
>>     
>
> But surely not in cpa I hope ? Or are you saying you first write data
> and then do cpa? If true that would be quite an abuse of CPA 
> I would say and you should fix it ASAP.
>
>   
As AGP buffers are moved in- and out of AGP, the caching policy changes, 
so yes, there may be writes to cache coherent memory that needs to be 
flushed at some point. Since CPA has been doing that up to now, and the 
codepaths involved are quite time-critical, a double cache-flush is a 
no-no, so if this is left to the caller, we must be able to tell CPA 
that any needed cache-flush has already been performed.
>> If we use self-snoop, the AGP and DRM drivers can't rely on this flush 
>> being performed, and they have to do the flush themselves, and for 
>>     
>
> They definitely should flush themselves if they want data to reach
> the device. That is obviously required any time they reuse a page
> too.
>   
Understood,
but then we *must* really find a way to say "don't flush the cache 
again", perhaps part of Dave's array function?

/Thomas
> -Andi
>   




^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-03-31  9:06       ` Thomas Hellström
@ 2008-03-31  9:18         ` Andi Kleen
  2008-03-31 11:10           ` Thomas Hellström
  0 siblings, 1 reply; 39+ messages in thread
From: Andi Kleen @ 2008-03-31  9:18 UTC (permalink / raw)
  To: Thomas Hellström
  Cc: Andi Kleen, Dave Airlie, linux-kernel, tglx, mingo, arjan

On Mon, Mar 31, 2008 at 11:06:16AM +0200, Thomas Hellström wrote:
> Andi Kleen wrote:
> >>Also I think we need to clarify the semantics of the c_p_a 
> >>functionality. Right now both AGP and DRM relies on c_p_a  doing an 
> >>explicit cache flush. Otherwise the data won't appear on the device side 
> >>of the aperture.
> >>    
> >
> >But surely not in cpa I hope ? Or are you saying you first write data
> >and then do cpa? If true that would be quite an abuse of CPA 
> >I would say and you should fix it ASAP.
> >
> >  
> As AGP buffers are moved in- and out of AGP, the caching policy changes, 
> so yes, there may be writes to cache coherent memory that needs to be 
> flushed at some point. Since CPA has been doing that up to now, and the 
> codepaths involved are quite time-critical, a double cache-flush is a 

That doesn't make sense. You shouldn't be using CPA in any 
time critical code path. It will always be slow. 

For anything time critical you should keep a pool of uncached pages 
once set up using CPA and reuse them.

CPA really should only be used on initialization or for
larger setup changes which are ok to go somewhat slower.

> no-no, so if this is left to the caller, we must be able to tell CPA 
> that any needed cache-flush has already been performed.

Sounds like a bad design.

> >>If we use self-snoop, the AGP and DRM drivers can't rely on this flush 
> >>being performed, and they have to do the flush themselves, and for 
> >>    
> >
> >They definitely should flush themselves if they want data to reach
> >the device. That is obviously required any time they reuse a page
> >too.
> >  
> Understood,
> but then we *must* really find a way to say "don't flush the cache 
> again", perhaps part of Dave's array function?

The cache must be flushed in CPA, there is no way around it.

If you write data into the buffers before you do CPA on them
you could avoid it, but I don't think you should do that anywhere
near time critical code, so it actually shouldn't matter.

-Andi


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-03-31  6:54 ` Thomas Hellström
@ 2008-03-31  9:33   ` Arjan van de Ven
  2008-03-31 11:04     ` Thomas Hellström
  0 siblings, 1 reply; 39+ messages in thread
From: Arjan van de Ven @ 2008-03-31  9:33 UTC (permalink / raw)
  To: Thomas Hellström; +Cc: Dave Airlie, linux-kernel, tglx, mingo

Thomas Hellström wrote:
> Dave Airlie wrote:
>> When cpa was refactored to the new set_memory_ interfaces, it removed
>> a special case fast path for AGP systems, which did a lot of page by page
>> attribute changing and held the flushes until they were finished. The
>> DRM memory manager also required this to get useable speeds.
>>
>> This introduces a new interface, which accepts an array of memory 
>> addresses
>> to have attributes changed on and to flush once finished.
>>
>> Further changes to the AGP stack to actually use this interface will be
>> published later.
>>
>> Signed-off-by: Dave Airlie <airlied@redhat.com>
>> ---
>>  arch/x86/mm/pageattr-test.c  |   12 ++-
>>  arch/x86/mm/pageattr.c       |  164 
>> +++++++++++++++++++++++++++++++-----------
>>  include/asm-x86/cacheflush.h |    3 +
>>  3 files changed, 132 insertions(+), 47 deletions(-)
>>   
> ...
> Dave,
> Nice work, but how do we handle highmem pages? 

Cache attributes fundamentally work on a mapping and not on physical memory.
(MTRR's are special there, they do work on physical memory, but that's a special case and not relevant here)

So to be honest, your question doesn't make sense;  because all I can do is ask "which mapping of these pages".

Even the old interfaces prior to 2.6.24 only managed to deal with SOME of the mappings of a page.
And if/when future CPUs don't require all mappings to be in sync, clearly the kernel will only change
the mapping that is requested as well.



> Since this is an AGPgart and DRM fastpath, the interface should ideally 
> be adapted to match the data structures used by those callers. 

Actually, the interface has to make sense conceptually convey the right information,
the implementation should not have to second guess internals of AGP/DRM because that
would just be a recipe for disaster.
>The 
> AGPgart module uses an array of addresses, which effectively stops it 
> from using pages beyond the DMA32 limit. The DRM memory manager uses an 
> array of struct page pointers, but using that was, If I understand 
> things correctly, rejected.

yes because simply put, if you pass a struct page to such a function, you're not telling it which
mapping or mappings you want changed....
(And if you say "only the 1:1 mapping, so why doesn't the other side calculate that"... there's no speed gain in doing
the calculation for that on the other side of an interface, and that makes it zero reason to misdesign the interface
to only have the "which mapping" information implicit)


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-03-31  7:25 ` Andi Kleen
  2008-03-31  7:55   ` Thomas Hellström
@ 2008-03-31  9:56   ` Arjan van de Ven
  2008-03-31 11:21   ` Dave Airlie
  2 siblings, 0 replies; 39+ messages in thread
From: Arjan van de Ven @ 2008-03-31  9:56 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Dave Airlie, linux-kernel, tglx, mingo, thomas

Andi Kleen wrote:

> Also it is doubtful clflush really makes sense on a large array. Just
> doing wbinvd might be faster then.

wbinvd is rarely a good idea; think about it... it'll flush 12Mb of cache *per socket* in one instruction.
(on a modern Intel consumer grade CPU, more on the enterprise ones)
This doesn't only impact the current logical thread, but ALL of the threads in the system, since the cache
coherency needs to be preserved, all have to go empty at the same time.
Forget real time... this can take really long even for non-realtime users ;)

At least clflush breaks this up into smaller pieces so total latency won't suck entirely.



^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-03-31  9:33   ` Arjan van de Ven
@ 2008-03-31 11:04     ` Thomas Hellström
  0 siblings, 0 replies; 39+ messages in thread
From: Thomas Hellström @ 2008-03-31 11:04 UTC (permalink / raw)
  To: Arjan van de Ven; +Cc: Dave Airlie, linux-kernel, tglx, mingo

Arjan van de Ven wrote:
> Thomas Hellström wrote:
>> Dave Airlie wrote:
>>> When cpa was refactored to the new set_memory_ interfaces, it removed
>>> a special case fast path for AGP systems, which did a lot of page by 
>>> page
>>> attribute changing and held the flushes until they were finished. The
>>> DRM memory manager also required this to get useable speeds.
>>>
>>> This introduces a new interface, which accepts an array of memory 
>>> addresses
>>> to have attributes changed on and to flush once finished.
>>>
>>> Further changes to the AGP stack to actually use this interface will be
>>> published later.
>>>
>>> Signed-off-by: Dave Airlie <airlied@redhat.com>
>>> ---
>>>  arch/x86/mm/pageattr-test.c  |   12 ++-
>>>  arch/x86/mm/pageattr.c       |  164 
>>> +++++++++++++++++++++++++++++++-----------
>>>  include/asm-x86/cacheflush.h |    3 +
>>>  3 files changed, 132 insertions(+), 47 deletions(-)
>>>   
>> ...
>> Dave,
>> Nice work, but how do we handle highmem pages? 
>
> Cache attributes fundamentally work on a mapping and not on physical 
> memory.
> (MTRR's are special there, they do work on physical memory, but that's 
> a special case and not relevant here)
>
> So to be honest, your question doesn't make sense;  because all I can 
> do is ask "which mapping of these pages".
>
> Even the old interfaces prior to 2.6.24 only managed to deal with SOME 
> of the mappings of a page.
> And if/when future CPUs don't require all mappings to be in sync, 
> clearly the kernel will only change
> the mapping that is requested as well.
>
>
>
>> Since this is an AGPgart and DRM fastpath, the interface should 
>> ideally be adapted to match the data structures used by those callers. 
>
> Actually, the interface has to make sense conceptually convey the 
> right information,
> the implementation should not have to second guess internals of 
> AGP/DRM because that
> would just be a recipe for disaster.
>> The AGPgart module uses an array of addresses, which effectively 
>> stops it from using pages beyond the DMA32 limit. The DRM memory 
>> manager uses an array of struct page pointers, but using that was, If 
>> I understand things correctly, rejected.
>
> yes because simply put, if you pass a struct page to such a function, 
> you're not telling it which
> mapping or mappings you want changed....
> (And if you say "only the 1:1 mapping, so why doesn't the other side 
> calculate that"... there's no speed gain in doing
> the calculation for that on the other side of an interface, and that 
> makes it zero reason to misdesign the interface
> to only have the "which mapping" information implicit)
>
Hmm. I get the point. There should be ways to do reasonably efficient 
workarounds in the drivers.

/Thomas




^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-03-31  9:18         ` Andi Kleen
@ 2008-03-31 11:10           ` Thomas Hellström
  2008-03-31 16:08             ` Arjan van de Ven
  0 siblings, 1 reply; 39+ messages in thread
From: Thomas Hellström @ 2008-03-31 11:10 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Dave Airlie, linux-kernel, tglx, mingo, arjan

Andi Kleen wrote:
> On Mon, Mar 31, 2008 at 11:06:16AM +0200, Thomas Hellström wrote:
>   
>> Andi Kleen wrote:
>>     
>>>> Also I think we need to clarify the semantics of the c_p_a 
>>>> functionality. Right now both AGP and DRM relies on c_p_a  doing an 
>>>> explicit cache flush. Otherwise the data won't appear on the device side 
>>>> of the aperture.
>>>>    
>>>>         
>>> But surely not in cpa I hope ? Or are you saying you first write data
>>> and then do cpa? If true that would be quite an abuse of CPA 
>>> I would say and you should fix it ASAP.
>>>
>>>  
>>>       
>> As AGP buffers are moved in- and out of AGP, the caching policy changes, 
>> so yes, there may be writes to cache coherent memory that needs to be 
>> flushed at some point. Since CPA has been doing that up to now, and the 
>> codepaths involved are quite time-critical, a double cache-flush is a 
>>     
>
> That doesn't make sense. You shouldn't be using CPA in any 
> time critical code path. It will always be slow. 
>
> For anything time critical you should keep a pool of uncached pages 
> once set up using CPA and reuse them.
>
> CPA really should only be used on initialization or for
> larger setup changes which are ok to go somewhat slower.
>   
Let me rehprase. Not really time-critical but it is of some importance 
that CPA is done quickly.
We're dealing with the tradeoff of reading from uncached device memory 
vs taking the pages out of
AGP, setting up a cache-coherent mapping, read and then change back. 
What we'd really would like to set up is a pool of completely unmapped 
(like highmem) pages. Then we could, to a large extent, avoid the CPA calls.

>
>
> The cache must be flushed in CPA, there is no way around it.
>
> If you write data into the buffers before you do CPA on them
> you could avoid it, but I don't think you should do that anywhere
> near time critical code, so it actually shouldn't matter.
>
> -Andi
>   
But then we wouldn't really be discussing SS either? For DRM purposes, 
the more performance we can squeeze out of CPA, the better.

/Thomas





^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-03-31  7:25 ` Andi Kleen
  2008-03-31  7:55   ` Thomas Hellström
  2008-03-31  9:56   ` Arjan van de Ven
@ 2008-03-31 11:21   ` Dave Airlie
  2008-03-31 11:46     ` Andi Kleen
  2 siblings, 1 reply; 39+ messages in thread
From: Dave Airlie @ 2008-03-31 11:21 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Dave Airlie, linux-kernel, tglx, mingo, arjan, thomas

On Mon, Mar 31, 2008 at 5:25 PM, Andi Kleen <andi@firstfloor.org> wrote:
> Dave Airlie <airlied@redhat.com> writes:
>  >
>  > +#define CPA_FLUSHTLB 1
>  > +#define CPA_ARRAY 2
>
>  I don't think CPA_ARRAY should be a separate case. Rather single
>  page flushing should be an array with only a single entry. pageattr
>  is already very complex, no need to make add more special cases.

I thought about this but the current interface takes a start address
and number of pages from that point to cpa,
the array interface takes an array of page sized pages.

I don't really think we need to generate an array in the first case
with all the pages in it..

Dave.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-03-31 11:21   ` Dave Airlie
@ 2008-03-31 11:46     ` Andi Kleen
  2008-04-02  1:35       ` Dave Airlie
  0 siblings, 1 reply; 39+ messages in thread
From: Andi Kleen @ 2008-03-31 11:46 UTC (permalink / raw)
  To: Dave Airlie
  Cc: Andi Kleen, Dave Airlie, linux-kernel, tglx, mingo, arjan, thomas

On Mon, Mar 31, 2008 at 09:21:19PM +1000, Dave Airlie wrote:
> On Mon, Mar 31, 2008 at 5:25 PM, Andi Kleen <andi@firstfloor.org> wrote:
> > Dave Airlie <airlied@redhat.com> writes:
> >  >
> >  > +#define CPA_FLUSHTLB 1
> >  > +#define CPA_ARRAY 2
> >
> >  I don't think CPA_ARRAY should be a separate case. Rather single
> >  page flushing should be an array with only a single entry. pageattr
> >  is already very complex, no need to make add more special cases.
> 
> I thought about this but the current interface takes a start address
> and number of pages from that point to cpa,
> the array interface takes an array of page sized pages.
> 
> I don't really think we need to generate an array in the first case
> with all the pages in it..

Just put the length into the array members too.

-Andi

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-03-31 11:10           ` Thomas Hellström
@ 2008-03-31 16:08             ` Arjan van de Ven
  2008-03-31 16:41               ` Thomas Hellström
  0 siblings, 1 reply; 39+ messages in thread
From: Arjan van de Ven @ 2008-03-31 16:08 UTC (permalink / raw)
  To: Thomas Hellström; +Cc: Andi Kleen, Dave Airlie, linux-kernel, tglx, mingo

Thomas Hellström wrote:

> Let me rehprase. Not really time-critical but it is of some importance 
> that CPA is done quickly.
> We're dealing with the tradeoff of reading from uncached device memory 

uncached or write combining ?

> vs taking the pages out of
> AGP, setting up a cache-coherent mapping, read and then change back. 
> What we'd really would like to set up is a pool of completely unmapped 
> (like highmem) pages. Then we could, to a large extent, avoid the CPA 
> calls.

changing attributes by nature means a tlb flush and a bunch of expensive cache work.
That's never going to be cheap, I guess it all depends on how much work you do
on the memory for it to pay off or not...

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-03-31 16:08             ` Arjan van de Ven
@ 2008-03-31 16:41               ` Thomas Hellström
  2008-03-31 16:49                 ` Arjan van de Ven
  0 siblings, 1 reply; 39+ messages in thread
From: Thomas Hellström @ 2008-03-31 16:41 UTC (permalink / raw)
  To: Arjan van de Ven; +Cc: Andi Kleen, Dave Airlie, linux-kernel, tglx, mingo

Arjan van de Ven wrote:
> Thomas Hellström wrote:
>
>> Let me rehprase. Not really time-critical but it is of some 
>> importance that CPA is done quickly.
>> We're dealing with the tradeoff of reading from uncached device memory 
>
> uncached or write combining ?
The user-space mappings (the ones that we really use) are usually 
write-combined, whereas the kernel mappings are uncached. (I think this 
is OK since both mapping types implies no cache coherency). Even if 
(IIRC) write combining is theoretically prefetchable, some devices give 
read speeds around 9MB/s.
>
>> vs taking the pages out of
>> AGP, setting up a cache-coherent mapping, read and then change back. 
>> What we'd really would like to set up is a pool of completely 
>> unmapped (like highmem) pages. Then we could, to a large extent, 
>> avoid the CPA calls.
>
> changing attributes by nature means a tlb flush and a bunch of 
> expensive cache work.
> That's never going to be cheap, I guess it all depends on how much 
> work you do
> on the memory for it to pay off or not...
Indeed. Actually with the new non-wbinvd() CPA, We seem to benefit 
already if the buffer is a single page, though it's probably hard to 
measure the impact of repopulating the tlb.

/Thomas




^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-03-31 16:41               ` Thomas Hellström
@ 2008-03-31 16:49                 ` Arjan van de Ven
  2008-03-31 17:26                   ` Thomas Hellström
  0 siblings, 1 reply; 39+ messages in thread
From: Arjan van de Ven @ 2008-03-31 16:49 UTC (permalink / raw)
  To: Thomas Hellström; +Cc: Andi Kleen, Dave Airlie, linux-kernel, tglx, mingo

Thomas Hellström wrote:
> Arjan van de Ven wrote:
>> Thomas Hellström wrote:
>>
>>> Let me rehprase. Not really time-critical but it is of some 
>>> importance that CPA is done quickly.
>>> We're dealing with the tradeoff of reading from uncached device memory 
>>
>> uncached or write combining ?
> The user-space mappings (the ones that we really use) are usually 
> write-combined, whereas the kernel mappings are uncached. (I think this 
> is OK since both mapping types implies no cache coherency). 

This is not officially allowed and may tripple fault your cpu..
To comply with the spec one needs to have ALL mappings the same unfortunately.
(And yes, this is a hard problem)

>Even if 
> (IIRC) write combining is theoretically prefetchable, some devices give 
> read speeds around 9MB/s.
>>
>>> vs taking the pages out of
>>> AGP, setting up a cache-coherent mapping, read and then change back. 
>>> What we'd really would like to set up is a pool of completely 
>>> unmapped (like highmem) pages. Then we could, to a large extent, 
>>> avoid the CPA calls.
>>
>> changing attributes by nature means a tlb flush and a bunch of 
>> expensive cache work.
>> That's never going to be cheap, I guess it all depends on how much 
>> work you do
>> on the memory for it to pay off or not...
> Indeed. Actually with the new non-wbinvd() CPA, We seem to benefit 
> already if the buffer is a single page, though it's probably hard to 
> measure the impact of repopulating the tlb.
> 
> /Thomas
> 
> 
> 


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-03-31 16:49                 ` Arjan van de Ven
@ 2008-03-31 17:26                   ` Thomas Hellström
  2008-04-01 20:58                     ` Arjan van de Ven
  0 siblings, 1 reply; 39+ messages in thread
From: Thomas Hellström @ 2008-03-31 17:26 UTC (permalink / raw)
  To: Arjan van de Ven; +Cc: Andi Kleen, Dave Airlie, linux-kernel, tglx, mingo

Arjan van de Ven wrote:
> Thomas Hellström wrote:
>> Arjan van de Ven wrote:
>>> Thomas Hellström wrote:
>>>
>>>> Let me rehprase. Not really time-critical but it is of some 
>>>> importance that CPA is done quickly.
>>>> We're dealing with the tradeoff of reading from uncached device memory 
>>>
>>> uncached or write combining ?
>> The user-space mappings (the ones that we really use) are usually 
>> write-combined, whereas the kernel mappings are uncached. (I think 
>> this is OK since both mapping types implies no cache coherency). 
>
> This is not officially allowed and may tripple fault your cpu..
> To comply with the spec one needs to have ALL mappings the same 
> unfortunately.
> (And yes, this is a hard problem)
>
Hmm...

Given this problem, the previously mentioned use-case, and the fact that 
we mostly really use user-space mappings,
Is there a possibility we could add the following functions to Dave's 
patch (provided they would work as intended, of course, namely 
invalidate / bring back the kernel mapping).
Then we can set up a pool of unmapped pages, avoid frequent use of CPA 
and everybody's happy.

int set_memory_array_np(unsigned long *addr, int addrinarray)
{
    return change_page_attr_clr(addr, addrinarray,
                    __pgprot(_PAGE_PRESENT  |  _PAGE_RW), 1);
}
EXPORT_SYMBOL(set_memory_array_np);

int set_memory_array_p(unsigned long *addr, int addrinarray)
{
    return change_page_attr_set(addr, addrinarray,
                    __pgprot(_PAGE_PRESENT  |  _PAGE_RW), 1);
}
EXPORT_SYMBOL(set_memory_array_p);

/Thomas




^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-03-31  5:19 [PATCH] x86: create array based interface to change page attribute Dave Airlie
  2008-03-31  6:54 ` Thomas Hellström
  2008-03-31  7:25 ` Andi Kleen
@ 2008-04-01 18:20 ` Arjan van de Ven
  2 siblings, 0 replies; 39+ messages in thread
From: Arjan van de Ven @ 2008-04-01 18:20 UTC (permalink / raw)
  To: Dave Airlie; +Cc: linux-kernel, tglx, mingo, thomas

Dave Airlie wrote:
> When cpa was refactored to the new set_memory_ interfaces, it removed
> a special case fast path for AGP systems, which did a lot of page by page
> attribute changing and held the flushes until they were finished. The
> DRM memory manager also required this to get useable speeds.
> 
> This introduces a new interface, which accepts an array of memory addresses
> to have attributes changed on and to flush once finished.
> 
> Further changes to the AGP stack to actually use this interface will be
> published later.

Acked-by: Arjan van de Ven <arjan@linux.intel.com>

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-03-31 17:26                   ` Thomas Hellström
@ 2008-04-01 20:58                     ` Arjan van de Ven
  2008-04-01 21:29                       ` Thomas Hellström
  0 siblings, 1 reply; 39+ messages in thread
From: Arjan van de Ven @ 2008-04-01 20:58 UTC (permalink / raw)
  To: Thomas Hellström; +Cc: Andi Kleen, Dave Airlie, linux-kernel, tglx, mingo

Thomas Hellström wrote:
> Given this problem, the previously mentioned use-case, and the fact that 
> we mostly really use user-space mappings,
> Is there a possibility we could add the following functions to Dave's 
> patch (provided they would work as intended, of course, namely 
> invalidate / bring back the kernel mapping).

sadly there are multiple mappings, both in theory and practice.
Especially the _np / _p functions specifically work on only the mapping you specify.

For this to work we would need to somehow make a "mark all mappings NP, but please only do the kernel ones" kind of thing.
The semantics of that are... lets say messy at best.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-04-01 20:58                     ` Arjan van de Ven
@ 2008-04-01 21:29                       ` Thomas Hellström
  2008-04-01 22:30                         ` Arjan van de Ven
  0 siblings, 1 reply; 39+ messages in thread
From: Thomas Hellström @ 2008-04-01 21:29 UTC (permalink / raw)
  To: Arjan van de Ven; +Cc: Andi Kleen, Dave Airlie, linux-kernel, tglx, mingo

Arjan van de Ven wrote:
> Thomas Hellström wrote:
>> Given this problem, the previously mentioned use-case, and the fact 
>> that we mostly really use user-space mappings,
>> Is there a possibility we could add the following functions to Dave's 
>> patch (provided they would work as intended, of course, namely 
>> invalidate / bring back the kernel mapping).
>
> sadly there are multiple mappings, both in theory and practice.
> Especially the _np / _p functions specifically work on only the 
> mapping you specify.
>
> For this to work we would need to somehow make a "mark all mappings 
> NP, but please only do the kernel ones" kind of thing.
> The semantics of that are... lets say messy at best.
Hmm, I'm not sure I follow you here. Are you saying that it's illegal to 
have an NP mapping of a page (which, If I understand it correctly, means 
no mapping at all) at the same time as you have a, say user-space WC 
mapping pointing to the same physical page?

Or are you saying that it's very hard to keep track of all mappings to a 
page, and change only the ones you really want to change?

If you mean the latter, for the DRM case, the DRM memory manager has 
already made sure all user-space mappings of a page are killed before 
calling CPA on it, (using unmap_mapping_range()) and they are faulted 
back once CPA is done.

I was under the impression that calling CPA on the kernel mapping of 
that page would do the rest?

/Thomas





^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-04-01 21:29                       ` Thomas Hellström
@ 2008-04-01 22:30                         ` Arjan van de Ven
  2008-04-02  6:30                           ` Thomas Hellström
  0 siblings, 1 reply; 39+ messages in thread
From: Arjan van de Ven @ 2008-04-01 22:30 UTC (permalink / raw)
  To: Thomas Hellström; +Cc: Andi Kleen, Dave Airlie, linux-kernel, tglx, mingo

Thomas Hellström wrote:
> Arjan van de Ven wrote:
>> Thomas Hellström wrote:
>>> Given this problem, the previously mentioned use-case, and the fact 
>>> that we mostly really use user-space mappings,
>>> Is there a possibility we could add the following functions to Dave's 
>>> patch (provided they would work as intended, of course, namely 
>>> invalidate / bring back the kernel mapping).
>>
>> sadly there are multiple mappings, both in theory and practice.
>> Especially the _np / _p functions specifically work on only the 
>> mapping you specify.
>>
>> For this to work we would need to somehow make a "mark all mappings 
>> NP, but please only do the kernel ones" kind of thing.
>> The semantics of that are... lets say messy at best.
> Hmm, I'm not sure I follow you here. Are you saying that it's illegal to 
> have an NP mapping of a page (which, If I understand it correctly, means 
> no mapping at all) at the same time as you have a, say user-space WC 
> mapping pointing to the same physical page?

no.
What I'm saying is that even if you make one mapping NP, it's hard to know you got all of them,
even just the kernel one.

> 
> I was under the impression that calling CPA on the kernel mapping of 
> that page would do the rest?

this is not a correct assumption in general, especially not for things like
"present".
If a page is mapped 3 times in the kernel, and you set one to "np", the others
WILL stay mapped.

(and CPA itself no longer exists period ;)

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-03-31 11:46     ` Andi Kleen
@ 2008-04-02  1:35       ` Dave Airlie
  0 siblings, 0 replies; 39+ messages in thread
From: Dave Airlie @ 2008-04-02  1:35 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Dave Airlie, linux-kernel, tglx, mingo, arjan, thomas


On Mon, 2008-03-31 at 13:46 +0200, Andi Kleen wrote:
> On Mon, Mar 31, 2008 at 09:21:19PM +1000, Dave Airlie wrote:
> > On Mon, Mar 31, 2008 at 5:25 PM, Andi Kleen <andi@firstfloor.org> wrote:
> > > Dave Airlie <airlied@redhat.com> writes:
> > >  >
> > >  > +#define CPA_FLUSHTLB 1
> > >  > +#define CPA_ARRAY 2
> > >
> > >  I don't think CPA_ARRAY should be a separate case. Rather single
> > >  page flushing should be an array with only a single entry. pageattr
> > >  is already very complex, no need to make add more special cases.
> > 
> > I thought about this but the current interface takes a start address
> > and number of pages from that point to cpa,
> > the array interface takes an array of page sized pages.
> > 
> > I don't really think we need to generate an array in the first case
> > with all the pages in it..
> 
> Just put the length into the array members too.

I can only think to do this by maybe stealing some bits, and having the
caller provide a pfn + length in one 64-bit dword.

As otherwise I'm going to end up with the largest use case for this
having to allocate a large array of 64-bit dwords all containing 1.

This isn't worth it IMHO and the array doesn't add that much more
complexity.

Dave.


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-04-01 22:30                         ` Arjan van de Ven
@ 2008-04-02  6:30                           ` Thomas Hellström
  2008-04-02  6:35                             ` Arjan van de Ven
  0 siblings, 1 reply; 39+ messages in thread
From: Thomas Hellström @ 2008-04-02  6:30 UTC (permalink / raw)
  To: Arjan van de Ven; +Cc: Andi Kleen, Dave Airlie, linux-kernel, tglx, mingo

Arjan van de Ven wrote:
> Thomas Hellström wrote:
>> Arjan van de Ven wrote:
>>> Thomas Hellström wrote:
>>>> Given this problem, the previously mentioned use-case, and the fact 
>>>> that we mostly really use user-space mappings,
>>>> Is there a possibility we could add the following functions to 
>>>> Dave's patch (provided they would work as intended, of course, 
>>>> namely invalidate / bring back the kernel mapping).
>>>
>>> sadly there are multiple mappings, both in theory and practice.
>>> Especially the _np / _p functions specifically work on only the 
>>> mapping you specify.
>>>
>>> For this to work we would need to somehow make a "mark all mappings 
>>> NP, but please only do the kernel ones" kind of thing.
>>> The semantics of that are... lets say messy at best.
>> Hmm, I'm not sure I follow you here. Are you saying that it's illegal 
>> to have an NP mapping of a page (which, If I understand it correctly, 
>> means no mapping at all) at the same time as you have a, say 
>> user-space WC mapping pointing to the same physical page?
>
> no.
> What I'm saying is that even if you make one mapping NP, it's hard to 
> know you got all of them,
> even just the kernel one.
>
>>
>> I was under the impression that calling CPA on the kernel mapping of 
>> that page would do the rest?
>
> this is not a correct assumption in general, especially not for things 
> like
> "present".
> If a page is mapped 3 times in the kernel, and you set one to "np", 
> the others
> WILL stay mapped.
>
> (and CPA itself no longer exists period ;)
But what mappings are there,  immediately after alloc_page(), that 
set_memory_np won't catch? Certainly kmap_atomic() used to leave some 
stale mappings floating around, but that has been fixed as far as I can 
tell, and I guess we must be able to assume that a driver keeps track of 
its own kernel mappings and kill them before calling set_memory_np.

Drivers relying on set_memory_uc touching all mappings the driver hasn't 
set up itself must then have the same problem and needs to be fixed; 
referring in particular to agpgart for which driver the old CPA 
functionality was once created, IIRC.

We should probably get this right as soon as possible, as the agpgart 
driver is (and has for the last couple of years been) relying on wc / uc 
aliasing to be legal. If we can fix this with set_memory_np / 
set_memory_p that would be great, and would have some additional 
benefits for the drm memory manager as well.

/Thomas







^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-04-02  6:30                           ` Thomas Hellström
@ 2008-04-02  6:35                             ` Arjan van de Ven
  2008-04-02  6:59                               ` Thomas Hellström
  0 siblings, 1 reply; 39+ messages in thread
From: Arjan van de Ven @ 2008-04-02  6:35 UTC (permalink / raw)
  To: Thomas Hellström; +Cc: Andi Kleen, Dave Airlie, linux-kernel, tglx, mingo

Thomas Hellström wrote:

> But what mappings are there,  immediately after alloc_page(), that 
> set_memory_np won't catch? 

For example on x86 64bit, the kernel text is mapped (to allow relocatable) in another
space as well.. and to allow 2Mb tlbs for this, that second mapping is bigger than it strictly needs to be.
So your alloc_page() could get a page from the free pool that comes from the pages that have a second mapping
due to this rounding.
(and more fun, since it's close to frequently accessed memory, the hw prefetchers may actually just decide to pull
such pages into the cache preemptively)


> Drivers relying on set_memory_uc touching all mappings the driver hasn't 
> set up itself must then have the same problem and needs to be fixed; 
> referring in particular to agpgart for which driver the old CPA 
> functionality was once created, IIRC.

"uc" is different than "np"; for "uc" the implementation, if your cpu needs this, will fix
up the shadow text mappings already today.

for "present" this doesn't make sense, since no cpu needs this.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-04-02  6:35                             ` Arjan van de Ven
@ 2008-04-02  6:59                               ` Thomas Hellström
  2008-04-02 14:01                                 ` Arjan van de Ven
  0 siblings, 1 reply; 39+ messages in thread
From: Thomas Hellström @ 2008-04-02  6:59 UTC (permalink / raw)
  To: Arjan van de Ven; +Cc: Andi Kleen, Dave Airlie, linux-kernel, tglx, mingo

Arjan van de Ven wrote:
> Thomas Hellström wrote:
>
>> But what mappings are there,  immediately after alloc_page(), that 
>> set_memory_np won't catch? 
>
> For example on x86 64bit, the kernel text is mapped (to allow 
> relocatable) in another
> space as well.. and to allow 2Mb tlbs for this, that second mapping is 
> bigger than it strictly needs to be.
> So your alloc_page() could get a page from the free pool that comes 
> from the pages that have a second mapping
> due to this rounding.
> (and more fun, since it's close to frequently accessed memory, the hw 
> prefetchers may actually just decide to pull
> such pages into the cache preemptively)
>
>
>> Drivers relying on set_memory_uc touching all mappings the driver 
>> hasn't set up itself must then have the same problem and needs to be 
>> fixed; referring in particular to agpgart for which driver the old 
>> CPA functionality was once created, IIRC.
>
> "uc" is different than "np"; for "uc" the implementation, if your cpu 
> needs this, will fix
> up the shadow text mappings already today.
>
> for "present" this doesn't make sense, since no cpu needs this.
If this is the checkalias() thingy in x86/pageattr.c, it looks like it 
doesn't handle np different from the uc case. It does obviously skip NX 
bit manipulations, though.

Anyway, a more direct question: If we were to fix whatever's missing for 
the code to fixup the x86-64 shadow text mapping, would you be opposing 
this way to fix the long standing uc/wc aliasing issue, provided we 
don't hit any other problems, like clflush() refusing to run on an NP page?

/Thomas




^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-04-02  6:59                               ` Thomas Hellström
@ 2008-04-02 14:01                                 ` Arjan van de Ven
  2008-04-02 17:57                                   ` Thomas Hellström
  0 siblings, 1 reply; 39+ messages in thread
From: Arjan van de Ven @ 2008-04-02 14:01 UTC (permalink / raw)
  To: Thomas Hellström; +Cc: Andi Kleen, Dave Airlie, linux-kernel, tglx, mingo

Thomas Hellström wrote:
> Arjan van de Ven wrote:
>> Thomas Hellström wrote:
>>
>>> But what mappings are there,  immediately after alloc_page(), that 
>>> set_memory_np won't catch? 
>>
>> For example on x86 64bit, the kernel text is mapped (to allow 
>> relocatable) in another
>> space as well.. and to allow 2Mb tlbs for this, that second mapping is 
>> bigger than it strictly needs to be.
>> So your alloc_page() could get a page from the free pool that comes 
>> from the pages that have a second mapping
>> due to this rounding.
>> (and more fun, since it's close to frequently accessed memory, the hw 
>> prefetchers may actually just decide to pull
>> such pages into the cache preemptively)
>>
>>
>>> Drivers relying on set_memory_uc touching all mappings the driver 
>>> hasn't set up itself must then have the same problem and needs to be 
>>> fixed; referring in particular to agpgart for which driver the old 
>>> CPA functionality was once created, IIRC.
>>
>> "uc" is different than "np"; for "uc" the implementation, if your cpu 
>> needs this, will fix
>> up the shadow text mappings already today.
>>
>> for "present" this doesn't make sense, since no cpu needs this.
> If this is the checkalias() thingy in x86/pageattr.c, it looks like it 
> doesn't handle np different from the uc case. It does obviously skip NX 
> bit manipulations, though.
> 
> Anyway, a more direct question: If we were to fix whatever's missing for 
> the code to fixup the x86-64 shadow text mapping, would you be opposing 
> this way 

I do really oppose set_memory_np/p to work on anything but the mapping it is handled.
that _uc and co touch other mappings is a cpu specific property and an implementation
detail to the API. If/When CPUs exist that don't have issues with aliases, those CPUs
will not change the other mappings.

 > to fix the long standing uc/wc aliasing issue, provided we

I'm not opposed to a real fix. I am opposed to a bad hack.

> don't hit any other problems, like clflush() refusing to run on an NP page?

yes clflush doesn't work on not present pages ;)

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-04-02 14:01                                 ` Arjan van de Ven
@ 2008-04-02 17:57                                   ` Thomas Hellström
  2008-04-07 18:23                                     ` Jesse Barnes
  0 siblings, 1 reply; 39+ messages in thread
From: Thomas Hellström @ 2008-04-02 17:57 UTC (permalink / raw)
  To: Arjan van de Ven; +Cc: Andi Kleen, Dave Airlie, linux-kernel, tglx, mingo

Arjan van de Ven wrote:
> Thomas Hellström wrote:
>
> > to fix the long standing uc/wc aliasing issue, provided we
>
> I'm not opposed to a real fix. I am opposed to a bad hack.
>
Great. So a real clean fix involves setting all "default" kernel 
mappings either to WC (which will require PAT) or
Unmapped, for a pool of pages used in the graphics tables.

To reduce the number of attribute changes for mappings that are 
frequently switched, and also to reduce the number of clflushes, and to 
avoid waiting for upcoming wc versions of set_memory_xx, I have a strong 
preference for unmapping the pages.

Now is where I need some guidance, because interface design is not my 
strong side. I see three possible ways to do this.

1) Use set_memory_np(). Not desirable since we want to be able to use 
that function on a single mapping, and not imply other semantics.
2) Have the driver try to find out which "default" mappings the kernel 
has set up on a page and call set_memory_np() on each one of them. This 
seems very fragile and ugly even to me.
3) Have code in x86/pageattr.c decide which "default" mappings are 
present on the given pages and set them all as non-present.
In fact, there is already such a function in pageattr.c:

kernel_map_pages(struct page *pages, int numpages, bool enable);

But it's for debugging purposes only, could we use and export a variant 
of this?

I guess I need a hint as to what's considered allowable here, to avoid 
spending a lot of time on something that will in the end get rejected 
anyway.

Thanks,

Thomas






^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-04-02 17:57                                   ` Thomas Hellström
@ 2008-04-07 18:23                                     ` Jesse Barnes
  2008-04-07 19:51                                       ` Thomas Hellström
  0 siblings, 1 reply; 39+ messages in thread
From: Jesse Barnes @ 2008-04-07 18:23 UTC (permalink / raw)
  To: Thomas Hellström
  Cc: Arjan van de Ven, Andi Kleen, Dave Airlie, linux-kernel, tglx, mingo

On Wednesday, April 02, 2008 10:57 am Thomas Hellström wrote:
> Arjan van de Ven wrote:
> > Thomas Hellström wrote:
> > > to fix the long standing uc/wc aliasing issue, provided we
> >
> > I'm not opposed to a real fix. I am opposed to a bad hack.
>
> Great. So a real clean fix involves setting all "default" kernel
> mappings either to WC (which will require PAT) or
> Unmapped, for a pool of pages used in the graphics tables.
>
> To reduce the number of attribute changes for mappings that are
> frequently switched, and also to reduce the number of clflushes, and to
> avoid waiting for upcoming wc versions of set_memory_xx, I have a strong
> preference for unmapping the pages.

Hopefully the WC stuff will be upstream right after 2.6.25 comes out.  Any 
reason why we shouldn't keep the pages mapped in the kernel as WC assuming 
the interface is there?

And we really should be keeping pools of pages around with the right type--we 
don't want to change attributes any more than absolutely necessary (the ia64 
uncached allocator does this right already, and in the DRM we actually keep 
the mappings around right now afaict).  We can allocate & free large chunks 
at a time to deal with memory pressure one way or another...

> 3) Have code in x86/pageattr.c decide which "default" mappings are
> present on the given pages and set them all as non-present.
> In fact, there is already such a function in pageattr.c:
>
> kernel_map_pages(struct page *pages, int numpages, bool enable);
>
> But it's for debugging purposes only, could we use and export a variant
> of this?
>
> I guess I need a hint as to what's considered allowable here, to avoid
> spending a lot of time on something that will in the end get rejected
> anyway.

I think we do want an interface like this, even if only for graphics memory 
(though I suspect some other device might like it as well).  We'll also want 
to do it at runtime periodically to allocate new hunks of memory for graphics 
use, so a boot-time only thing won't work.

Also, to make the API readable, we'd probably want to split the function into 
kernel_map_pages(..., enum memory_type type) and kernel_unmap_pages(...) 
(though like I said I think we really should be mapping them WC not umapping 
them altogether, since we do want to hit the ring buffer from the kernel with 
the WC type for example).

Question is, will kernel_map_pages catch all the various kernel mappings 
(regular identity map, large page text map,e tc.), perform the proper 
flushing, and generally make sure we don't machine check on all platforms?

Jesse

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-04-07 18:23                                     ` Jesse Barnes
@ 2008-04-07 19:51                                       ` Thomas Hellström
  2008-04-07 19:59                                         ` Jesse Barnes
  0 siblings, 1 reply; 39+ messages in thread
From: Thomas Hellström @ 2008-04-07 19:51 UTC (permalink / raw)
  To: Jesse Barnes
  Cc: Arjan van de Ven, Andi Kleen, Dave Airlie, linux-kernel, tglx, mingo

Jesse Barnes wrote:
> On Wednesday, April 02, 2008 10:57 am Thomas Hellström wrote:
>   
>> Arjan van de Ven wrote:
>>     
>>> Thomas Hellström wrote:
>>>       
>>>> to fix the long standing uc/wc aliasing issue, provided we
>>>>         
>>> I'm not opposed to a real fix. I am opposed to a bad hack.
>>>       
>> Great. So a real clean fix involves setting all "default" kernel
>> mappings either to WC (which will require PAT) or
>> Unmapped, for a pool of pages used in the graphics tables.
>>
>> To reduce the number of attribute changes for mappings that are
>> frequently switched, and also to reduce the number of clflushes, and to
>> avoid waiting for upcoming wc versions of set_memory_xx, I have a strong
>> preference for unmapping the pages.
>>     
>
> Hopefully the WC stuff will be upstream right after 2.6.25 comes out.  Any 
> reason why we shouldn't keep the pages mapped in the kernel as WC assuming 
> the interface is there?
>   
If the pages are unmapped, we can get reasonable speed doing 
unbind-read-bind operations, kernel accesses to the memory will need to 
use an iomap_atomic_prot_pfn() type of operation.
No IPI global tlb flushes needed for kernel mapping changes during 
unbind-read-bind and no cache flushes needed either if we write-protect 
the user-space mappings properly, or very limited cache flushes if we 
keep dirty-written-while-cached flags for each page.

If the pages are wc-d we'll need two extra IPI global tlb flushes and a 
buffer-size cache flush every time we do unbind-read-bind, but OTOH we 
don't need the iomap_atomic_prot_pfn() to access single pages from the 
kernel.

iomap_atomic_prot_pfn() should be really fast. It requires a 
single-page-single-processor tlb flush per map-unmap operation. For 
long-term and larger buffer maps we'll either use ioremap() or vmap() 
depending on memory type.
> And we really should be keeping pools of pages around with the right type--we 
> don't want to change attributes any more than absolutely necessary (the ia64 
> uncached allocator does this right already, and in the DRM we actually keep 
> the mappings around right now afaict).  We can allocate & free large chunks 
> at a time to deal with memory pressure one way or another...
>
>   
Agreed.
>> 3) Have code in x86/pageattr.c decide which "default" mappings are
>> present on the given pages and set them all as non-present.
>> In fact, there is already such a function in pageattr.c:
>>
>> kernel_map_pages(struct page *pages, int numpages, bool enable);
>>
>> But it's for debugging purposes only, could we use and export a variant
>> of this?
>>
>> I guess I need a hint as to what's considered allowable here, to avoid
>> spending a lot of time on something that will in the end get rejected
>> anyway.
>>     
>
> I think we do want an interface like this, even if only for graphics memory 
> (though I suspect some other device might like it as well).  We'll also want 
> to do it at runtime periodically to allocate new hunks of memory for graphics 
> use, so a boot-time only thing won't work.
>
> Also, to make the API readable, we'd probably want to split the function into 
> kernel_map_pages(..., enum memory_type type) and kernel_unmap_pages(...) 
> (though like I said I think we really should be mapping them WC not umapping 
> them altogether, since we do want to hit the ring buffer from the kernel with 
> the WC type for example).
>   
I think ring-buffers are using ioremap() or vmap() already today. We can 
use these to get WC-type access also in the future. The only time we use 
the linear kernel mapping today is for single page access while patching 
up command buffers.

So it's really a tradeoff between slightly faster single-page access and 
really fast unbind-read-bind operations. That's really why I'm 
suggesting unmapped pages.

> Question is, will kernel_map_pages catch all the various kernel mappings 
> (regular identity map, large page text map,e tc.), perform the proper 
> flushing, and generally make sure we don't machine check on all platforms?
>
>   
Probably not yet. OTOH, it seems like x86 is the only platform today 
that tries to do something about the AGP page aliasing.
> Jesse
>   
/Thomas




^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-04-07 19:51                                       ` Thomas Hellström
@ 2008-04-07 19:59                                         ` Jesse Barnes
  2008-04-07 20:46                                           ` Thomas Hellström
  2008-04-07 20:56                                           ` Arjan van de Ven
  0 siblings, 2 replies; 39+ messages in thread
From: Jesse Barnes @ 2008-04-07 19:59 UTC (permalink / raw)
  To: Thomas Hellström
  Cc: Arjan van de Ven, Andi Kleen, Dave Airlie, linux-kernel, tglx, mingo

On Monday, April 07, 2008 12:51 pm Thomas Hellström wrote:
> > Hopefully the WC stuff will be upstream right after 2.6.25 comes out. 
> > Any reason why we shouldn't keep the pages mapped in the kernel as WC
> > assuming the interface is there?
>
> If the pages are unmapped, we can get reasonable speed doing
> unbind-read-bind operations, kernel accesses to the memory will need to
> use an iomap_atomic_prot_pfn() type of operation.
> No IPI global tlb flushes needed for kernel mapping changes during
> unbind-read-bind and no cache flushes needed either if we write-protect
> the user-space mappings properly, or very limited cache flushes if we
> keep dirty-written-while-cached flags for each page.
>
> If the pages are wc-d we'll need two extra IPI global tlb flushes and a
> buffer-size cache flush every time we do unbind-read-bind, but OTOH we
> don't need the iomap_atomic_prot_pfn() to access single pages from the
> kernel.

Why would we need to flush at all at unbind-read-bind time?  We should be able 
to leave pages in the WC state even when we unbind them, then when we need to 
bind them back into the GTT they'll be ready, but maybe I'm misunderstanding 
you here...

> > Also, to make the API readable, we'd probably want to split the function
> > into kernel_map_pages(..., enum memory_type type) and
> > kernel_unmap_pages(...) (though like I said I think we really should be
> > mapping them WC not umapping them altogether, since we do want to hit the
> > ring buffer from the kernel with the WC type for example).
>
> I think ring-buffers are using ioremap() or vmap() already today. We can
> use these to get WC-type access also in the future. The only time we use
> the linear kernel mapping today is for single page access while patching
> up command buffers.

Yeah, they're ioremapped now, but that's a problem since with the PAT patches 
they'll be mapped hard UC (right now it just happens to work).

Thanks,
Jesse

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-04-07 19:59                                         ` Jesse Barnes
@ 2008-04-07 20:46                                           ` Thomas Hellström
  2008-04-07 20:57                                             ` Arjan van de Ven
  2008-04-07 21:04                                             ` Jesse Barnes
  2008-04-07 20:56                                           ` Arjan van de Ven
  1 sibling, 2 replies; 39+ messages in thread
From: Thomas Hellström @ 2008-04-07 20:46 UTC (permalink / raw)
  To: Jesse Barnes
  Cc: Arjan van de Ven, Andi Kleen, Dave Airlie, linux-kernel, tglx, mingo

Jesse Barnes wrote:
> On Monday, April 07, 2008 12:51 pm Thomas Hellström wrote:
>   
>>> Hopefully the WC stuff will be upstream right after 2.6.25 comes out. 
>>> Any reason why we shouldn't keep the pages mapped in the kernel as WC
>>> assuming the interface is there?
>>>       
>> If the pages are unmapped, we can get reasonable speed doing
>> unbind-read-bind operations, kernel accesses to the memory will need to
>> use an iomap_atomic_prot_pfn() type of operation.
>> No IPI global tlb flushes needed for kernel mapping changes during
>> unbind-read-bind and no cache flushes needed either if we write-protect
>> the user-space mappings properly, or very limited cache flushes if we
>> keep dirty-written-while-cached flags for each page.
>>
>> If the pages are wc-d we'll need two extra IPI global tlb flushes and a
>> buffer-size cache flush every time we do unbind-read-bind, but OTOH we
>> don't need the iomap_atomic_prot_pfn() to access single pages from the
>> kernel.
>>     
>
> Why would we need to flush at all at unbind-read-bind time?  We should be able 
> to leave pages in the WC state even when we unbind them, then when we need to 
> bind them back into the GTT they'll be ready, but maybe I'm misunderstanding 
> you here...
>
>   
We want to make the user-space mapping cache-coherent after unbind 
during read, to have any serious read-speed, and the linear kernel map 
has to follow, unless it's non-present. Even if it's non present, we 
need to flush whatever was written through the user-space mapping from 
the cache when rebinding. Having the user-space mapping read-only when 
possible will help avoid this.
>>> Also, to make the API readable, we'd probably want to split the function
>>> into kernel_map_pages(..., enum memory_type type) and
>>> kernel_unmap_pages(...) (though like I said I think we really should be
>>> mapping them WC not umapping them altogether, since we do want to hit the
>>> ring buffer from the kernel with the WC type for example).
>>>       
>> I think ring-buffers are using ioremap() or vmap() already today. We can
>> use these to get WC-type access also in the future. The only time we use
>> the linear kernel mapping today is for single page access while patching
>> up command buffers.
>>     
>
> Yeah, they're ioremapped now, but that's a problem since with the PAT patches 
> they'll be mapped hard UC (right now it just happens to work).
>   
Ouch, so we'll be needing an ioremap_wc(), I guess. We probably 
shouldn't use the linear kernel map for this anyway, since that would 
require a chipset flush for each ring commit.  We can actually use 
vmap() with a wc page protection for that, but an ioremap_wc() would 
certainly save us a lot of trouble.
> Thanks,
> Jesse
>   
/Thomas





^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-04-07 19:59                                         ` Jesse Barnes
  2008-04-07 20:46                                           ` Thomas Hellström
@ 2008-04-07 20:56                                           ` Arjan van de Ven
  2008-04-07 21:02                                             ` Jesse Barnes
  1 sibling, 1 reply; 39+ messages in thread
From: Arjan van de Ven @ 2008-04-07 20:56 UTC (permalink / raw)
  To: Jesse Barnes
  Cc: Thomas Hellström, Andi Kleen, Dave Airlie, linux-kernel,
	tglx, mingo

Jesse Barnes wrote:
> 
> Yeah, they're ioremapped now, but that's a problem since with the PAT patches 
> they'll be mapped hard UC (right now it just happens to work).
> 
clearly this needs to use ioremap_wc(), I was assuming nothing else....
(code mapping things uncached and then frobbing the MTRR's underneath really shouldn't
be allowed to live from 2.6.26 and forward. Esp for the 3D drivers we KNOW that we
want _wc so that is a clear case of just fixing the code to do the right thing)

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-04-07 20:46                                           ` Thomas Hellström
@ 2008-04-07 20:57                                             ` Arjan van de Ven
  2008-04-08  6:12                                               ` Thomas Hellström
  2008-04-07 21:04                                             ` Jesse Barnes
  1 sibling, 1 reply; 39+ messages in thread
From: Arjan van de Ven @ 2008-04-07 20:57 UTC (permalink / raw)
  To: Thomas Hellström
  Cc: Jesse Barnes, Andi Kleen, Dave Airlie, linux-kernel, tglx, mingo

Thomas Hellström wrote:
> Ouch, so we'll be needing an ioremap_wc(), I guess. We probably 
> shouldn't use the linear kernel map for this anyway, since that would 
> require a chipset flush for each ring commit.  We can actually use 
> vmap() with a wc page protection for that, but an ioremap_wc() would 
> certainly save us a lot of trouble.

please don't use vmap() as a hack for mapping IO memory when you should use ioremap_wc()....
Lets get this thing right rather than finding whichever hack of the day
happens to work with the implementation from yesterday.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-04-07 20:56                                           ` Arjan van de Ven
@ 2008-04-07 21:02                                             ` Jesse Barnes
  2008-04-07 21:09                                               ` Jesse Barnes
  0 siblings, 1 reply; 39+ messages in thread
From: Jesse Barnes @ 2008-04-07 21:02 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Thomas Hellström, Andi Kleen, Dave Airlie, linux-kernel,
	tglx, mingo

On Monday, April 07, 2008 1:56 pm Arjan van de Ven wrote:
> Jesse Barnes wrote:
> > Yeah, they're ioremapped now, but that's a problem since with the PAT
> > patches they'll be mapped hard UC (right now it just happens to work).
>
> clearly this needs to use ioremap_wc(), I was assuming nothing else....
> (code mapping things uncached and then frobbing the MTRR's underneath
> really shouldn't be allowed to live from 2.6.26 and forward. Esp for the 3D
> drivers we KNOW that we want _wc so that is a clear case of just fixing the
> code to do the right thing)

Totally agreed, the AGP code needs to be specific about what it wants...

Jesse

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-04-07 20:46                                           ` Thomas Hellström
  2008-04-07 20:57                                             ` Arjan van de Ven
@ 2008-04-07 21:04                                             ` Jesse Barnes
  2008-04-08  6:21                                               ` Thomas Hellström
  1 sibling, 1 reply; 39+ messages in thread
From: Jesse Barnes @ 2008-04-07 21:04 UTC (permalink / raw)
  To: Thomas Hellström
  Cc: Arjan van de Ven, Andi Kleen, Dave Airlie, linux-kernel, tglx, mingo

On Monday, April 07, 2008 1:46 pm Thomas Hellström wrote:
> > Why would we need to flush at all at unbind-read-bind time?  We should be
> > able to leave pages in the WC state even when we unbind them, then when
> > we need to bind them back into the GTT they'll be ready, but maybe I'm
> > misunderstanding you here...
>
> We want to make the user-space mapping cache-coherent after unbind
> during read, to have any serious read-speed, and the linear kernel map
> has to follow, unless it's non-present. Even if it's non present, we
> need to flush whatever was written through the user-space mapping from
> the cache when rebinding. Having the user-space mapping read-only when
> possible will help avoid this.

Ah, you actually want to *read* from memory?  Yeah that would be really slow 
if we left it UC or WC.  But I thought that was really only necessary for 
relocation, and keithp dealt with that with the "presumed offset" stuff?  Are 
you seeing other cases where we need to read back frequently?

> > Yeah, they're ioremapped now, but that's a problem since with the PAT
> > patches they'll be mapped hard UC (right now it just happens to work).
>
> Ouch, so we'll be needing an ioremap_wc(), I guess. We probably
> shouldn't use the linear kernel map for this anyway, since that would
> require a chipset flush for each ring commit.  We can actually use
> vmap() with a wc page protection for that, but an ioremap_wc() would
> certainly save us a lot of trouble.

Yeah, ioremap_wc is probably the best thing to use in the DRM.  And in AGP 
we'll need to clarify things more, since some drivers can do cacheable 
memory, some want UC, etc.

Thanks,
Jesse

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-04-07 21:02                                             ` Jesse Barnes
@ 2008-04-07 21:09                                               ` Jesse Barnes
  0 siblings, 0 replies; 39+ messages in thread
From: Jesse Barnes @ 2008-04-07 21:09 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Thomas Hellström, Andi Kleen, Dave Airlie, linux-kernel,
	tglx, mingo

On Monday, April 07, 2008 2:02 pm Jesse Barnes wrote:
> On Monday, April 07, 2008 1:56 pm Arjan van de Ven wrote:
> > Jesse Barnes wrote:
> > > Yeah, they're ioremapped now, but that's a problem since with the PAT
> > > patches they'll be mapped hard UC (right now it just happens to work).
> >
> > clearly this needs to use ioremap_wc(), I was assuming nothing else....
> > (code mapping things uncached and then frobbing the MTRR's underneath
> > really shouldn't be allowed to live from 2.6.26 and forward. Esp for the
> > 3D drivers we KNOW that we want _wc so that is a clear case of just
> > fixing the code to do the right thing)
>
> Totally agreed, the AGP code needs to be specific about what it wants...

Err DRM code (but AGP does too).

Jesse

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-04-07 20:57                                             ` Arjan van de Ven
@ 2008-04-08  6:12                                               ` Thomas Hellström
  0 siblings, 0 replies; 39+ messages in thread
From: Thomas Hellström @ 2008-04-08  6:12 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Jesse Barnes, Andi Kleen, Dave Airlie, linux-kernel, tglx, mingo

Arjan van de Ven wrote:
> Thomas Hellström wrote:
>> Ouch, so we'll be needing an ioremap_wc(), I guess. We probably 
>> shouldn't use the linear kernel map for this anyway, since that would 
>> require a chipset flush for each ring commit.  We can actually use 
>> vmap() with a wc page protection for that, but an ioremap_wc() would 
>> certainly save us a lot of trouble.
>
> please don't use vmap() as a hack for mapping IO memory when you 
> should use ioremap_wc()....
I think you've misinterpreted. We must use vmap() if we need to map the 
cpu side of the aperture, (real pages) and ioremap() if we map the 
device side, which is io memory. Some devices accept both, but what I 
was saying was we should try to avoid the vmap() alternative, since it, 
like using the kernel mapping directly, requires a chipset flush after 
update.
> Lets get this thing right rather than finding whichever hack of the day
> happens to work with the implementation from yesterday.
/Thomas




^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-04-07 21:04                                             ` Jesse Barnes
@ 2008-04-08  6:21                                               ` Thomas Hellström
  2008-04-08 14:27                                                 ` Jesse Barnes
  0 siblings, 1 reply; 39+ messages in thread
From: Thomas Hellström @ 2008-04-08  6:21 UTC (permalink / raw)
  To: Jesse Barnes
  Cc: Arjan van de Ven, Andi Kleen, Dave Airlie, linux-kernel, tglx, mingo

Jesse Barnes wrote:
> On Monday, April 07, 2008 1:46 pm Thomas Hellström wrote:
>   
>>> Why would we need to flush at all at unbind-read-bind time?  We should be
>>> able to leave pages in the WC state even when we unbind them, then when
>>> we need to bind them back into the GTT they'll be ready, but maybe I'm
>>> misunderstanding you here...
>>>       
>> We want to make the user-space mapping cache-coherent after unbind
>> during read, to have any serious read-speed, and the linear kernel map
>> has to follow, unless it's non-present. Even if it's non present, we
>> need to flush whatever was written through the user-space mapping from
>> the cache when rebinding. Having the user-space mapping read-only when
>> possible will help avoid this.
>>     
>
> Ah, you actually want to *read* from memory?  Yeah that would be really slow 
> if we left it UC or WC.  But I thought that was really only necessary for 
> relocation, and keithp dealt with that with the "presumed offset" stuff?  Are 
> you seeing other cases where we need to read back frequently?
>   
While keithp's "presumed offset" is really good stuff, we should never 
need to read from device memory during relocations, I think. I was 
thinking of EXA and OpenGL software fallback cases, "download from 
screen" and "readpixels" types of operation, as well as 
pixel-buffer-object map in read mode.

/Thomas




^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] x86: create array based interface to change page attribute
  2008-04-08  6:21                                               ` Thomas Hellström
@ 2008-04-08 14:27                                                 ` Jesse Barnes
  0 siblings, 0 replies; 39+ messages in thread
From: Jesse Barnes @ 2008-04-08 14:27 UTC (permalink / raw)
  To: Thomas Hellström
  Cc: Arjan van de Ven, Andi Kleen, Dave Airlie, linux-kernel, tglx, mingo

On Monday, April 07, 2008 11:21 pm Thomas Hellström wrote:
> Jesse Barnes wrote:
> > On Monday, April 07, 2008 1:46 pm Thomas Hellström wrote:
> >>> Why would we need to flush at all at unbind-read-bind time?  We should
> >>> be able to leave pages in the WC state even when we unbind them, then
> >>> when we need to bind them back into the GTT they'll be ready, but maybe
> >>> I'm misunderstanding you here...
> >>
> >> We want to make the user-space mapping cache-coherent after unbind
> >> during read, to have any serious read-speed, and the linear kernel map
> >> has to follow, unless it's non-present. Even if it's non present, we
> >> need to flush whatever was written through the user-space mapping from
> >> the cache when rebinding. Having the user-space mapping read-only when
> >> possible will help avoid this.
> >
> > Ah, you actually want to *read* from memory?  Yeah that would be really
> > slow if we left it UC or WC.  But I thought that was really only
> > necessary for relocation, and keithp dealt with that with the "presumed
> > offset" stuff?  Are you seeing other cases where we need to read back
> > frequently?
>
> While keithp's "presumed offset" is really good stuff, we should never
> need to read from device memory during relocations, I think. I was
> thinking of EXA and OpenGL software fallback cases, "download from
> screen" and "readpixels" types of operation, as well as
> pixel-buffer-object map in read mode.

Hm, yeah, anholt was telling me about some of these cases offline too.  This 
is what makes things really difficult...

Jesse

^ permalink raw reply	[flat|nested] 39+ messages in thread

end of thread, other threads:[~2008-04-08 14:29 UTC | newest]

Thread overview: 39+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-03-31  5:19 [PATCH] x86: create array based interface to change page attribute Dave Airlie
2008-03-31  6:54 ` Thomas Hellström
2008-03-31  9:33   ` Arjan van de Ven
2008-03-31 11:04     ` Thomas Hellström
2008-03-31  7:25 ` Andi Kleen
2008-03-31  7:55   ` Thomas Hellström
2008-03-31  8:38     ` Andi Kleen
2008-03-31  9:06       ` Thomas Hellström
2008-03-31  9:18         ` Andi Kleen
2008-03-31 11:10           ` Thomas Hellström
2008-03-31 16:08             ` Arjan van de Ven
2008-03-31 16:41               ` Thomas Hellström
2008-03-31 16:49                 ` Arjan van de Ven
2008-03-31 17:26                   ` Thomas Hellström
2008-04-01 20:58                     ` Arjan van de Ven
2008-04-01 21:29                       ` Thomas Hellström
2008-04-01 22:30                         ` Arjan van de Ven
2008-04-02  6:30                           ` Thomas Hellström
2008-04-02  6:35                             ` Arjan van de Ven
2008-04-02  6:59                               ` Thomas Hellström
2008-04-02 14:01                                 ` Arjan van de Ven
2008-04-02 17:57                                   ` Thomas Hellström
2008-04-07 18:23                                     ` Jesse Barnes
2008-04-07 19:51                                       ` Thomas Hellström
2008-04-07 19:59                                         ` Jesse Barnes
2008-04-07 20:46                                           ` Thomas Hellström
2008-04-07 20:57                                             ` Arjan van de Ven
2008-04-08  6:12                                               ` Thomas Hellström
2008-04-07 21:04                                             ` Jesse Barnes
2008-04-08  6:21                                               ` Thomas Hellström
2008-04-08 14:27                                                 ` Jesse Barnes
2008-04-07 20:56                                           ` Arjan van de Ven
2008-04-07 21:02                                             ` Jesse Barnes
2008-04-07 21:09                                               ` Jesse Barnes
2008-03-31  9:56   ` Arjan van de Ven
2008-03-31 11:21   ` Dave Airlie
2008-03-31 11:46     ` Andi Kleen
2008-04-02  1:35       ` Dave Airlie
2008-04-01 18:20 ` Arjan van de Ven

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).