All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
@ 2014-06-04 13:29 Christian König
  2014-06-04 13:29 ` [PATCH 2/3] drm/radeon: remove range check from *_gart_set_page Christian König
                   ` (3 more replies)
  0 siblings, 4 replies; 36+ messages in thread
From: Christian König @ 2014-06-04 13:29 UTC (permalink / raw)
  To: alexdeucher; +Cc: dri-devel

From: Christian König <christian.koenig@amd.com>

When we set the valid bit on invalid GART entries they are
loaded into the TLB when an adjacent entry is loaded. This
poisons the TLB with invalid entries which are sometimes
not correctly removed on TLB flush.

For stable inclusion the patch probably needs to be modified a bit.

Signed-off-by: Christian König <christian.koenig@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/radeon/rs600.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index 0a8be63..e0465b2 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -634,7 +634,10 @@ int rs600_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
 		return -EINVAL;
 	}
 	addr = addr & 0xFFFFFFFFFFFFF000ULL;
-	addr |= R600_PTE_GART;
+	if (addr == rdev->dummy_page.addr)
+		addr |= R600_PTE_SYSTEM | R600_PTE_SNOOPED;
+	else
+		addr |= R600_PTE_GART;
 	writeq(addr, ptr + (i * 8));
 	return 0;
 }
-- 
1.9.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 2/3] drm/radeon: remove range check from *_gart_set_page
  2014-06-04 13:29 [PATCH 1/3] drm/radeon: stop poisoning the GART TLB Christian König
@ 2014-06-04 13:29 ` Christian König
  2014-06-04 13:29 ` [PATCH 3/3] drm/radeon: use the SDMA on for buffer moves on CIK again Christian König
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 36+ messages in thread
From: Christian König @ 2014-06-04 13:29 UTC (permalink / raw)
  To: alexdeucher; +Cc: dri-devel

From: Christian König <christian.koenig@amd.com>

We never check the return value anyway, and if the
index weren't valid we would crash way before calling
these functions.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/radeon/r100.c        |  8 ++------
 drivers/gpu/drm/radeon/r300.c        |  7 ++-----
 drivers/gpu/drm/radeon/radeon.h      |  3 ++-
 drivers/gpu/drm/radeon/radeon_asic.h | 12 ++++++++----
 drivers/gpu/drm/radeon/rs400.c       |  7 +------
 drivers/gpu/drm/radeon/rs600.c       |  6 +-----
 6 files changed, 16 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index ad99813..1544efc 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -682,15 +682,11 @@ void r100_pci_gart_disable(struct radeon_device *rdev)
 	WREG32(RADEON_AIC_HI_ADDR, 0);
 }
 
-int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
+void r100_pci_gart_set_page(struct radeon_device *rdev, unsigned i,
+			    uint64_t addr)
 {
 	u32 *gtt = rdev->gart.ptr;
-
-	if (i < 0 || i > rdev->gart.num_gpu_pages) {
-		return -EINVAL;
-	}
 	gtt[i] = cpu_to_le32(lower_32_bits(addr));
-	return 0;
 }
 
 void r100_pci_gart_fini(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 206caf9..3c21d77 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -72,13 +72,11 @@ void rv370_pcie_gart_tlb_flush(struct radeon_device *rdev)
 #define R300_PTE_WRITEABLE (1 << 2)
 #define R300_PTE_READABLE  (1 << 3)
 
-int rv370_pcie_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
+void rv370_pcie_gart_set_page(struct radeon_device *rdev, unsigned i,
+			      uint64_t addr)
 {
 	void __iomem *ptr = rdev->gart.ptr;
 
-	if (i < 0 || i > rdev->gart.num_gpu_pages) {
-		return -EINVAL;
-	}
 	addr = (lower_32_bits(addr) >> 8) |
 	       ((upper_32_bits(addr) & 0xff) << 24) |
 	       R300_PTE_WRITEABLE | R300_PTE_READABLE;
@@ -86,7 +84,6 @@ int rv370_pcie_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
 	 * on powerpc without HW swappers, it'll get swapped on way
 	 * into VRAM - so no need for cpu_to_le32 on VRAM tables */
 	writel(addr, ((void __iomem *)ptr) + (i * 4));
-	return 0;
 }
 
 int rv370_pcie_gart_init(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 0661a77..c08987c 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1778,7 +1778,8 @@ struct radeon_asic {
 	/* gart */
 	struct {
 		void (*tlb_flush)(struct radeon_device *rdev);
-		int (*set_page)(struct radeon_device *rdev, int i, uint64_t addr);
+		void (*set_page)(struct radeon_device *rdev, unsigned i,
+				 uint64_t addr);
 	} gart;
 	struct {
 		int (*init)(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 0eab015..01e7c0a 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -67,7 +67,8 @@ bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp);
 int r100_asic_reset(struct radeon_device *rdev);
 u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc);
 void r100_pci_gart_tlb_flush(struct radeon_device *rdev);
-int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr);
+void r100_pci_gart_set_page(struct radeon_device *rdev, unsigned i,
+			    uint64_t addr);
 void r100_ring_start(struct radeon_device *rdev, struct radeon_ring *ring);
 int r100_irq_set(struct radeon_device *rdev);
 int r100_irq_process(struct radeon_device *rdev);
@@ -171,7 +172,8 @@ extern void r300_fence_ring_emit(struct radeon_device *rdev,
 				struct radeon_fence *fence);
 extern int r300_cs_parse(struct radeon_cs_parser *p);
 extern void rv370_pcie_gart_tlb_flush(struct radeon_device *rdev);
-extern int rv370_pcie_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr);
+extern void rv370_pcie_gart_set_page(struct radeon_device *rdev, unsigned i,
+				     uint64_t addr);
 extern void rv370_set_pcie_lanes(struct radeon_device *rdev, int lanes);
 extern int rv370_get_pcie_lanes(struct radeon_device *rdev);
 extern void r300_set_reg_safe(struct radeon_device *rdev);
@@ -206,7 +208,8 @@ extern void rs400_fini(struct radeon_device *rdev);
 extern int rs400_suspend(struct radeon_device *rdev);
 extern int rs400_resume(struct radeon_device *rdev);
 void rs400_gart_tlb_flush(struct radeon_device *rdev);
-int rs400_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr);
+void rs400_gart_set_page(struct radeon_device *rdev, unsigned i,
+			 uint64_t addr);
 uint32_t rs400_mc_rreg(struct radeon_device *rdev, uint32_t reg);
 void rs400_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 int rs400_gart_init(struct radeon_device *rdev);
@@ -229,7 +232,8 @@ int rs600_irq_process(struct radeon_device *rdev);
 void rs600_irq_disable(struct radeon_device *rdev);
 u32 rs600_get_vblank_counter(struct radeon_device *rdev, int crtc);
 void rs600_gart_tlb_flush(struct radeon_device *rdev);
-int rs600_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr);
+void rs600_gart_set_page(struct radeon_device *rdev, unsigned i,
+			 uint64_t addr);
 uint32_t rs600_mc_rreg(struct radeon_device *rdev, uint32_t reg);
 void rs600_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 void rs600_bandwidth_update(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c
index 130d5cc..a0f96de 100644
--- a/drivers/gpu/drm/radeon/rs400.c
+++ b/drivers/gpu/drm/radeon/rs400.c
@@ -212,21 +212,16 @@ void rs400_gart_fini(struct radeon_device *rdev)
 #define RS400_PTE_WRITEABLE (1 << 2)
 #define RS400_PTE_READABLE  (1 << 3)
 
-int rs400_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
+void rs400_gart_set_page(struct radeon_device *rdev, unsigned i, uint64_t addr)
 {
 	uint32_t entry;
 	u32 *gtt = rdev->gart.ptr;
 
-	if (i < 0 || i > rdev->gart.num_gpu_pages) {
-		return -EINVAL;
-	}
-
 	entry = (lower_32_bits(addr) & PAGE_MASK) |
 		((upper_32_bits(addr) & 0xff) << 4) |
 		RS400_PTE_WRITEABLE | RS400_PTE_READABLE;
 	entry = cpu_to_le32(entry);
 	gtt[i] = entry;
-	return 0;
 }
 
 int rs400_mc_wait_for_idle(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index e0465b2..d1a35cb 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -626,20 +626,16 @@ static void rs600_gart_fini(struct radeon_device *rdev)
 	radeon_gart_table_vram_free(rdev);
 }
 
-int rs600_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
+void rs600_gart_set_page(struct radeon_device *rdev, unsigned i, uint64_t addr)
 {
 	void __iomem *ptr = (void *)rdev->gart.ptr;
 
-	if (i < 0 || i > rdev->gart.num_gpu_pages) {
-		return -EINVAL;
-	}
 	addr = addr & 0xFFFFFFFFFFFFF000ULL;
 	if (addr == rdev->dummy_page.addr)
 		addr |= R600_PTE_SYSTEM | R600_PTE_SNOOPED;
 	else
 		addr |= R600_PTE_GART;
 	writeq(addr, ptr + (i * 8));
-	return 0;
 }
 
 int rs600_irq_set(struct radeon_device *rdev)
-- 
1.9.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 3/3] drm/radeon: use the SDMA on for buffer moves on CIK again
  2014-06-04 13:29 [PATCH 1/3] drm/radeon: stop poisoning the GART TLB Christian König
  2014-06-04 13:29 ` [PATCH 2/3] drm/radeon: remove range check from *_gart_set_page Christian König
@ 2014-06-04 13:29 ` Christian König
  2014-06-04 13:46 ` [PATCH 1/3] drm/radeon: stop poisoning the GART TLB Alex Deucher
  2014-06-10 23:30 ` Marek Olšák
  3 siblings, 0 replies; 36+ messages in thread
From: Christian König @ 2014-06-04 13:29 UTC (permalink / raw)
  To: alexdeucher; +Cc: dri-devel

From: Christian König <christian.koenig@amd.com>

The underlying reason for the crashes seems to be fixed now.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/radeon/radeon_asic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index 34ea53d..34b9aa9 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -2029,8 +2029,8 @@ static struct radeon_asic ci_asic = {
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
 		.dma = &cik_copy_dma,
 		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
-		.copy = &cik_copy_cpdma,
-		.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.copy = &cik_copy_dma,
+		.copy_ring_index = R600_RING_TYPE_DMA_INDEX,
 	},
 	.surface = {
 		.set_reg = r600_set_surface_reg,
-- 
1.9.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-06-04 13:29 [PATCH 1/3] drm/radeon: stop poisoning the GART TLB Christian König
  2014-06-04 13:29 ` [PATCH 2/3] drm/radeon: remove range check from *_gart_set_page Christian König
  2014-06-04 13:29 ` [PATCH 3/3] drm/radeon: use the SDMA on for buffer moves on CIK again Christian König
@ 2014-06-04 13:46 ` Alex Deucher
  2014-06-04 13:50   ` Christian König
  2014-06-10 23:30 ` Marek Olšák
  3 siblings, 1 reply; 36+ messages in thread
From: Alex Deucher @ 2014-06-04 13:46 UTC (permalink / raw)
  To: Christian König; +Cc: Mailing list - DRI developers

On Wed, Jun 4, 2014 at 9:29 AM, Christian König <deathsimple@vodafone.de> wrote:
> From: Christian König <christian.koenig@amd.com>
>
> When we set the valid bit on invalid GART entries they are
> loaded into the TLB when an adjacent entry is loaded. This
> poisons the TLB with invalid entries which are sometimes
> not correctly removed on TLB flush.
>
> For stable inclusion the patch probably needs to be modified a bit.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> Cc: stable@vger.kernel.org

Series is:
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

stable cc on patch 2 or 3 as well?  I suppose we'd need to modify the
patches anyway so that they would apply on older kernels.

Alex

> ---
>  drivers/gpu/drm/radeon/rs600.c | 5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
> index 0a8be63..e0465b2 100644
> --- a/drivers/gpu/drm/radeon/rs600.c
> +++ b/drivers/gpu/drm/radeon/rs600.c
> @@ -634,7 +634,10 @@ int rs600_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
>                 return -EINVAL;
>         }
>         addr = addr & 0xFFFFFFFFFFFFF000ULL;
> -       addr |= R600_PTE_GART;
> +       if (addr == rdev->dummy_page.addr)
> +               addr |= R600_PTE_SYSTEM | R600_PTE_SNOOPED;
> +       else
> +               addr |= R600_PTE_GART;
>         writeq(addr, ptr + (i * 8));
>         return 0;
>  }
> --
> 1.9.1
>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-06-04 13:46 ` [PATCH 1/3] drm/radeon: stop poisoning the GART TLB Alex Deucher
@ 2014-06-04 13:50   ` Christian König
  0 siblings, 0 replies; 36+ messages in thread
From: Christian König @ 2014-06-04 13:50 UTC (permalink / raw)
  To: Alex Deucher; +Cc: Mailing list - DRI developers

Am 04.06.2014 15:46, schrieb Alex Deucher:
> On Wed, Jun 4, 2014 at 9:29 AM, Christian König <deathsimple@vodafone.de> wrote:
>> From: Christian König <christian.koenig@amd.com>
>>
>> When we set the valid bit on invalid GART entries they are
>> loaded into the TLB when an adjacent entry is loaded. This
>> poisons the TLB with invalid entries which are sometimes
>> not correctly removed on TLB flush.
>>
>> For stable inclusion the patch probably needs to be modified a bit.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> Cc: stable@vger.kernel.org
> Series is:
> Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
>
> stable cc on patch 2 or 3 as well?  I suppose we'd need to modify the
> patches anyway so that they would apply on older kernels anyway.

No, the second patch is just a cleanup that removes unnecessary
checks, and I think sticking with the CPDMA on stable kernels is
probably still a good idea.

Christian

>
> Alex
>
>> ---
>>   drivers/gpu/drm/radeon/rs600.c | 5 ++++-
>>   1 file changed, 4 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
>> index 0a8be63..e0465b2 100644
>> --- a/drivers/gpu/drm/radeon/rs600.c
>> +++ b/drivers/gpu/drm/radeon/rs600.c
>> @@ -634,7 +634,10 @@ int rs600_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
>>                  return -EINVAL;
>>          }
>>          addr = addr & 0xFFFFFFFFFFFFF000ULL;
>> -       addr |= R600_PTE_GART;
>> +       if (addr == rdev->dummy_page.addr)
>> +               addr |= R600_PTE_SYSTEM | R600_PTE_SNOOPED;
>> +       else
>> +               addr |= R600_PTE_GART;
>>          writeq(addr, ptr + (i * 8));
>>          return 0;
>>   }
>> --
>> 1.9.1
>>

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-06-04 13:29 [PATCH 1/3] drm/radeon: stop poisoning the GART TLB Christian König
                   ` (2 preceding siblings ...)
  2014-06-04 13:46 ` [PATCH 1/3] drm/radeon: stop poisoning the GART TLB Alex Deucher
@ 2014-06-10 23:30 ` Marek Olšák
  2014-06-11  9:29   ` Christian König
  3 siblings, 1 reply; 36+ messages in thread
From: Marek Olšák @ 2014-06-10 23:30 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

Sorry to tell you the bad news. This patch doesn't fix the hangs on my machine.

I tested drm-next-3.16 from Alex's tree. I also switched copying from
SDMA to CP DMA, which hung too.

I also tried this:

git checkout (the problematic commit):
6d2f294 - drm/radeon: use normal BOs for the page tables v4

git cherry-pick (fixes):
0e97703c - drm/radeon: add define for flags used in R600+ GTT
0986c1a5 - drm/radeon: stop poisoning the GART TLB
4906f689 - drm/radeon: fix page directory update size estimation
4b095566 - drm/radeon: fix buffer placement under memory pressure v2

Then I tested both SDMA and CP DMA copying. Both were unstable.

Testing was done with piglit / quick.tests.

Marek


On Wed, Jun 4, 2014 at 3:29 PM, Christian König <deathsimple@vodafone.de> wrote:
> From: Christian König <christian.koenig@amd.com>
>
> When we set the valid bit on invalid GART entries they are
> loaded into the TLB when an adjacent entry is loaded. This
> poisons the TLB with invalid entries which are sometimes
> not correctly removed on TLB flush.
>
> For stable inclusion the patch probably needs to be modified a bit.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> Cc: stable@vger.kernel.org
> ---
>  drivers/gpu/drm/radeon/rs600.c | 5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
> index 0a8be63..e0465b2 100644
> --- a/drivers/gpu/drm/radeon/rs600.c
> +++ b/drivers/gpu/drm/radeon/rs600.c
> @@ -634,7 +634,10 @@ int rs600_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
>                 return -EINVAL;
>         }
>         addr = addr & 0xFFFFFFFFFFFFF000ULL;
> -       addr |= R600_PTE_GART;
> +       if (addr == rdev->dummy_page.addr)
> +               addr |= R600_PTE_SYSTEM | R600_PTE_SNOOPED;
> +       else
> +               addr |= R600_PTE_GART;
>         writeq(addr, ptr + (i * 8));
>         return 0;
>  }
> --
> 1.9.1
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-06-10 23:30 ` Marek Olšák
@ 2014-06-11  9:29   ` Christian König
  2014-06-11 10:56     ` Marek Olšák
  0 siblings, 1 reply; 36+ messages in thread
From: Christian König @ 2014-06-11  9:29 UTC (permalink / raw)
  To: Marek Olšák; +Cc: dri-devel

Crap, I had been meaning to check back with you on whether that really
fixes your problems.

Thanks for the info. This crash also only happens on CIK, doesn't it?

Christian.

Am 11.06.2014 01:30, schrieb Marek Olšák:
> Sorry to tell you the bad news. This patch doesn't fix the hangs on my machine.
>
> I tested drm-next-3.16 from Alex's tree. I also switched copying from
> SDMA to CP DMA, which hung too.
>
> I also tried this:
>
> git checkout (the problematic commit):
> 6d2f294 - drm/radeon: use normal BOs for the page tables v4
>
> git cherry-pick (fixes):
> 0e97703c - drm/radeon: add define for flags used in R600+ GTT
> 0986c1a5 - drm/radeon: stop poisoning the GART TLB
> 4906f689 - drm/radeon: fix page directory update size estimation
> 4b095566 - drm/radeon: fix buffer placement under memory pressure v2
>
> Then I tested both SDMA and CP DMA copying. Both were unstable.
>
> Testing was done with piglit / quick.tests.
>
> Marek
>
>
> On Wed, Jun 4, 2014 at 3:29 PM, Christian König <deathsimple@vodafone.de> wrote:
>> From: Christian König <christian.koenig@amd.com>
>>
>> When we set the valid bit on invalid GART entries they are
>> loaded into the TLB when an adjacent entry is loaded. This
>> poisons the TLB with invalid entries which are sometimes
>> not correctly removed on TLB flush.
>>
>> For stable inclusion the patch probably needs to be modified a bit.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> Cc: stable@vger.kernel.org
>> ---
>>   drivers/gpu/drm/radeon/rs600.c | 5 ++++-
>>   1 file changed, 4 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
>> index 0a8be63..e0465b2 100644
>> --- a/drivers/gpu/drm/radeon/rs600.c
>> +++ b/drivers/gpu/drm/radeon/rs600.c
>> @@ -634,7 +634,10 @@ int rs600_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
>>                  return -EINVAL;
>>          }
>>          addr = addr & 0xFFFFFFFFFFFFF000ULL;
>> -       addr |= R600_PTE_GART;
>> +       if (addr == rdev->dummy_page.addr)
>> +               addr |= R600_PTE_SYSTEM | R600_PTE_SNOOPED;
>> +       else
>> +               addr |= R600_PTE_GART;
>>          writeq(addr, ptr + (i * 8));
>>          return 0;
>>   }
>> --
>> 1.9.1
>>
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/dri-devel

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-06-11  9:29   ` Christian König
@ 2014-06-11 10:56     ` Marek Olšák
  2014-06-12 11:23       ` Christian König
  0 siblings, 1 reply; 36+ messages in thread
From: Marek Olšák @ 2014-06-11 10:56 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

I only tested Bonaire. I can test Cape Verde if needed.

Marek

On Wed, Jun 11, 2014 at 11:29 AM, Christian König
<deathsimple@vodafone.de> wrote:
> Crap, I already wanted to check back with you if that really fixes your
> problems.
>
> Thanks for the info, this crash also only happens on CIK doesn't it?
>
> Christian.
>
> Am 11.06.2014 01:30, schrieb Marek Olšák:
>
>> Sorry to tell you the bad news. This patch doesn't fix the hangs on my
>> machine.
>>
>> I tested drm-next-3.16 from Alex's tree. I also switched copying from
>> SDMA to CP DMA, which hung too.
>>
>> I also tried this:
>>
>> git checkout (the problematic commit):
>> 6d2f294 - drm/radeon: use normal BOs for the page tables v4
>>
>> git cherry-pick (fixes):
>> 0e97703c - drm/radeon: add define for flags used in R600+ GTT
>> 0986c1a5 - drm/radeon: stop poisoning the GART TLB
>> 4906f689 - drm/radeon: fix page directory update size estimation
>> 4b095566 - drm/radeon: fix buffer placement under memory pressure v2
>>
>> Then I tested both SDMA and CP DMA copying. Both were unstable.
>>
>> Testing was done with piglit / quick.tests.
>>
>> Marek
>>
>>
>> On Wed, Jun 4, 2014 at 3:29 PM, Christian König <deathsimple@vodafone.de>
>> wrote:
>>>
>>> From: Christian König <christian.koenig@amd.com>
>>>
>>> When we set the valid bit on invalid GART entries they are
>>> loaded into the TLB when an adjacent entry is loaded. This
>>> poisons the TLB with invalid entries which are sometimes
>>> not correctly removed on TLB flush.
>>>
>>> For stable inclusion the patch probably needs to be modified a bit.
>>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> Cc: stable@vger.kernel.org
>>> ---
>>>   drivers/gpu/drm/radeon/rs600.c | 5 ++++-
>>>   1 file changed, 4 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/radeon/rs600.c
>>> b/drivers/gpu/drm/radeon/rs600.c
>>> index 0a8be63..e0465b2 100644
>>> --- a/drivers/gpu/drm/radeon/rs600.c
>>> +++ b/drivers/gpu/drm/radeon/rs600.c
>>> @@ -634,7 +634,10 @@ int rs600_gart_set_page(struct radeon_device *rdev,
>>> int i, uint64_t addr)
>>>                  return -EINVAL;
>>>          }
>>>          addr = addr & 0xFFFFFFFFFFFFF000ULL;
>>> -       addr |= R600_PTE_GART;
>>> +       if (addr == rdev->dummy_page.addr)
>>> +               addr |= R600_PTE_SYSTEM | R600_PTE_SNOOPED;
>>> +       else
>>> +               addr |= R600_PTE_GART;
>>>          writeq(addr, ptr + (i * 8));
>>>          return 0;
>>>   }
>>> --
>>> 1.9.1
>>>
>>> _______________________________________________
>>> dri-devel mailing list
>>> dri-devel@lists.freedesktop.org
>>> http://lists.freedesktop.org/mailman/listinfo/dri-devel
>
>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-06-11 10:56     ` Marek Olšák
@ 2014-06-12 11:23       ` Christian König
  2014-06-13 13:19         ` Marek Olšák
  0 siblings, 1 reply; 36+ messages in thread
From: Christian König @ 2014-06-12 11:23 UTC (permalink / raw)
  To: Marek Olšák; +Cc: dri-devel

Please do so, and you might want to try 3.15.0 as well.

I've tested multiple piglit runs overnight with my Bonaire and 3.15.0,
and that seemed to work perfectly fine.

Going to test Alex's drm-next-3.16 a bit more as well.

Christian.

Am 11.06.2014 12:56, schrieb Marek Olšák:
> I only tested Bonaire. I can test Cape Verde if needed.
>
> Marek
>
> On Wed, Jun 11, 2014 at 11:29 AM, Christian König
> <deathsimple@vodafone.de> wrote:
>> Crap, I already wanted to check back with you if that really fixes your
>> problems.
>>
>> Thanks for the info, this crash also only happens on CIK doesn't it?
>>
>> Christian.
>>
>> Am 11.06.2014 01:30, schrieb Marek Olšák:
>>
>>> Sorry to tell you the bad news. This patch doesn't fix the hangs on my
>>> machine.
>>>
>>> I tested drm-next-3.16 from Alex's tree. I also switched copying from
>>> SDMA to CP DMA, which hung too.
>>>
>>> I also tried this:
>>>
>>> git checkout (the problematic commit):
>>> 6d2f294 - drm/radeon: use normal BOs for the page tables v4
>>>
>>> git cherry-pick (fixes):
>>> 0e97703c - drm/radeon: add define for flags used in R600+ GTT
>>> 0986c1a5 - drm/radeon: stop poisoning the GART TLB
>>> 4906f689 - drm/radeon: fix page directory update size estimation
>>> 4b095566 - drm/radeon: fix buffer placement under memory pressure v2
>>>
>>> Then I tested both SDMA and CP DMA copying. Both were unstable.
>>>
>>> Testing was done with piglit / quick.tests.
>>>
>>> Marek
>>>
>>>
>>> On Wed, Jun 4, 2014 at 3:29 PM, Christian König <deathsimple@vodafone.de>
>>> wrote:
>>>> From: Christian König <christian.koenig@amd.com>
>>>>
>>>> When we set the valid bit on invalid GART entries they are
>>>> loaded into the TLB when an adjacent entry is loaded. This
>>>> poisons the TLB with invalid entries which are sometimes
>>>> not correctly removed on TLB flush.
>>>>
>>>> For stable inclusion the patch probably needs to be modified a bit.
>>>>
>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>> Cc: stable@vger.kernel.org
>>>> ---
>>>>    drivers/gpu/drm/radeon/rs600.c | 5 ++++-
>>>>    1 file changed, 4 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/radeon/rs600.c
>>>> b/drivers/gpu/drm/radeon/rs600.c
>>>> index 0a8be63..e0465b2 100644
>>>> --- a/drivers/gpu/drm/radeon/rs600.c
>>>> +++ b/drivers/gpu/drm/radeon/rs600.c
>>>> @@ -634,7 +634,10 @@ int rs600_gart_set_page(struct radeon_device *rdev,
>>>> int i, uint64_t addr)
>>>>                   return -EINVAL;
>>>>           }
>>>>           addr = addr & 0xFFFFFFFFFFFFF000ULL;
>>>> -       addr |= R600_PTE_GART;
>>>> +       if (addr == rdev->dummy_page.addr)
>>>> +               addr |= R600_PTE_SYSTEM | R600_PTE_SNOOPED;
>>>> +       else
>>>> +               addr |= R600_PTE_GART;
>>>>           writeq(addr, ptr + (i * 8));
>>>>           return 0;
>>>>    }
>>>> --
>>>> 1.9.1
>>>>
>>>> _______________________________________________
>>>> dri-devel mailing list
>>>> dri-devel@lists.freedesktop.org
>>>> http://lists.freedesktop.org/mailman/listinfo/dri-devel
>>

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-06-12 11:23       ` Christian König
@ 2014-06-13 13:19         ` Marek Olšák
  2014-06-13 15:45           ` Christian König
  0 siblings, 1 reply; 36+ messages in thread
From: Marek Olšák @ 2014-06-13 13:19 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

[-- Attachment #1: Type: text/plain, Size: 3625 bytes --]

Hi,

With my "force_gtt" patch, Cape Verde is unstable too, so all GCN
chips are affected.

I recommend applying that patch, because it will reproduce the problem
faster. Without it, the hangs are very rare and it may take a while
before they occur.

Marek

On Thu, Jun 12, 2014 at 1:23 PM, Christian König
<deathsimple@vodafone.de> wrote:
> Please do so, and you might want to try 3.15.0 as well.
>
> I've tested multiple piglit runs over night with my Bonaire and 3.15.0 and
> that seemed to work perfectly fine.
>
> Going to test Alex drm-next-3.16 a bit more as well.
>
> Christian.
>
> Am 11.06.2014 12:56, schrieb Marek Olšák:
>
>> I only tested Bonaire. I can test Cape Verde if needed.
>>
>> Marek
>>
>> On Wed, Jun 11, 2014 at 11:29 AM, Christian König
>> <deathsimple@vodafone.de> wrote:
>>>
>>> Crap, I already wanted to check back with you if that really fixes your
>>> problems.
>>>
>>> Thanks for the info, this crash also only happens on CIK doesn't it?
>>>
>>> Christian.
>>>
>>> Am 11.06.2014 01:30, schrieb Marek Olšák:
>>>
>>>> Sorry to tell you the bad news. This patch doesn't fix the hangs on my
>>>> machine.
>>>>
>>>> I tested drm-next-3.16 from Alex's tree. I also switched copying from
>>>> SDMA to CP DMA, which hung too.
>>>>
>>>> I also tried this:
>>>>
>>>> git checkout (the problematic commit):
>>>> 6d2f294 - drm/radeon: use normal BOs for the page tables v4
>>>>
>>>> git cherry-pick (fixes):
>>>> 0e97703c - drm/radeon: add define for flags used in R600+ GTT
>>>> 0986c1a5 - drm/radeon: stop poisoning the GART TLB
>>>> 4906f689 - drm/radeon: fix page directory update size estimation
>>>> 4b095566 - drm/radeon: fix buffer placement under memory pressure v2
>>>>
>>>> Then I tested both SDMA and CP DMA copying. Both were unstable.
>>>>
>>>> Testing was done with piglit / quick.tests.
>>>>
>>>> Marek
>>>>
>>>>
>>>> On Wed, Jun 4, 2014 at 3:29 PM, Christian König
>>>> <deathsimple@vodafone.de>
>>>> wrote:
>>>>>
>>>>> From: Christian König <christian.koenig@amd.com>
>>>>>
>>>>> When we set the valid bit on invalid GART entries they are
>>>>> loaded into the TLB when an adjacent entry is loaded. This
>>>>> poisons the TLB with invalid entries which are sometimes
>>>>> not correctly removed on TLB flush.
>>>>>
>>>>> For stable inclusion the patch probably needs to be modified a bit.
>>>>>
>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>>> Cc: stable@vger.kernel.org
>>>>> ---
>>>>>    drivers/gpu/drm/radeon/rs600.c | 5 ++++-
>>>>>    1 file changed, 4 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/radeon/rs600.c
>>>>> b/drivers/gpu/drm/radeon/rs600.c
>>>>> index 0a8be63..e0465b2 100644
>>>>> --- a/drivers/gpu/drm/radeon/rs600.c
>>>>> +++ b/drivers/gpu/drm/radeon/rs600.c
>>>>> @@ -634,7 +634,10 @@ int rs600_gart_set_page(struct radeon_device
>>>>> *rdev,
>>>>> int i, uint64_t addr)
>>>>>                   return -EINVAL;
>>>>>           }
>>>>>           addr = addr & 0xFFFFFFFFFFFFF000ULL;
>>>>> -       addr |= R600_PTE_GART;
>>>>> +       if (addr == rdev->dummy_page.addr)
>>>>> +               addr |= R600_PTE_SYSTEM | R600_PTE_SNOOPED;
>>>>> +       else
>>>>> +               addr |= R600_PTE_GART;
>>>>>           writeq(addr, ptr + (i * 8));
>>>>>           return 0;
>>>>>    }
>>>>> --
>>>>> 1.9.1
>>>>>
>>>>> _______________________________________________
>>>>> dri-devel mailing list
>>>>> dri-devel@lists.freedesktop.org
>>>>> http://lists.freedesktop.org/mailman/listinfo/dri-devel
>>>
>>>
>

[-- Attachment #2: 0001-force_gtt.patch --]
[-- Type: text/x-patch, Size: 1537 bytes --]

From 504c27c21131f0a2b472e8531ed4630454fe1471 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Fri, 13 Jun 2014 15:17:26 +0200
Subject: [PATCH] force_gtt

---
 drivers/gpu/drm/radeon/radeon_vm.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index c11b71d..67f7658 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -116,6 +116,19 @@ void radeon_vm_manager_fini(struct radeon_device *rdev)
 	rdev->vm_manager.enabled = false;
 }
 
+static void force_gtt(struct radeon_bo *bo)
+{
+	if (radeon_bo_reserve(bo, false))
+		return;
+
+	radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_GTT);
+
+	if (ttm_bo_validate(&bo->tbo, &bo->placement, true, false)) {
+		DRM_ERROR("failed to force a GTT placement\n");
+	}
+	radeon_bo_unreserve(bo);
+}
+
 /**
  * radeon_vm_get_bos - add the vm BOs to a validation list
  *
@@ -147,6 +160,8 @@ struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev,
 	list[0].handle = 0;
 	list_add(&list[0].tv.head, head);
 
+	force_gtt(vm->page_directory);
+
 	for (i = 0, idx = 1; i <= vm->max_pde_used; i++) {
 		if (!vm->page_tables[i].bo)
 			continue;
@@ -159,6 +174,8 @@ struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev,
 		list[idx].tiling_flags = 0;
 		list[idx].handle = 0;
 		list_add(&list[idx++].tv.head, head);
+
+		force_gtt(vm->page_tables[i].bo);
 	}
 
 	return list;
-- 
1.9.1


[-- Attachment #3: Type: text/plain, Size: 159 bytes --]

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-06-13 13:19         ` Marek Olšák
@ 2014-06-13 15:45           ` Christian König
  2014-06-13 21:31             ` Alex Deucher
  0 siblings, 1 reply; 36+ messages in thread
From: Christian König @ 2014-06-13 15:45 UTC (permalink / raw)
  To: Marek Olšák; +Cc: dri-devel

Hi Marek,

ah, yes! Piglit in combination with that patch can indeed crash the box.

Going to investigate now that I can reproduce it.

Thanks,
Christian.

Am 13.06.2014 15:19, schrieb Marek Olšák:
> Hi,
>
> With my "force_gtt" patch, Cape Verde is unstable too, so all GCN
> chips are affected.
>
> I recommend applying that patch, because it will reproduce the problem
> faster. Without it, the hangs are very rare and it may take a while
> before they occur.
>
> Marek
>
> On Thu, Jun 12, 2014 at 1:23 PM, Christian König
> <deathsimple@vodafone.de> wrote:
>> Please do so, and you might want to try 3.15.0 as well.
>>
>> I've tested multiple piglit runs over night with my Bonaire and 3.15.0 and
>> that seemed to work perfectly fine.
>>
>> Going to test Alex drm-next-3.16 a bit more as well.
>>
>> Christian.
>>
>> Am 11.06.2014 12:56, schrieb Marek Olšák:
>>
>>> I only tested Bonaire. I can test Cape Verde if needed.
>>>
>>> Marek
>>>
>>> On Wed, Jun 11, 2014 at 11:29 AM, Christian König
>>> <deathsimple@vodafone.de> wrote:
>>>> Crap, I already wanted to check back with you if that really fixes your
>>>> problems.
>>>>
>>>> Thanks for the info, this crash also only happens on CIK doesn't it?
>>>>
>>>> Christian.
>>>>
>>>> Am 11.06.2014 01:30, schrieb Marek Olšák:
>>>>
>>>>> Sorry to tell you the bad news. This patch doesn't fix the hangs on my
>>>>> machine.
>>>>>
>>>>> I tested drm-next-3.16 from Alex's tree. I also switched copying from
>>>>> SDMA to CP DMA, which hung too.
>>>>>
>>>>> I also tried this:
>>>>>
>>>>> git checkout (the problematic commit):
>>>>> 6d2f294 - drm/radeon: use normal BOs for the page tables v4
>>>>>
>>>>> git cherry-pick (fixes):
>>>>> 0e97703c - drm/radeon: add define for flags used in R600+ GTT
>>>>> 0986c1a5 - drm/radeon: stop poisoning the GART TLB
>>>>> 4906f689 - drm/radeon: fix page directory update size estimation
>>>>> 4b095566 - drm/radeon: fix buffer placement under memory pressure v2
>>>>>
>>>>> Then I tested both SDMA and CP DMA copying. Both were unstable.
>>>>>
>>>>> Testing was done with piglit / quick.tests.
>>>>>
>>>>> Marek
>>>>>
>>>>>
>>>>> On Wed, Jun 4, 2014 at 3:29 PM, Christian König
>>>>> <deathsimple@vodafone.de>
>>>>> wrote:
>>>>>> From: Christian König <christian.koenig@amd.com>
>>>>>>
>>>>>> When we set the valid bit on invalid GART entries they are
>>>>>> loaded into the TLB when an adjacent entry is loaded. This
>>>>>> poisons the TLB with invalid entries which are sometimes
>>>>>> not correctly removed on TLB flush.
>>>>>>
>>>>>> For stable inclusion the patch probably needs to be modified a bit.
>>>>>>
>>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>>>> Cc: stable@vger.kernel.org
>>>>>> ---
>>>>>>     drivers/gpu/drm/radeon/rs600.c | 5 ++++-
>>>>>>     1 file changed, 4 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/radeon/rs600.c
>>>>>> b/drivers/gpu/drm/radeon/rs600.c
>>>>>> index 0a8be63..e0465b2 100644
>>>>>> --- a/drivers/gpu/drm/radeon/rs600.c
>>>>>> +++ b/drivers/gpu/drm/radeon/rs600.c
>>>>>> @@ -634,7 +634,10 @@ int rs600_gart_set_page(struct radeon_device
>>>>>> *rdev,
>>>>>> int i, uint64_t addr)
>>>>>>                    return -EINVAL;
>>>>>>            }
>>>>>>            addr = addr & 0xFFFFFFFFFFFFF000ULL;
>>>>>> -       addr |= R600_PTE_GART;
>>>>>> +       if (addr == rdev->dummy_page.addr)
>>>>>> +               addr |= R600_PTE_SYSTEM | R600_PTE_SNOOPED;
>>>>>> +       else
>>>>>> +               addr |= R600_PTE_GART;
>>>>>>            writeq(addr, ptr + (i * 8));
>>>>>>            return 0;
>>>>>>     }
>>>>>> --
>>>>>> 1.9.1
>>>>>>
>>>>>> _______________________________________________
>>>>>> dri-devel mailing list
>>>>>> dri-devel@lists.freedesktop.org
>>>>>> http://lists.freedesktop.org/mailman/listinfo/dri-devel
>>>>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-06-13 15:45           ` Christian König
@ 2014-06-13 21:31             ` Alex Deucher
  2014-06-15 12:48               ` Christian König
  0 siblings, 1 reply; 36+ messages in thread
From: Alex Deucher @ 2014-06-13 21:31 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

On Fri, Jun 13, 2014 at 11:45 AM, Christian König
<deathsimple@vodafone.de> wrote:
> Hi Marek,
>
> ah, yes! Piglit in combination with that patch can indeed crash the box.
>
> Going to investigate now that I can reproduce it.

I wonder if it's a clockgating issue with the MC or BIF?  You might
try adjusting the rdev->cg_flags (try setting it to 0) in
radeon_asic.c or disabling dpm.

Alex

>
> Thanks,
> Christian.
>
> Am 13.06.2014 15:19, schrieb Marek Olšák:
>
>> Hi,
>>
>> With my "force_gtt" patch, Cape Verde is unstable too, so all GCN
>> chips are affected.
>>
>> I recommend applying that patch, because it will reproduce the problem
>> faster. Without it, the hangs are very rare and it may take a while
>> before they occur.
>>
>> Marek
>>
>> On Thu, Jun 12, 2014 at 1:23 PM, Christian König
>> <deathsimple@vodafone.de> wrote:
>>>
>>> Please do so, and you might want to try 3.15.0 as well.
>>>
>>> I've tested multiple piglit runs over night with my Bonaire and 3.15.0
>>> and
>>> that seemed to work perfectly fine.
>>>
>>> Going to test Alex drm-next-3.16 a bit more as well.
>>>
>>> Christian.
>>>
>>> Am 11.06.2014 12:56, schrieb Marek Olšák:
>>>
>>>> I only tested Bonaire. I can test Cape Verde if needed.
>>>>
>>>> Marek
>>>>
>>>> On Wed, Jun 11, 2014 at 11:29 AM, Christian König
>>>> <deathsimple@vodafone.de> wrote:
>>>>>
>>>>> Crap, I already wanted to check back with you if that really fixes your
>>>>> problems.
>>>>>
>>>>> Thanks for the info, this crash also only happens on CIK doesn't it?
>>>>>
>>>>> Christian.
>>>>>
>>>>> Am 11.06.2014 01:30, schrieb Marek Olšák:
>>>>>
>>>>>> Sorry to tell you the bad news. This patch doesn't fix the hangs on my
>>>>>> machine.
>>>>>>
>>>>>> I tested drm-next-3.16 from Alex's tree. I also switched copying from
>>>>>> SDMA to CP DMA, which hung too.
>>>>>>
>>>>>> I also tried this:
>>>>>>
>>>>>> git checkout (the problematic commit):
>>>>>> 6d2f294 - drm/radeon: use normal BOs for the page tables v4
>>>>>>
>>>>>> git cherry-pick (fixes):
>>>>>> 0e97703c - drm/radeon: add define for flags used in R600+ GTT
>>>>>> 0986c1a5 - drm/radeon: stop poisoning the GART TLB
>>>>>> 4906f689 - drm/radeon: fix page directory update size estimation
>>>>>> 4b095566 - drm/radeon: fix buffer placement under memory pressure v2
>>>>>>
>>>>>> Then I tested both SDMA and CP DMA copying. Both were unstable.
>>>>>>
>>>>>> Testing was done with piglit / quick.tests.
>>>>>>
>>>>>> Marek
>>>>>>
>>>>>>
>>>>>> On Wed, Jun 4, 2014 at 3:29 PM, Christian König
>>>>>> <deathsimple@vodafone.de>
>>>>>> wrote:
>>>>>>>
>>>>>>> From: Christian König <christian.koenig@amd.com>
>>>>>>>
>>>>>>> When we set the valid bit on invalid GART entries they are
>>>>>>> loaded into the TLB when an adjacent entry is loaded. This
>>>>>>> poisons the TLB with invalid entries which are sometimes
>>>>>>> not correctly removed on TLB flush.
>>>>>>>
>>>>>>> For stable inclusion the patch probably needs to be modified a bit.
>>>>>>>
>>>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>>>>> Cc: stable@vger.kernel.org
>>>>>>> ---
>>>>>>>     drivers/gpu/drm/radeon/rs600.c | 5 ++++-
>>>>>>>     1 file changed, 4 insertions(+), 1 deletion(-)
>>>>>>>
>>>>>>> diff --git a/drivers/gpu/drm/radeon/rs600.c
>>>>>>> b/drivers/gpu/drm/radeon/rs600.c
>>>>>>> index 0a8be63..e0465b2 100644
>>>>>>> --- a/drivers/gpu/drm/radeon/rs600.c
>>>>>>> +++ b/drivers/gpu/drm/radeon/rs600.c
>>>>>>> @@ -634,7 +634,10 @@ int rs600_gart_set_page(struct radeon_device
>>>>>>> *rdev,
>>>>>>> int i, uint64_t addr)
>>>>>>>                    return -EINVAL;
>>>>>>>            }
>>>>>>>            addr = addr & 0xFFFFFFFFFFFFF000ULL;
>>>>>>> -       addr |= R600_PTE_GART;
>>>>>>> +       if (addr == rdev->dummy_page.addr)
>>>>>>> +               addr |= R600_PTE_SYSTEM | R600_PTE_SNOOPED;
>>>>>>> +       else
>>>>>>> +               addr |= R600_PTE_GART;
>>>>>>>            writeq(addr, ptr + (i * 8));
>>>>>>>            return 0;
>>>>>>>     }
>>>>>>> --
>>>>>>> 1.9.1
>>>>>>>
>>>>>>> _______________________________________________
>>>>>>> dri-devel mailing list
>>>>>>> dri-devel@lists.freedesktop.org
>>>>>>> http://lists.freedesktop.org/mailman/listinfo/dri-devel
>>>>>
>>>>>
>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-06-13 21:31             ` Alex Deucher
@ 2014-06-15 12:48               ` Christian König
  2014-06-19  1:48                 ` Michel Dänzer
  0 siblings, 1 reply; 36+ messages in thread
From: Christian König @ 2014-06-15 12:48 UTC (permalink / raw)
  To: Alex Deucher; +Cc: dri-devel

Am 13.06.2014 23:31, schrieb Alex Deucher:
> On Fri, Jun 13, 2014 at 11:45 AM, Christian König
> <deathsimple@vodafone.de> wrote:
>> Hi Marek,
>>
>> ah, yes! Piglit in combination with that patch can indeed crash the box.
>>
>> Going to investigate now that I can reproduce it.
> I wonder if it's a clockgating issue with the MC or BIF?  You might
> try adjusting the rdev->cg_flags (try setting it to 0) in
> radeon_asic.c or disabling dpm.

Unfortunately that was just a false alarm.

I was just on a branch which didn't have the "stop poisoning the GART 
TLB" patch; after applying this patch I can again let piglit run for the 
whole night without a lockup.

No idea what goes wrong when Marek runs piglit, but 3.15.0+"stop 
poisoning the GART TLB"+"force_gtt" is rock solid here.

Christian.

>
> Alex
>
>> Thanks,
>> Christian.
>>
>> Am 13.06.2014 15:19, schrieb Marek Olšák:
>>
>>> Hi,
>>>
>>> With my "force_gtt" patch, Cape Verde is unstable too, so all GCN
>>> chips are affected.
>>>
>>> I recommend applying that patch, because it will reproduce the problem
>>> faster. Without it, the hangs are very rare and it may take a while
>>> before they occur.
>>>
>>> Marek
>>>
>>> On Thu, Jun 12, 2014 at 1:23 PM, Christian König
>>> <deathsimple@vodafone.de> wrote:
>>>> Please do so, and you might want to try 3.15.0 as well.
>>>>
>>>> I've tested multiple piglit runs over night with my Bonaire and 3.15.0
>>>> and
>>>> that seemed to work perfectly fine.
>>>>
>>>> Going to test Alex drm-next-3.16 a bit more as well.
>>>>
>>>> Christian.
>>>>
>>>> Am 11.06.2014 12:56, schrieb Marek Olšák:
>>>>
>>>>> I only tested Bonaire. I can test Cape Verde if needed.
>>>>>
>>>>> Marek
>>>>>
>>>>> On Wed, Jun 11, 2014 at 11:29 AM, Christian König
>>>>> <deathsimple@vodafone.de> wrote:
>>>>>> Crap, I already wanted to check back with you if that really fixes your
>>>>>> problems.
>>>>>>
>>>>>> Thanks for the info, this crash also only happens on CIK doesn't it?
>>>>>>
>>>>>> Christian.
>>>>>>
>>>>>> Am 11.06.2014 01:30, schrieb Marek Olšák:
>>>>>>
>>>>>>> Sorry to tell you the bad news. This patch doesn't fix the hangs on my
>>>>>>> machine.
>>>>>>>
>>>>>>> I tested drm-next-3.16 from Alex's tree. I also switched copying from
>>>>>>> SDMA to CP DMA, which hung too.
>>>>>>>
>>>>>>> I also tried this:
>>>>>>>
>>>>>>> git checkout (the problematic commit):
>>>>>>> 6d2f294 - drm/radeon: use normal BOs for the page tables v4
>>>>>>>
>>>>>>> git cherry-pick (fixes):
>>>>>>> 0e97703c - drm/radeon: add define for flags used in R600+ GTT
>>>>>>> 0986c1a5 - drm/radeon: stop poisoning the GART TLB
>>>>>>> 4906f689 - drm/radeon: fix page directory update size estimation
>>>>>>> 4b095566 - drm/radeon: fix buffer placement under memory pressure v2
>>>>>>>
>>>>>>> Then I tested both SDMA and CP DMA copying. Both were unstable.
>>>>>>>
>>>>>>> Testing was done with piglit / quick.tests.
>>>>>>>
>>>>>>> Marek
>>>>>>>
>>>>>>>
>>>>>>> On Wed, Jun 4, 2014 at 3:29 PM, Christian König
>>>>>>> <deathsimple@vodafone.de>
>>>>>>> wrote:
>>>>>>>> From: Christian König <christian.koenig@amd.com>
>>>>>>>>
>>>>>>>> When we set the valid bit on invalid GART entries they are
>>>>>>>> loaded into the TLB when an adjacent entry is loaded. This
>>>>>>>> poisons the TLB with invalid entries which are sometimes
>>>>>>>> not correctly removed on TLB flush.
>>>>>>>>
>>>>>>>> For stable inclusion the patch probably needs to be modified a bit.
>>>>>>>>
>>>>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>>>>>> Cc: stable@vger.kernel.org
>>>>>>>> ---
>>>>>>>>      drivers/gpu/drm/radeon/rs600.c | 5 ++++-
>>>>>>>>      1 file changed, 4 insertions(+), 1 deletion(-)
>>>>>>>>
>>>>>>>> diff --git a/drivers/gpu/drm/radeon/rs600.c
>>>>>>>> b/drivers/gpu/drm/radeon/rs600.c
>>>>>>>> index 0a8be63..e0465b2 100644
>>>>>>>> --- a/drivers/gpu/drm/radeon/rs600.c
>>>>>>>> +++ b/drivers/gpu/drm/radeon/rs600.c
>>>>>>>> @@ -634,7 +634,10 @@ int rs600_gart_set_page(struct radeon_device
>>>>>>>> *rdev,
>>>>>>>> int i, uint64_t addr)
>>>>>>>>                     return -EINVAL;
>>>>>>>>             }
>>>>>>>>             addr = addr & 0xFFFFFFFFFFFFF000ULL;
>>>>>>>> -       addr |= R600_PTE_GART;
>>>>>>>> +       if (addr == rdev->dummy_page.addr)
>>>>>>>> +               addr |= R600_PTE_SYSTEM | R600_PTE_SNOOPED;
>>>>>>>> +       else
>>>>>>>> +               addr |= R600_PTE_GART;
>>>>>>>>             writeq(addr, ptr + (i * 8));
>>>>>>>>             return 0;
>>>>>>>>      }
>>>>>>>> --
>>>>>>>> 1.9.1
>>>>>>>>
>>>>>>>> _______________________________________________
>>>>>>>> dri-devel mailing list
>>>>>>>> dri-devel@lists.freedesktop.org
>>>>>>>> http://lists.freedesktop.org/mailman/listinfo/dri-devel
>>>>>>

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-06-15 12:48               ` Christian König
@ 2014-06-19  1:48                 ` Michel Dänzer
  2014-06-19  9:45                   ` Christian König
  2014-06-19 10:20                   ` Marek Olšák
  0 siblings, 2 replies; 36+ messages in thread
From: Michel Dänzer @ 2014-06-19  1:48 UTC (permalink / raw)
  To: Christian König, Alex Deucher; +Cc: dri-devel

On 15.06.2014 21:48, Christian König wrote:
> Am 13.06.2014 23:31, schrieb Alex Deucher:
>> On Fri, Jun 13, 2014 at 11:45 AM, Christian König
>> <deathsimple@vodafone.de> wrote:
>>> Hi Marek,
>>>
>>> ah, yes! Piglit in combination with that patch can indeed crash the box.
>>>
>>> Going to investigate now that I can reproduce it.
>> I wonder if it's a clockgating issue with the MC or BIF?  You might
>> try adjusting the rdev->cg_flags (try setting it to 0) in
>> radeon_asic.c or disabling dpm.
> 
> Unfortunately that was just a false alarm.
> 
> I was just on a branch which didn't had the "stop poisoning the GART
> TLB" patch, after applying this patch I can again let piglit run for the
> whole night without a lockup.
> 
> No idea what goes wrong when Marek runs piglit, but 3.15.0+"stop
> poisoning the GART TLB"+"force_gtt" is rock solid here.

FWIW, 3.15 doesn't survive piglit on my Bonaire either, but 3.14 is
fine. 3.15 seems stable on Kaveri though, but I haven't tried the
force_gtt patch on that yet.

There have also been a number of bug reports about stability regressions
in 3.15 on various SI and CIK cards. It seems likely that at least some
of those are related to this issue as well.

If we can't figure out the problem soon, we probably need to revert the
'Use normal BOs for page tables' and dependent changes at least for 3.15.y?


-- 
Earthling Michel Dänzer            |                  http://www.amd.com
Libre software enthusiast          |                Mesa and X developer
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-06-19  1:48                 ` Michel Dänzer
@ 2014-06-19  9:45                   ` Christian König
  2014-06-23  8:15                     ` Michel Dänzer
  2014-06-19 10:20                   ` Marek Olšák
  1 sibling, 1 reply; 36+ messages in thread
From: Christian König @ 2014-06-19  9:45 UTC (permalink / raw)
  To: Michel Dänzer, Alex Deucher; +Cc: dri-devel

[-- Attachment #1: Type: text/plain, Size: 2191 bytes --]

Am 19.06.2014 03:48, schrieb Michel Dänzer:
> On 15.06.2014 21:48, Christian König wrote:
>> Am 13.06.2014 23:31, schrieb Alex Deucher:
>>> On Fri, Jun 13, 2014 at 11:45 AM, Christian König
>>> <deathsimple@vodafone.de> wrote:
>>>> Hi Marek,
>>>>
>>>> ah, yes! Piglit in combination with that patch can indeed crash the box.
>>>>
>>>> Going to investigate now that I can reproduce it.
>>> I wonder if it's a clockgating issue with the MC or BIF?  You might
>>> try adjusting the rdev->cg_flags (try setting it to 0) in
>>> radeon_asic.c or disabling dpm.
>> Unfortunately that was just a false alarm.
>>
>> I was just on a branch which didn't had the "stop poisoning the GART
>> TLB" patch, after applying this patch I can again let piglit run for the
>> whole night without a lockup.
>>
>> No idea what goes wrong when Marek runs piglit, but 3.15.0+"stop
>> poisoning the GART TLB"+"force_gtt" is rock solid here.
> FWIW, 3.15 doesn't survive piglit on my Bonaire either, but 3.14 is
> fine. 3.15 seems stable on Kaveri though, but I haven't tried the
> force_gtt patch on that yet.

Yeah, I think it's just me who has a stable system with 3.15 and that 
annoys me quite a bit.

No idea what the difference is. What versions of LLVM/Mesa/Piglit are you 
using for the test?

>
> There have also been a number of bug reports about stability regressions
> in 3.15 on various SI and CIK cards. It seems likely that at least some
> of those are related to this issue as well.
>
> If we can't figure out the problem soon, we probably need to revert the
> 'Use normal BOs for page tables' and dependent changes at least for 3.15.y?

I thought about this for the whole 3.15 release cycle, but decided 
against it. But what we could do is apply the attached trivial patch; 
it pins down the page tables and so pretty much reverts to the old behavior.

I think even when we revert to the old code we have a couple of unsolved 
problems with the VM support or in the driver in general where we should 
try to understand the underlying reason for it instead of applying more 
workarounds.

Going to try harder to crash my 3.15 system,
Christian.

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-drm-radeon-pin-down-page-tables.patch --]
[-- Type: text/x-diff; name="0001-drm-radeon-pin-down-page-tables.patch", Size: 1039 bytes --]

From 43973a95578c3ef0f20d3ad997549ef588ff0f29 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Thu, 19 Jun 2014 11:40:38 +0200
Subject: [PATCH] drm/radeon: pin down page tables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Avoid evicting them from VRAM for now.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/radeon/radeon_vm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index 4c18346..3029aad 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -368,7 +368,7 @@ static int radeon_vm_clear_bo(struct radeon_device *rdev,
         if (r)
 		return r;
 
-        r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
+        r = radeon_bo_pin(bo, RADEON_GEM_DOMAIN_VRAM, NULL);
         if (r)
                 goto error;
 
-- 
1.9.1


[-- Attachment #3: Type: text/plain, Size: 159 bytes --]

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-06-19  1:48                 ` Michel Dänzer
  2014-06-19  9:45                   ` Christian König
@ 2014-06-19 10:20                   ` Marek Olšák
  2014-06-19 10:25                     ` Christian König
  2014-06-20  1:10                     ` Michel Dänzer
  1 sibling, 2 replies; 36+ messages in thread
From: Marek Olšák @ 2014-06-19 10:20 UTC (permalink / raw)
  To: Michel Dänzer; +Cc: dri-devel

Hi Michel,

3.15 doesn't contain Christian's fix yet, so it should be always
broken for everybody. The fix is currently only in 3.16.

Alternatively, you can cherry-pick the fix to 3.15, but it doesn't
apply cleanly.

There is a workaround in 3.15 which disables sDMA and uses CP DMA for
copying buffers. It seems to help Christian's machine, but not mine.

When I said the kernel driver was broken, I meant that
it was broken *with* the fix applied regardless of which engine was
used for the copying.

Marek

On Thu, Jun 19, 2014 at 3:48 AM, Michel Dänzer <michel@daenzer.net> wrote:
> On 15.06.2014 21:48, Christian König wrote:
>> Am 13.06.2014 23:31, schrieb Alex Deucher:
>>> On Fri, Jun 13, 2014 at 11:45 AM, Christian König
>>> <deathsimple@vodafone.de> wrote:
>>>> Hi Marek,
>>>>
>>>> ah, yes! Piglit in combination with that patch can indeed crash the box.
>>>>
>>>> Going to investigate now that I can reproduce it.
>>> I wonder if it's a clockgating issue with the MC or BIF?  You might
>>> try adjusting the rdev->cg_flags (try setting it to 0) in
>>> radeon_asic.c or disabling dpm.
>>
>> Unfortunately that was just a false alarm.
>>
>> I was just on a branch which didn't had the "stop poisoning the GART
>> TLB" patch, after applying this patch I can again let piglit run for the
>> whole night without a lockup.
>>
>> No idea what goes wrong when Marek runs piglit, but 3.15.0+"stop
>> poisoning the GART TLB"+"force_gtt" is rock solid here.
>
> FWIW, 3.15 doesn't survive piglit on my Bonaire either, but 3.14 is
> fine. 3.15 seems stable on Kaveri though, but I haven't tried the
> force_gtt patch on that yet.
>
> There have also been a number of bug reports about stability regressions
> in 3.15 on various SI and CIK cards. It seems likely that at least some
> of those are related to this issue as well.
>
> If we can't figure out the problem soon, we probably need to revert the
> 'Use normal BOs for page tables' and dependent changes at least for 3.15.y?
>
>
> --
> Earthling Michel Dänzer            |                  http://www.amd.com
> Libre software enthusiast          |                Mesa and X developer
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-06-19 10:20                   ` Marek Olšák
@ 2014-06-19 10:25                     ` Christian König
  2014-06-20  1:10                     ` Michel Dänzer
  1 sibling, 0 replies; 36+ messages in thread
From: Christian König @ 2014-06-19 10:25 UTC (permalink / raw)
  To: Marek Olšák, Michel Dänzer; +Cc: dri-devel

Hi Marek,

> There is a workaround in 3.15 which disables sDMA and uses CP DMA for
> copying buffers. It seems to help Christian's machine, but not mine.
By stressing the box with piglit I was able to bring my machine down 
with the CP DMA as well; only cherry-picking the "stop poisoning the 
GART TLB" patch really fixed that issue.

But I'm pretty sure that even with "stop poisoning the GART TLB" 
back-ported we still have at least one stability issue I can't reproduce.

Christian.

Am 19.06.2014 12:20, schrieb Marek Olšák:
> Hi Michel,
>
> 3.15 doesn't contain Christian's fix yet, so it should be always
> broken for everybody. The fix is currently only in 3.16.
>
> Alternatively, you can cherry-pick the fix to 3.15, but it doesn't
> apply cleanly.
>
> There is a workaround in 3.15 which disables sDMA and uses CP DMA for
> copying buffers. It seems to help Christian's machine, but not mine.
>
> When I said the kernel driver was broken, I meant that
> it was broken *with* the fix applied regardless of which engine was
> used for the copying.
>
> Marek
>
> On Thu, Jun 19, 2014 at 3:48 AM, Michel Dänzer <michel@daenzer.net> wrote:
>> On 15.06.2014 21:48, Christian König wrote:
>>> Am 13.06.2014 23:31, schrieb Alex Deucher:
>>>> On Fri, Jun 13, 2014 at 11:45 AM, Christian König
>>>> <deathsimple@vodafone.de> wrote:
>>>>> Hi Marek,
>>>>>
>>>>> ah, yes! Piglit in combination with that patch can indeed crash the box.
>>>>>
>>>>> Going to investigate now that I can reproduce it.
>>>> I wonder if it's a clockgating issue with the MC or BIF?  You might
>>>> try adjusting the rdev->cg_flags (try setting it to 0) in
>>>> radeon_asic.c or disabling dpm.
>>> Unfortunately that was just a false alarm.
>>>
>>> I was just on a branch which didn't had the "stop poisoning the GART
>>> TLB" patch, after applying this patch I can again let piglit run for the
>>> whole night without a lockup.
>>>
>>> No idea what goes wrong when Marek runs piglit, but 3.15.0+"stop
>>> poisoning the GART TLB"+"force_gtt" is rock solid here.
>> FWIW, 3.15 doesn't survive piglit on my Bonaire either, but 3.14 is
>> fine. 3.15 seems stable on Kaveri though, but I haven't tried the
>> force_gtt patch on that yet.
>>
>> There have also been a number of bug reports about stability regressions
>> in 3.15 on various SI and CIK cards. It seems likely that at least some
>> of those are related to this issue as well.
>>
>> If we can't figure out the problem soon, we probably need to revert the
>> 'Use normal BOs for page tables' and dependent changes at least for 3.15.y?
>>
>>
>> --
>> Earthling Michel Dänzer            |                  http://www.amd.com
>> Libre software enthusiast          |                Mesa and X developer
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/dri-devel

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-06-19 10:20                   ` Marek Olšák
  2014-06-19 10:25                     ` Christian König
@ 2014-06-20  1:10                     ` Michel Dänzer
  1 sibling, 0 replies; 36+ messages in thread
From: Michel Dänzer @ 2014-06-20  1:10 UTC (permalink / raw)
  To: Marek Olšák; +Cc: dri-devel

On 19.06.2014 19:20, Marek Olšák wrote:
> Hi Michel,
> 
> 3.15 doesn't contain Christian's fix yet, so it should be always
> broken for everybody. The fix is currently only in 3.16.
> 
> Alternatively, you can cherry-pick the fix to 3.15, but it doesn't
> apply cleanly.

That's a good point. Sorry, I should have mentioned I've been testing
with the GART poisoning fix backported to 3.15.


> There is a workaround in 3.15 which disables sDMA and uses CP DMA for
> copying buffers. It seems to help Christian's machine, but not mine.

I've been testing with CP DMA on Bonaire FWIW.


-- 
Earthling Michel Dänzer            |                  http://www.amd.com
Libre software enthusiast          |                Mesa and X developer
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-06-19  9:45                   ` Christian König
@ 2014-06-23  8:15                     ` Michel Dänzer
  2014-06-23  9:56                       ` Christian König
  0 siblings, 1 reply; 36+ messages in thread
From: Michel Dänzer @ 2014-06-23  8:15 UTC (permalink / raw)
  To: Christian König, Alex Deucher; +Cc: dri-devel

On 19.06.2014 18:45, Christian König wrote:
> Am 19.06.2014 03:48, schrieb Michel Dänzer:
>> On 15.06.2014 21:48, Christian König wrote:
>>>
>>> No idea what goes wrong when Marek runs piglit, but 3.15.0+"stop
>>> poisoning the GART TLB"+"force_gtt" is rock solid here.
>> FWIW, 3.15 doesn't survive piglit on my Bonaire either, but 3.14 is
>> fine. 3.15 seems stable on Kaveri though, but I haven't tried the
>> force_gtt patch on that yet.
> 
> Yeah, I think it's just me who has a stable system with 3.15 and that
> annoys me quite a bit.

FWIW though, my Kaveri doesn't always survive piglit either, e.g. this
morning it didn't once again, then did after a reboot. (That's using
SDMA; Kaveri was never switched back to CPDMA)


> No idea what's the difference. What versions of LLVM/Mesa/Piglit are you
> using for the test?

Current Git of everything.


>> There have also been a number of bug reports about stability regressions
>> in 3.15 on various SI and CIK cards. It seems likely that at least some
>> of those are related to this issue as well.
>>
>> If we can't figure out the problem soon, we probably need to revert the
>> 'Use normal BOs for page tables' and dependent changes at least for
>> 3.15.y?
> 
> I thought about this for the whole 3.15 release cycle, but decided
> against it. But what we could do is applying the attached trivial patch,
> it pins down the page tables and so pretty much reverts to the old
> behavior.

This patch applied on top of 3.15 + stop poisoning the GART TLB doesn't
seem to help on my Bonaire, unfortunately.


> I think even when we revert to the old code we have a couple of unsolved
> problems with the VM support or in the driver in general where we should
> try to understand the underlying reason for it instead of applying more
> workarounds.

I'm not suggesting applying more workarounds but going back to a known
more stable state. It seems like we've maneuvered ourselves to a rather
uncomfortable position from there, with no clear way to a better place.
But if we basically started from the 3.14 state again, we have a few
known hurdles like mine and Marek's Bonaire etc. which we know any
further improvements will have to pass before they can be considered for
general consumption.


-- 
Earthling Michel Dänzer            |                  http://www.amd.com
Libre software enthusiast          |                Mesa and X developer

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-06-23  8:15                     ` Michel Dänzer
@ 2014-06-23  9:56                       ` Christian König
  2014-06-24  6:49                         ` Michel Dänzer
  0 siblings, 1 reply; 36+ messages in thread
From: Christian König @ 2014-06-23  9:56 UTC (permalink / raw)
  To: Michel Dänzer, Alex Deucher; +Cc: dri-devel

[-- Attachment #1: Type: text/plain, Size: 2853 bytes --]

Am 23.06.2014 10:15, schrieb Michel Dänzer:
> On 19.06.2014 18:45, Christian König wrote:
>> Am 19.06.2014 03:48, schrieb Michel Dänzer:
>>> On 15.06.2014 21:48, Christian König wrote:
>>>> No idea what goes wrong when Marek runs piglit, but 3.15.0+"stop
>>>> poisoning the GART TLB"+"force_gtt" is rock solid here.
>>> FWIW, 3.15 doesn't survive piglit on my Bonaire either, but 3.14 is
>>> fine. 3.15 seems stable on Kaveri though, but I haven't tried the
>>> force_gtt patch on that yet.
>> Yeah, I think it's just me who has a stable system with 3.15 and that
>> annoys me quite a bit.
> FWIW though, my Kaveri doesn't always survive piglit either, e.g. this
> morning it didn't once again, then did after a reboot. (That's using
> SDMA; Kaveri was never switched back to CPDMA)
>
>
>> No idea what's the difference. What versions of LLVM/Mesa/Piglit are you
>> using for the test?
> Current Git of everything.
>
>
>>> There have also been a number of bug reports about stability regressions
>>> in 3.15 on various SI and CIK cards. It seems likely that at least some
>>> of those are related to this issue as well.
>>>
>>> If we can't figure out the problem soon, we probably need to revert the
>>> 'Use normal BOs for page tables' and dependent changes at least for
>>> 3.15.y?
>> I thought about this for the whole 3.15 release cycle, but decided
>> against it. But what we could do is applying the attached trivial patch,
>> it pins down the page tables and so pretty much reverts to the old
>> behavior.
> This patch applied on top of 3.15 + stop poisoning the GART TLB doesn't
> seem to help on my Bonaire, unfortunately.

That's unfortunately what I already expected. Making the page tables
movable isn't really the cause of the problem; it must rather be
something else which is a bit more subtle, like an incorrect alignment
somewhere or something like this.

>
>> I think even when we revert to the old code we have a couple of unsolved
>> problems with the VM support or in the driver in general where we should
>> try to understand the underlying reason for it instead of applying more
>> workarounds.
> I'm not suggesting applying more workarounds but going back to a known
> more stable state. It seems like we've maneuvered ourselves to a rather
> uncomfortable position from there, with no clear way to a better place.
> But if we basically started from the 3.14 state again, we have a few
> known hurdles like mine and Marek's Bonaire etc. which we know any
> further improvements will have to pass before they can be considered for
> general consumption.

Yeah, agreed, especially on the uncomfortable position.

Please try with the two attached patches applied on top of 3.15 and 
retest. They should revert back to the old implementation.

Thanks for the help,
Christian.

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-drm-radeon-Revert-drop-non-blocking-allocations-from.patch --]
[-- Type: text/x-diff; name="0001-drm-radeon-Revert-drop-non-blocking-allocations-from.patch", Size: 3592 bytes --]

>From 7cea99ee4c92cdb946c7c599119ae2e8fd5e1b95 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Mon, 23 Jun 2014 11:07:29 +0200
Subject: [PATCH 1/2] drm/radeon: Revert drop non blocking allocations from sub
 allocator
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The next revert needs this functionality.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/radeon/radeon_object.h    | 2 +-
 drivers/gpu/drm/radeon/radeon_ring.c      | 2 +-
 drivers/gpu/drm/radeon/radeon_sa.c        | 7 +++++--
 drivers/gpu/drm/radeon/radeon_semaphore.c | 2 +-
 4 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index 9e7b25a..7dff64d 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -180,7 +180,7 @@ extern int radeon_sa_bo_manager_suspend(struct radeon_device *rdev,
 extern int radeon_sa_bo_new(struct radeon_device *rdev,
 			    struct radeon_sa_manager *sa_manager,
 			    struct radeon_sa_bo **sa_bo,
-			    unsigned size, unsigned align);
+			    unsigned size, unsigned align, bool block);
 extern void radeon_sa_bo_free(struct radeon_device *rdev,
 			      struct radeon_sa_bo **sa_bo,
 			      struct radeon_fence *fence);
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index f8050f5..62201db 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -63,7 +63,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
 {
 	int r;
 
-	r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256);
+	r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256, true);
 	if (r) {
 		dev_err(rdev->dev, "failed to get a new IB (%d)\n", r);
 		return r;
diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c
index adcf3e2..c062580 100644
--- a/drivers/gpu/drm/radeon/radeon_sa.c
+++ b/drivers/gpu/drm/radeon/radeon_sa.c
@@ -312,7 +312,7 @@ static bool radeon_sa_bo_next_hole(struct radeon_sa_manager *sa_manager,
 int radeon_sa_bo_new(struct radeon_device *rdev,
 		     struct radeon_sa_manager *sa_manager,
 		     struct radeon_sa_bo **sa_bo,
-		     unsigned size, unsigned align)
+		     unsigned size, unsigned align, bool block)
 {
 	struct radeon_fence *fences[RADEON_NUM_RINGS];
 	unsigned tries[RADEON_NUM_RINGS];
@@ -353,11 +353,14 @@ int radeon_sa_bo_new(struct radeon_device *rdev,
 		r = radeon_fence_wait_any(rdev, fences, false);
 		spin_lock(&sa_manager->wq.lock);
 		/* if we have nothing to wait for block */
-		if (r == -ENOENT) {
+		if (r == -ENOENT && block) {
 			r = wait_event_interruptible_locked(
 				sa_manager->wq, 
 				radeon_sa_event(sa_manager, size, align)
 			);
+
+		} else if (r == -ENOENT) {
+			r = -ENOMEM;
 		}
 
 	} while (!r);
diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c
index dbd6bcd..6140af6 100644
--- a/drivers/gpu/drm/radeon/radeon_semaphore.c
+++ b/drivers/gpu/drm/radeon/radeon_semaphore.c
@@ -42,7 +42,7 @@ int radeon_semaphore_create(struct radeon_device *rdev,
 		return -ENOMEM;
 	}
 	r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &(*semaphore)->sa_bo,
-			     8 * RADEON_NUM_SYNCS, 8);
+			     8 * RADEON_NUM_SYNCS, 8, true);
 	if (r) {
 		kfree(*semaphore);
 		*semaphore = NULL;
-- 
1.9.1


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #3: 0002-drm-radeon-Revert-use-normal-BOs-for-the-page-tables.patch --]
[-- Type: text/x-diff; name="0002-drm-radeon-Revert-use-normal-BOs-for-the-page-tables.patch", Size: 29341 bytes --]

>From e7cdea46fd7553faae2a08dd1bff7cb3009b6bc2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Mon, 23 Jun 2014 11:08:24 +0200
Subject: [PATCH 2/2] drm/radeon: Revert use normal BOs for the page tables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts the commit "use normal BOs for the page tables v4" and the following dependent bug fixes:

drm/radeon: sync page table updates
drm/radeon: fix vm buffer size estimation
drm/radeon: only allocate necessary size for vm bo list
drm/radeon: fix page directory update size estimation
drm/radeon: remove global vm lock

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/radeon/radeon.h        |  24 +-
 drivers/gpu/drm/radeon/radeon_cs.c     |  48 ++-
 drivers/gpu/drm/radeon/radeon_device.c |   4 +-
 drivers/gpu/drm/radeon/radeon_kms.c    |  18 +-
 drivers/gpu/drm/radeon/radeon_ring.c   |   7 -
 drivers/gpu/drm/radeon/radeon_vm.c     | 513 ++++++++++++++++++---------------
 6 files changed, 325 insertions(+), 289 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 8149e7c..b390d79 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -854,22 +854,17 @@ struct radeon_mec {
 #define R600_PTE_READABLE	(1 << 5)
 #define R600_PTE_WRITEABLE	(1 << 6)
 
-struct radeon_vm_pt {
-	struct radeon_bo		*bo;
-	uint64_t			addr;
-};
-
 struct radeon_vm {
+	struct list_head		list;
 	struct list_head		va;
 	unsigned			id;
 
 	/* contains the page directory */
-	struct radeon_bo		*page_directory;
+	struct radeon_sa_bo		*page_directory;
 	uint64_t			pd_gpu_addr;
-	unsigned			max_pde_used;
 
 	/* array of page tables, one for each page directory entry */
-	struct radeon_vm_pt		*page_tables;
+	struct radeon_sa_bo		**page_tables;
 
 	struct mutex			mutex;
 	/* last fence for cs using this vm */
@@ -881,7 +876,10 @@ struct radeon_vm {
 };
 
 struct radeon_vm_manager {
+	struct mutex			lock;
+	struct list_head		lru_vm;
 	struct radeon_fence		*active[RADEON_NUM_VM];
+	struct radeon_sa_manager	sa_manager;
 	uint32_t			max_pfn;
 	/* number of VMIDs */
 	unsigned			nvm;
@@ -1013,7 +1011,6 @@ struct radeon_cs_parser {
 	unsigned		nrelocs;
 	struct radeon_cs_reloc	*relocs;
 	struct radeon_cs_reloc	**relocs_ptr;
-	struct radeon_cs_reloc	*vm_bos;
 	struct list_head	validated;
 	unsigned		dma_reloc_idx;
 	/* indices of various chunks */
@@ -2807,11 +2804,10 @@ extern void radeon_program_register_sequence(struct radeon_device *rdev,
  */
 int radeon_vm_manager_init(struct radeon_device *rdev);
 void radeon_vm_manager_fini(struct radeon_device *rdev);
-int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm);
+void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm);
 void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm);
-struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev,
-					  struct radeon_vm *vm,
-                                          struct list_head *head);
+int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm);
+void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm);
 struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
 				       struct radeon_vm *vm, int ring);
 void radeon_vm_flush(struct radeon_device *rdev,
@@ -2821,8 +2817,6 @@ void radeon_vm_fence(struct radeon_device *rdev,
 		     struct radeon_vm *vm,
 		     struct radeon_fence *fence);
 uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr);
-int radeon_vm_update_page_directory(struct radeon_device *rdev,
-				    struct radeon_vm *vm);
 int radeon_vm_bo_update(struct radeon_device *rdev,
 			struct radeon_vm *vm,
 			struct radeon_bo *bo,
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 41ecf8a..06a00a1 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -173,10 +173,6 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 
 	radeon_cs_buckets_get_list(&buckets, &p->validated);
 
-	if (p->cs_flags & RADEON_CS_USE_VM)
-		p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
-					      &p->validated);
-
 	return radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);
 }
 
@@ -417,7 +413,6 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo
 	kfree(parser->track);
 	kfree(parser->relocs);
 	kfree(parser->relocs_ptr);
-	kfree(parser->vm_bos);
 	for (i = 0; i < parser->nchunks; i++)
 		drm_free_large(parser->chunks[i].kdata);
 	kfree(parser->chunks);
@@ -457,32 +452,24 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev,
 	return r;
 }
 
-static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
+static int radeon_bo_vm_update_pte(struct radeon_cs_parser *parser,
 				   struct radeon_vm *vm)
 {
-	struct radeon_device *rdev = p->rdev;
-	int i, r;
-
-	r = radeon_vm_update_page_directory(rdev, vm);
-	if (r)
-		return r;
+	struct radeon_device *rdev = parser->rdev;
+	struct radeon_cs_reloc *lobj;
+	struct radeon_bo *bo;
+	int r;
 
-	r = radeon_vm_bo_update(rdev, vm, rdev->ring_tmp_bo.bo,
-				&rdev->ring_tmp_bo.bo->tbo.mem);
-	if (r)
+	r = radeon_vm_bo_update(rdev, vm, rdev->ring_tmp_bo.bo, &rdev->ring_tmp_bo.bo->tbo.mem);
+	if (r) {
 		return r;
-
-	for (i = 0; i < p->nrelocs; i++) {
-		struct radeon_bo *bo;
-
-		/* ignore duplicates */
-		if (p->relocs_ptr[i] != &p->relocs[i])
-			continue;
-
-		bo = p->relocs[i].robj;
-		r = radeon_vm_bo_update(rdev, vm, bo, &bo->tbo.mem);
-		if (r)
+	}
+	list_for_each_entry(lobj, &parser->validated, tv.head) {
+		bo = lobj->robj;
+		r = radeon_vm_bo_update(parser->rdev, vm, bo, &bo->tbo.mem);
+		if (r) {
 			return r;
+		}
 	}
 	return 0;
 }
@@ -514,13 +501,20 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
 	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
 		radeon_uvd_note_usage(rdev);
 
+	mutex_lock(&rdev->vm_manager.lock);
 	mutex_lock(&vm->mutex);
+	r = radeon_vm_alloc_pt(rdev, vm);
+	if (r) {
+		goto out;
+	}
 	r = radeon_bo_vm_update_pte(parser, vm);
 	if (r) {
 		goto out;
 	}
 	radeon_cs_sync_rings(parser);
 	radeon_semaphore_sync_to(parser->ib.semaphore, vm->fence);
+	radeon_semaphore_sync_to(parser->ib.semaphore,
+				 radeon_vm_grab_id(rdev, vm, parser->ring));
 
 	if ((rdev->family >= CHIP_TAHITI) &&
 	    (parser->chunk_const_ib_idx != -1)) {
@@ -530,7 +524,9 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
 	}
 
 out:
+	radeon_vm_add_to_lru(rdev, vm);
 	mutex_unlock(&vm->mutex);
+	mutex_unlock(&rdev->vm_manager.lock);
 	return r;
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 2cd144c..9ebd035 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1195,12 +1195,14 @@ int radeon_device_init(struct radeon_device *rdev,
 	r = radeon_gem_init(rdev);
 	if (r)
 		return r;
-
+	/* initialize vm here */
+	mutex_init(&rdev->vm_manager.lock);
 	/* Adjust VM size here.
 	 * Currently set to 4GB ((1 << 20) 4k pages).
 	 * Max GPUVM size for cayman and SI is 40 bits.
 	 */
 	rdev->vm_manager.max_pfn = 1 << 20;
+	INIT_LIST_HEAD(&rdev->vm_manager.lru_vm);
 
 	/* Set asic functions */
 	r = radeon_asic_init(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index eaaedba..3708dc7 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -571,7 +571,23 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 			return -ENOMEM;
 		}
 
-		r = radeon_vm_init(rdev, &fpriv->vm);
+		radeon_vm_init(rdev, &fpriv->vm);
+
+		r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false);
+		if (r) {
+			kfree(fpriv);
+			return r;
+		}
+
+		/* map the ib pool buffer read only into
+		 * virtual address space */
+		bo_va = radeon_vm_bo_add(rdev, &fpriv->vm,
+					 rdev->ring_tmp_bo.bo);
+		r = radeon_vm_bo_set_addr(rdev, bo_va, RADEON_VA_IB_OFFSET,
+					  RADEON_VM_PAGE_READABLE |
+					  RADEON_VM_PAGE_SNOOPED);
+
+		radeon_bo_unreserve(rdev->ring_tmp_bo.bo);
 		if (r) {
 			kfree(fpriv);
 			return r;
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 62201db..4ddc6d77 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -145,13 +145,6 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
 		return r;
 	}
 
-	/* grab a vm id if necessary */
-	if (ib->vm) {
-		struct radeon_fence *vm_id_fence;
-		vm_id_fence = radeon_vm_grab_id(rdev, ib->vm, ib->ring);
-        	radeon_semaphore_sync_to(ib->semaphore, vm_id_fence);
-	}
-
 	/* sync with other rings */
 	r = radeon_semaphore_sync_rings(rdev, ib->semaphore, ib->ring);
 	if (r) {
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index c11b71d..5160176 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -84,19 +84,85 @@ static unsigned radeon_vm_directory_size(struct radeon_device *rdev)
  */
 int radeon_vm_manager_init(struct radeon_device *rdev)
 {
+	struct radeon_vm *vm;
+	struct radeon_bo_va *bo_va;
 	int r;
+	unsigned size;
 
 	if (!rdev->vm_manager.enabled) {
+		/* allocate enough for 2 full VM pts */
+		size = radeon_vm_directory_size(rdev);
+		size += rdev->vm_manager.max_pfn * 8;
+		size *= 2;
+		r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager,
+					      RADEON_GPU_PAGE_ALIGN(size),
+					      RADEON_VM_PTB_ALIGN_SIZE,
+					      RADEON_GEM_DOMAIN_VRAM);
+		if (r) {
+			dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n",
+				(rdev->vm_manager.max_pfn * 8) >> 10);
+			return r;
+		}
+
 		r = radeon_asic_vm_init(rdev);
 		if (r)
 			return r;
 
 		rdev->vm_manager.enabled = true;
+
+		r = radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager);
+		if (r)
+			return r;
+	}
+
+	/* restore page table */
+	list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) {
+		if (vm->page_directory == NULL)
+			continue;
+
+		list_for_each_entry(bo_va, &vm->va, vm_list) {
+			bo_va->valid = false;
+		}
 	}
 	return 0;
 }
 
 /**
+ * radeon_vm_free_pt - free the page table for a specific vm
+ *
+ * @rdev: radeon_device pointer
+ * @vm: vm to unbind
+ *
+ * Free the page table of a specific vm (cayman+).
+ *
+ * Global and local mutex must be locked!
+ */
+static void radeon_vm_free_pt(struct radeon_device *rdev,
+				    struct radeon_vm *vm)
+{
+	struct radeon_bo_va *bo_va;
+	int i;
+
+	if (!vm->page_directory)
+		return;
+
+	list_del_init(&vm->list);
+	radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
+
+	list_for_each_entry(bo_va, &vm->va, vm_list) {
+		bo_va->valid = false;
+	}
+
+	if (vm->page_tables == NULL)
+		return;
+
+	for (i = 0; i < radeon_vm_num_pdes(rdev); i++)
+		radeon_sa_bo_free(rdev, &vm->page_tables[i], vm->fence);
+
+	kfree(vm->page_tables);
+}
+
+/**
  * radeon_vm_manager_fini - tear down the vm manager
  *
  * @rdev: radeon_device pointer
@@ -105,63 +171,155 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
  */
 void radeon_vm_manager_fini(struct radeon_device *rdev)
 {
+	struct radeon_vm *vm, *tmp;
 	int i;
 
 	if (!rdev->vm_manager.enabled)
 		return;
 
-	for (i = 0; i < RADEON_NUM_VM; ++i)
+	mutex_lock(&rdev->vm_manager.lock);
+	/* free all allocated page tables */
+	list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) {
+		mutex_lock(&vm->mutex);
+		radeon_vm_free_pt(rdev, vm);
+		mutex_unlock(&vm->mutex);
+	}
+	for (i = 0; i < RADEON_NUM_VM; ++i) {
 		radeon_fence_unref(&rdev->vm_manager.active[i]);
+	}
 	radeon_asic_vm_fini(rdev);
+	mutex_unlock(&rdev->vm_manager.lock);
+
+	radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager);
+	radeon_sa_bo_manager_fini(rdev, &rdev->vm_manager.sa_manager);
 	rdev->vm_manager.enabled = false;
 }
 
 /**
- * radeon_vm_get_bos - add the vm BOs to a validation list
+ * radeon_vm_evict - evict page table to make room for new one
+ *
+ * @rdev: radeon_device pointer
+ * @vm: VM we want to allocate something for
  *
- * @vm: vm providing the BOs
- * @head: head of validation list
+ * Evict a VM from the lru, making sure that it isn't @vm. (cayman+).
+ * Returns 0 for success, -ENOMEM for failure.
  *
- * Add the page directory to the list of BOs to
- * validate for command submission (cayman+).
+ * Global and local mutex must be locked!
  */
-struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev,
-					  struct radeon_vm *vm,
-					  struct list_head *head)
+static int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm)
 {
-	struct radeon_cs_reloc *list;
-	unsigned i, idx;
+	struct radeon_vm *vm_evict;
 
-	list = kmalloc_array(vm->max_pde_used + 2,
-			     sizeof(struct radeon_cs_reloc), GFP_KERNEL);
-	if (!list)
-		return NULL;
+	if (list_empty(&rdev->vm_manager.lru_vm))
+		return -ENOMEM;
 
-	/* add the vm page table to the list */
-	list[0].gobj = NULL;
-	list[0].robj = vm->page_directory;
-	list[0].domain = RADEON_GEM_DOMAIN_VRAM;
-	list[0].alt_domain = RADEON_GEM_DOMAIN_VRAM;
-	list[0].tv.bo = &vm->page_directory->tbo;
-	list[0].tiling_flags = 0;
-	list[0].handle = 0;
-	list_add(&list[0].tv.head, head);
-
-	for (i = 0, idx = 1; i <= vm->max_pde_used; i++) {
-		if (!vm->page_tables[i].bo)
-			continue;
+	vm_evict = list_first_entry(&rdev->vm_manager.lru_vm,
+				    struct radeon_vm, list);
+	if (vm_evict == vm)
+		return -ENOMEM;
+
+	mutex_lock(&vm_evict->mutex);
+	radeon_vm_free_pt(rdev, vm_evict);
+	mutex_unlock(&vm_evict->mutex);
+	return 0;
+}
 
-		list[idx].gobj = NULL;
-		list[idx].robj = vm->page_tables[i].bo;
-		list[idx].domain = RADEON_GEM_DOMAIN_VRAM;
-		list[idx].alt_domain = RADEON_GEM_DOMAIN_VRAM;
-		list[idx].tv.bo = &list[idx].robj->tbo;
-		list[idx].tiling_flags = 0;
-		list[idx].handle = 0;
-		list_add(&list[idx++].tv.head, head);
+/**
+ * radeon_vm_alloc_pt - allocates a page table for a VM
+ *
+ * @rdev: radeon_device pointer
+ * @vm: vm to bind
+ *
+ * Allocate a page table for the requested vm (cayman+).
+ * Returns 0 for success, error for failure.
+ *
+ * Global and local mutex must be locked!
+ */
+int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm)
+{
+	unsigned pd_size, pd_entries, pts_size;
+	struct radeon_ib ib;
+	int r;
+
+	if (vm == NULL) {
+		return -EINVAL;
+	}
+
+	if (vm->page_directory != NULL) {
+		return 0;
+	}
+
+	pd_size = radeon_vm_directory_size(rdev);
+	pd_entries = radeon_vm_num_pdes(rdev);
+
+retry:
+	r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
+			     &vm->page_directory, pd_size,
+			     RADEON_VM_PTB_ALIGN_SIZE, false);
+	if (r == -ENOMEM) {
+		r = radeon_vm_evict(rdev, vm);
+		if (r)
+			return r;
+		goto retry;
+
+	} else if (r) {
+		return r;
 	}
 
-	return list;
+	vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->page_directory);
+
+	/* Initially clear the page directory */
+	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib,
+			  NULL, pd_entries * 2 + 64);
+	if (r) {
+		radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
+		return r;
+	}
+
+	ib.length_dw = 0;
+
+	radeon_asic_vm_set_page(rdev, &ib, vm->pd_gpu_addr,
+				0, pd_entries, 0, 0);
+
+	radeon_semaphore_sync_to(ib.semaphore, vm->fence);
+	r = radeon_ib_schedule(rdev, &ib, NULL);
+	if (r) {
+		radeon_ib_free(rdev, &ib);
+		radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
+		return r;
+	}
+	radeon_fence_unref(&vm->fence);
+	vm->fence = radeon_fence_ref(ib.fence);
+	radeon_ib_free(rdev, &ib);
+	radeon_fence_unref(&vm->last_flush);
+
+	/* allocate page table array */
+	pts_size = radeon_vm_num_pdes(rdev) * sizeof(struct radeon_sa_bo *);
+	vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
+
+	if (vm->page_tables == NULL) {
+		DRM_ERROR("Cannot allocate memory for page table array\n");
+		radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * radeon_vm_add_to_lru - add VMs page table to LRU list
+ *
+ * @rdev: radeon_device pointer
+ * @vm: vm to add to LRU
+ *
+ * Add the allocated page table to the LRU list (cayman+).
+ *
+ * Global mutex must be locked!
+ */
+void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm)
+{
+	list_del_init(&vm->list);
+	list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
 }
 
 /**
@@ -235,14 +393,10 @@ void radeon_vm_flush(struct radeon_device *rdev,
 		     struct radeon_vm *vm,
 		     int ring)
 {
-	uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory);
-
 	/* if we can't remember our last VM flush then flush now! */
 	/* XXX figure out why we have to flush all the time */
-	if (!vm->last_flush || true || pd_addr != vm->pd_gpu_addr) {
-		vm->pd_gpu_addr = pd_addr;
+	if (!vm->last_flush || true)
 		radeon_ring_vm_flush(rdev, ring, vm);
-	}
 }
 
 /**
@@ -342,63 +496,6 @@ struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
 }
 
 /**
- * radeon_vm_clear_bo - initially clear the page dir/table
- *
- * @rdev: radeon_device pointer
- * @bo: bo to clear
- */
-static int radeon_vm_clear_bo(struct radeon_device *rdev,
-			      struct radeon_bo *bo)
-{
-        struct ttm_validate_buffer tv;
-        struct ww_acquire_ctx ticket;
-        struct list_head head;
-	struct radeon_ib ib;
-	unsigned entries;
-	uint64_t addr;
-	int r;
-
-        memset(&tv, 0, sizeof(tv));
-        tv.bo = &bo->tbo;
-
-        INIT_LIST_HEAD(&head);
-        list_add(&tv.head, &head);
-
-        r = ttm_eu_reserve_buffers(&ticket, &head);
-        if (r)
-		return r;
-
-        r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-        if (r)
-                goto error;
-
-	addr = radeon_bo_gpu_offset(bo);
-	entries = radeon_bo_size(bo) / 8;
-
-	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib,
-			  NULL, entries * 2 + 64);
-	if (r)
-                goto error;
-
-	ib.length_dw = 0;
-
-	radeon_asic_vm_set_page(rdev, &ib, addr, 0, entries, 0, 0);
-
-	r = radeon_ib_schedule(rdev, &ib, NULL);
-	if (r)
-                goto error;
-
-	ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence);
-	radeon_ib_free(rdev, &ib);
-
-	return 0;
-
-error:
-	ttm_eu_backoff_reservation(&ticket, &head);
-	return r;
-}
-
-/**
  * radeon_vm_bo_set_addr - set bos virtual address inside a vm
  *
  * @rdev: radeon_device pointer
@@ -422,8 +519,7 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
 	struct radeon_vm *vm = bo_va->vm;
 	struct radeon_bo_va *tmp;
 	struct list_head *head;
-	unsigned last_pfn, pt_idx;
-	int r;
+	unsigned last_pfn;
 
 	if (soffset) {
 		/* make sure object fit at this offset */
@@ -474,53 +570,8 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
 	bo_va->valid = false;
 	list_move(&bo_va->vm_list, head);
 
-	soffset = (soffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
-	eoffset = (eoffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
-
-	if (eoffset > vm->max_pde_used)
-		vm->max_pde_used = eoffset;
-
-	radeon_bo_unreserve(bo_va->bo);
-
-	/* walk over the address space and allocate the page tables */
-	for (pt_idx = soffset; pt_idx <= eoffset; ++pt_idx) {
-		struct radeon_bo *pt;
-
-		if (vm->page_tables[pt_idx].bo)
-			continue;
-
-		/* drop mutex to allocate and clear page table */
-		mutex_unlock(&vm->mutex);
-
-		r = radeon_bo_create(rdev, RADEON_VM_PTE_COUNT * 8,
-				     RADEON_GPU_PAGE_SIZE, false, 
-				     RADEON_GEM_DOMAIN_VRAM, NULL, &pt);
-		if (r)
-			return r;
-
-		r = radeon_vm_clear_bo(rdev, pt);
-		if (r) {
-			radeon_bo_unref(&pt);
-			radeon_bo_reserve(bo_va->bo, false);
-			return r;
-		}
-
-		/* aquire mutex again */
-		mutex_lock(&vm->mutex);
-		if (vm->page_tables[pt_idx].bo) {
-			/* someone else allocated the pt in the meantime */
-			mutex_unlock(&vm->mutex);
-			radeon_bo_unref(&pt);
-			mutex_lock(&vm->mutex);
-			continue;
-		}
-
-		vm->page_tables[pt_idx].addr = 0;
-		vm->page_tables[pt_idx].bo = pt;
-	}
-
 	mutex_unlock(&vm->mutex);
-	return radeon_bo_reserve(bo_va->bo, false);
+	return 0;
 }
 
 /**
@@ -580,54 +631,58 @@ static uint32_t radeon_vm_page_flags(uint32_t flags)
  *
  * Global and local mutex must be locked!
  */
-int radeon_vm_update_page_directory(struct radeon_device *rdev,
-				    struct radeon_vm *vm)
+static int radeon_vm_update_pdes(struct radeon_device *rdev,
+				 struct radeon_vm *vm,
+				 struct radeon_ib *ib,
+				 uint64_t start, uint64_t end)
 {
 	static const uint32_t incr = RADEON_VM_PTE_COUNT * 8;
 
-	struct radeon_bo *pd = vm->page_directory;
-	uint64_t pd_addr = radeon_bo_gpu_offset(pd);
 	uint64_t last_pde = ~0, last_pt = ~0;
-	unsigned count = 0, pt_idx, ndw;
-	struct radeon_ib ib;
+	unsigned count = 0;
+	uint64_t pt_idx;
 	int r;
 
-	/* padding, etc. */
-	ndw = 64;
-
-	/* assume the worst case */
-	ndw += vm->max_pde_used * 16;
-
-	/* update too big for an IB */
-	if (ndw > 0xfffff)
-		return -ENOMEM;
-
-	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
-	if (r)
-		return r;
-	ib.length_dw = 0;
+	start = (start / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
+	end = (end / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
 
 	/* walk over the address space and update the page directory */
-	for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
-		struct radeon_bo *bo = vm->page_tables[pt_idx].bo;
+	for (pt_idx = start; pt_idx <= end; ++pt_idx) {
 		uint64_t pde, pt;
 
-		if (bo == NULL)
+		if (vm->page_tables[pt_idx])
 			continue;
 
-		pt = radeon_bo_gpu_offset(bo);
-		if (vm->page_tables[pt_idx].addr == pt)
-			continue;
-		vm->page_tables[pt_idx].addr = pt;
+retry:
+		r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
+				     &vm->page_tables[pt_idx],
+				     RADEON_VM_PTE_COUNT * 8,
+				     RADEON_GPU_PAGE_SIZE, false);
+
+		if (r == -ENOMEM) {
+			r = radeon_vm_evict(rdev, vm);
+			if (r)
+				return r;
+			goto retry;
+		} else if (r) {
+			return r;
+		}
+
+		pde = vm->pd_gpu_addr + pt_idx * 8;
+
+		pt = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);
 
-		pde = pd_addr + pt_idx * 8;
 		if (((last_pde + 8 * count) != pde) ||
 		    ((last_pt + incr * count) != pt)) {
 
 			if (count) {
-				radeon_asic_vm_set_page(rdev, &ib, last_pde,
+				radeon_asic_vm_set_page(rdev, ib, last_pde,
 							last_pt, count, incr,
 							R600_PTE_VALID);
+
+				count *= RADEON_VM_PTE_COUNT;
+				radeon_asic_vm_set_page(rdev, ib, last_pt, 0,
+							count, 0, 0);
 			}
 
 			count = 1;
@@ -638,23 +693,14 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev,
 		}
 	}
 
-	if (count)
-		radeon_asic_vm_set_page(rdev, &ib, last_pde, last_pt, count,
+	if (count) {
+		radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count,
 					incr, R600_PTE_VALID);
 
-	if (ib.length_dw != 0) {
-		radeon_semaphore_sync_to(ib.semaphore, pd->tbo.sync_obj);
-		radeon_semaphore_sync_to(ib.semaphore, vm->last_id_use);
-		r = radeon_ib_schedule(rdev, &ib, NULL);
-		if (r) {
-			radeon_ib_free(rdev, &ib);
-			return r;
-		}
-		radeon_fence_unref(&vm->fence);
-		vm->fence = radeon_fence_ref(ib.fence);
-		radeon_fence_unref(&vm->last_flush);
+		count *= RADEON_VM_PTE_COUNT;
+		radeon_asic_vm_set_page(rdev, ib, last_pt, 0,
+					count, 0, 0);
 	}
-	radeon_ib_free(rdev, &ib);
 
 	return 0;
 }
@@ -691,18 +737,15 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
 	/* walk over the address space and update the page tables */
 	for (addr = start; addr < end; ) {
 		uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE;
-		struct radeon_bo *pt = vm->page_tables[pt_idx].bo;
 		unsigned nptes;
 		uint64_t pte;
 
-		radeon_semaphore_sync_to(ib->semaphore, pt->tbo.sync_obj);
-
 		if ((addr & ~mask) == (end & ~mask))
 			nptes = end - addr;
 		else
 			nptes = RADEON_VM_PTE_COUNT - (addr & mask);
 
-		pte = radeon_bo_gpu_offset(pt);
+		pte = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);
 		pte += (addr & mask) * 8;
 
 		if ((last_pte + 8 * count) != pte) {
@@ -743,7 +786,7 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
  * Fill in the page table entries for @bo (cayman+).
  * Returns 0 for success, -EINVAL for failure.
  *
- * Object have to be reserved and mutex must be locked!
+ * Object have to be reserved & global and local mutex must be locked!
  */
 int radeon_vm_bo_update(struct radeon_device *rdev,
 			struct radeon_vm *vm,
@@ -752,10 +795,14 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 {
 	struct radeon_ib ib;
 	struct radeon_bo_va *bo_va;
-	unsigned nptes, ndw;
+	unsigned nptes, npdes, ndw;
 	uint64_t addr;
 	int r;
 
+	/* nothing to do if vm isn't bound */
+	if (vm->page_directory == NULL)
+		return 0;
+
 	bo_va = radeon_vm_bo_find(vm, bo);
 	if (bo_va == NULL) {
 		dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
@@ -793,6 +840,9 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 
 	nptes = radeon_bo_ngpu_pages(bo);
 
+	/* assume two extra pdes in case the mapping overlaps the borders */
+	npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2;
+
 	/* padding, etc. */
 	ndw = 64;
 
@@ -807,6 +857,15 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 	/* reserve space for pte addresses */
 	ndw += nptes * 2;
 
+	/* reserve space for one header for every 2k dwords */
+	ndw += (npdes >> 11) * 4;
+
+	/* reserve space for pde addresses */
+	ndw += npdes * 2;
+
+	/* reserve space for clearing new page tables */
+	ndw += npdes * 2 * RADEON_VM_PTE_COUNT;
+
 	/* update too big for an IB */
 	if (ndw > 0xfffff)
 		return -ENOMEM;
@@ -816,6 +875,12 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 		return r;
 	ib.length_dw = 0;
 
+	r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset);
+	if (r) {
+		radeon_ib_free(rdev, &ib);
+		return r;
+	}
+
 	radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset,
 			      addr, radeon_vm_page_flags(bo_va->flags));
 
@@ -851,10 +916,12 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev,
 {
 	int r = 0;
 
+	mutex_lock(&rdev->vm_manager.lock);
 	mutex_lock(&bo_va->vm->mutex);
-	if (bo_va->soffset)
+	if (bo_va->soffset) {
 		r = radeon_vm_bo_update(rdev, bo_va->vm, bo_va->bo, NULL);
-
+	}
+	mutex_unlock(&rdev->vm_manager.lock);
 	list_del(&bo_va->vm_list);
 	mutex_unlock(&bo_va->vm->mutex);
 	list_del(&bo_va->bo_list);
@@ -890,43 +957,15 @@ void radeon_vm_bo_invalidate(struct radeon_device *rdev,
  *
  * Init @vm fields (cayman+).
  */
-int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
+void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
 {
-	unsigned pd_size, pd_entries, pts_size;
-	int r;
-
 	vm->id = 0;
 	vm->fence = NULL;
 	vm->last_flush = NULL;
 	vm->last_id_use = NULL;
 	mutex_init(&vm->mutex);
+	INIT_LIST_HEAD(&vm->list);
 	INIT_LIST_HEAD(&vm->va);
-
-	pd_size = radeon_vm_directory_size(rdev);
-	pd_entries = radeon_vm_num_pdes(rdev);
-
-	/* allocate page table array */
-	pts_size = pd_entries * sizeof(struct radeon_vm_pt);
-	vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
-	if (vm->page_tables == NULL) {
-		DRM_ERROR("Cannot allocate memory for page table array\n");
-		return -ENOMEM;
-	}
-
-	r = radeon_bo_create(rdev, pd_size, RADEON_VM_PTB_ALIGN_SIZE, false,
-			     RADEON_GEM_DOMAIN_VRAM, NULL,
-			     &vm->page_directory);
-	if (r)
-		return r;
-
-	r = radeon_vm_clear_bo(rdev, vm->page_directory);
-	if (r) {
-		radeon_bo_unref(&vm->page_directory);
-		vm->page_directory = NULL;
-		return r;
-	}
-
-	return 0;
 }
 
 /**
@@ -941,7 +980,12 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
 void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
 {
 	struct radeon_bo_va *bo_va, *tmp;
-	int i, r;
+	int r;
+
+	mutex_lock(&rdev->vm_manager.lock);
+	mutex_lock(&vm->mutex);
+	radeon_vm_free_pt(rdev, vm);
+	mutex_unlock(&rdev->vm_manager.lock);
 
 	if (!list_empty(&vm->va)) {
 		dev_err(rdev->dev, "still active bo inside vm\n");
@@ -955,17 +999,8 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
 			kfree(bo_va);
 		}
 	}
-
-
-	for (i = 0; i < radeon_vm_num_pdes(rdev); i++)
-		radeon_bo_unref(&vm->page_tables[i].bo);
-	kfree(vm->page_tables);
-
-	radeon_bo_unref(&vm->page_directory);
-
 	radeon_fence_unref(&vm->fence);
 	radeon_fence_unref(&vm->last_flush);
 	radeon_fence_unref(&vm->last_id_use);
-
-	mutex_destroy(&vm->mutex);
+	mutex_unlock(&vm->mutex);
 }
-- 
1.9.1


[-- Attachment #4: Type: text/plain, Size: 159 bytes --]

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-06-23  9:56                       ` Christian König
@ 2014-06-24  6:49                         ` Michel Dänzer
  2014-06-24 10:14                           ` Christian König
  0 siblings, 1 reply; 36+ messages in thread
From: Michel Dänzer @ 2014-06-24  6:49 UTC (permalink / raw)
  To: Christian König, Alex Deucher; +Cc: dri-devel

[-- Attachment #1: Type: text/plain, Size: 1308 bytes --]

On 23.06.2014 18:56, Christian König wrote:
> Am 23.06.2014 10:15, schrieb Michel Dänzer:
>> On 19.06.2014 18:45, Christian König wrote:
>>
>>> I think even when we revert to the old code we have a couple of unsolved
>>> problems with the VM support or in the driver in general where we should
>>> try to understand the underlying reason for it instead of applying more
>>> workarounds.
>> I'm not suggesting applying more workarounds but going back to a known
>> more stable state. It seems like we've maneuvered ourselves to a rather
>> uncomfortable position from there, with no clear way to a better place.
>> But if we basically started from the 3.14 state again, we have a few
>> known hurdles like mine and Marek's Bonaire etc. which we know any
>> further improvements will have to pass before they can be considered for
>> general consumption.
> 
> Yeah agree, especially on the uncomfortable position.
> 
> Please try with the two attached patches applied on top of 3.15 and
> retest. They should revert back to the old implementation.

Unfortunately, X fails to start with these, see the attached excerpt
from dmesg.


-- 
Earthling Michel Dänzer            |                  http://www.amd.com
Libre software enthusiast          |                Mesa and X developer

[-- Attachment #2: dmesg.txt --]
[-- Type: text/plain, Size: 10839 bytes --]

[    5.401968] radeon 0000:01:00.0: bo ffff8800db841000 va 0x00000000 conflict with (bo ffff8800db841000 0x00100000 0x00200000)
[    5.402095] radeon 0000:01:00.0: still active bo inside vm
[    5.402132] 
[    5.402141] ======================================================
[    5.402171] [ INFO: possible circular locking dependency detected ]
[    5.402202] 3.15.0 #133 Tainted: G           O 
[    5.402225] -------------------------------------------------------
[    5.402251] Xorg/855 is trying to acquire lock:
[    5.402272]  (reservation_ww_class_mutex){+.+.+.}, at: [<ffffffffa03387f7>] radeon_vm_fini+0xcf/0x1b6 [radeon]
[    5.402340] 
[    5.402340] but task is already holding lock:
[    5.402366]  (&vm->mutex){+.+...}, at: [<ffffffffa0338762>] radeon_vm_fini+0x3a/0x1b6 [radeon]
[    5.402424] 
[    5.402424] which lock already depends on the new lock.
[    5.402424] 
[    5.402463] 
[    5.402463] the existing dependency chain (in reverse order) is:
[    5.402500] 
[    5.402500] -> #1 (&vm->mutex){+.+...}:
[    5.402528]        [<ffffffff81076fe0>] lock_acquire+0xa1/0x111
[    5.402561]        [<ffffffff813c2dc8>] mutex_lock_nested+0x72/0x3c6
[    5.402597]        [<ffffffffa0337eb3>] radeon_vm_bo_add+0x7b/0xbd [radeon]
[    5.402638]        [<ffffffffa029c522>] radeon_driver_open_kms+0xa0/0x174 [radeon]
[    5.402679]        [<ffffffffa01b57b7>] drm_open+0x230/0x456 [drm]
[    5.402717]        [<ffffffffa01b5a8a>] drm_stub_open+0xad/0xde [drm]
[    5.402755]        [<ffffffff8113049f>] chrdev_open+0x143/0x169
[    5.402792]        [<ffffffff8112b6e5>] do_dentry_open+0x223/0x238
[    5.402824]        [<ffffffff8112b728>] finish_open+0x2e/0x38
[    5.402854]        [<ffffffff811381f4>] do_last+0x846/0xa01
[    5.402886]        [<ffffffff811385e4>] path_openat+0x235/0x4ef
[    5.402917]        [<ffffffff811399c1>] do_filp_open+0x35/0x7a
[    5.402948]        [<ffffffff8112c135>] do_sys_open+0x6b/0xfa
[    5.402975]        [<ffffffff8112c1dd>] SyS_open+0x19/0x1b
[    5.403004]        [<ffffffff813cbb22>] system_call_fastpath+0x16/0x1b
[    5.403038] 
[    5.403038] -> #0 (reservation_ww_class_mutex){+.+.+.}:
[    5.403071]        [<ffffffff81076a0e>] __lock_acquire+0xae5/0xd9c
[    5.403103]        [<ffffffff81076fe0>] lock_acquire+0xa1/0x111
[    5.403132]        [<ffffffff813c392c>] mutex_lock_interruptible_nested+0x72/0x3d5
[    5.403168]        [<ffffffffa03387f7>] radeon_vm_fini+0xcf/0x1b6 [radeon]
[    5.403212]        [<ffffffffa029c5b5>] radeon_driver_open_kms+0x133/0x174 [radeon]
[    5.403253]        [<ffffffffa01b57b7>] drm_open+0x230/0x456 [drm]
[    5.403289]        [<ffffffffa01b5a8a>] drm_stub_open+0xad/0xde [drm]
[    5.403325]        [<ffffffff8113049f>] chrdev_open+0x143/0x169
[    5.403356]        [<ffffffff8112b6e5>] do_dentry_open+0x223/0x238
[    5.403389]        [<ffffffff8112b728>] finish_open+0x2e/0x38
[    5.403417]        [<ffffffff811381f4>] do_last+0x846/0xa01
[    5.403446]        [<ffffffff811385e4>] path_openat+0x235/0x4ef
[    5.403478]        [<ffffffff811399c1>] do_filp_open+0x35/0x7a
[    5.403508]        [<ffffffff8112c135>] do_sys_open+0x6b/0xfa
[    5.403537]        [<ffffffff8112c1dd>] SyS_open+0x19/0x1b
[    5.403567]        [<ffffffff813cbb22>] system_call_fastpath+0x16/0x1b
[    5.403597] 
[    5.403597] other info that might help us debug this:
[    5.403597] 
[    5.403632]  Possible unsafe locking scenario:
[    5.403632] 
[    5.403660]        CPU0                    CPU1
[    5.404874]        ----                    ----
[    5.406077]   lock(&vm->mutex);
[    5.407250]                                lock(reservation_ww_class_mutex);
[    5.408449]                                lock(&vm->mutex);
[    5.409601]   lock(reservation_ww_class_mutex);
[    5.410759] 
[    5.410759]  *** DEADLOCK ***
[    5.410759] 
[    5.414095] 2 locks held by Xorg/855:
[    5.415180]  #0:  (drm_global_mutex){+.+.+.}, at: [<ffffffffa01b5a1a>] drm_stub_open+0x3d/0xde [drm]
[    5.416333]  #1:  (&vm->mutex){+.+...}, at: [<ffffffffa0338762>] radeon_vm_fini+0x3a/0x1b6 [radeon]
[    5.417487] 
[    5.417487] stack backtrace:
[    5.419659] CPU: 2 PID: 855 Comm: Xorg Tainted: G           O  3.15.0 #133
[    5.420762] Hardware name: System manufacturer System Product Name/A88X-PRO, BIOS 1001 04/01/2014
[    5.421898]  0000000000000000 ffff88021150b9a0 ffffffff813c03cc ffffffff81fad2d0
[    5.423030]  ffff88021150b9e0 ffffffff813bda7b ffff88020fc7ca10 ffff88020fc7ca10
[    5.424129]  ffff88020fc7d338 0000000000000002 ffff88020fc7d370 0000000000000002
[    5.425225] Call Trace:
[    5.426335]  [<ffffffff813c03cc>] dump_stack+0x4d/0x66
[    5.427411]  [<ffffffff813bda7b>] print_circular_bug+0x1f6/0x204
[    5.428465]  [<ffffffff81076a0e>] __lock_acquire+0xae5/0xd9c
[    5.429540]  [<ffffffff81008972>] ? native_sched_clock+0x3a/0x3c
[    5.430608]  [<ffffffff81076fe0>] lock_acquire+0xa1/0x111
[    5.431710]  [<ffffffffa03387f7>] ? radeon_vm_fini+0xcf/0x1b6 [radeon]
[    5.432809]  [<ffffffff813c392c>] mutex_lock_interruptible_nested+0x72/0x3d5
[    5.433903]  [<ffffffffa03387f7>] ? radeon_vm_fini+0xcf/0x1b6 [radeon]
[    5.434988]  [<ffffffff812a3698>] ? dev_err+0x57/0x59
[    5.436096]  [<ffffffffa03387f7>] ? radeon_vm_fini+0xcf/0x1b6 [radeon]
[    5.437147]  [<ffffffffa03387f7>] radeon_vm_fini+0xcf/0x1b6 [radeon]
[    5.438204]  [<ffffffffa029c5b5>] radeon_driver_open_kms+0x133/0x174 [radeon]
[    5.439254]  [<ffffffffa01b57b7>] drm_open+0x230/0x456 [drm]
[    5.440354]  [<ffffffffa01b5a8a>] drm_stub_open+0xad/0xde [drm]
[    5.441425]  [<ffffffff8113049f>] chrdev_open+0x143/0x169
[    5.442485]  [<ffffffff8113035c>] ? cdev_put+0x1e/0x1e
[    5.443551]  [<ffffffff8112b6e5>] do_dentry_open+0x223/0x238
[    5.444604]  [<ffffffff8112b728>] finish_open+0x2e/0x38
[    5.445676]  [<ffffffff811381f4>] do_last+0x846/0xa01
[    5.446740]  [<ffffffff811385e4>] path_openat+0x235/0x4ef
[    5.447811]  [<ffffffff811399c1>] do_filp_open+0x35/0x7a
[    5.448899]  [<ffffffff813c53b0>] ? _raw_spin_unlock+0x22/0x2d
[    5.449993]  [<ffffffff81143aa5>] ? __alloc_fd+0xee/0xfd
[    5.451061]  [<ffffffff8112c135>] do_sys_open+0x6b/0xfa
[    5.452121]  [<ffffffff8112c1dd>] SyS_open+0x19/0x1b
[    5.453186]  [<ffffffff813cbb22>] system_call_fastpath+0x16/0x1b
[    5.587862] radeon 0000:01:00.0: bo ffff8800db841000 va 0x00000000 conflict with (bo ffff8800db841000 0x00100000 0x00200000)
[    5.587868] radeon 0000:01:00.0: still active bo inside vm
[    5.587898] radeon 0000:01:00.0: bo ffff8800db841000 va 0x00000000 conflict with (bo ffff8800db841000 0x00100000 0x00200000)
[    5.587901] radeon 0000:01:00.0: still active bo inside vm
[    5.587971] radeon 0000:01:00.0: bo ffff8800db841000 va 0x00000000 conflict with (bo ffff8800db841000 0x00100000 0x00200000)
[    5.587974] radeon 0000:01:00.0: still active bo inside vm
[    5.588009] radeon 0000:01:00.0: bo ffff8800db841000 va 0x00000000 conflict with (bo ffff8800db841000 0x00100000 0x00200000)
[    5.588012] radeon 0000:01:00.0: still active bo inside vm
[    5.696557] radeon 0000:01:00.0: bo ffff8800db841000 va 0x00000000 conflict with (bo ffff8800db841000 0x00100000 0x00200000)
[    5.696562] radeon 0000:01:00.0: still active bo inside vm
[    5.696598] radeon 0000:01:00.0: bo ffff8800db841000 va 0x00000000 conflict with (bo ffff8800db841000 0x00100000 0x00200000)
[    5.696601] radeon 0000:01:00.0: still active bo inside vm
[    5.696637] radeon 0000:01:00.0: bo ffff8800db841000 va 0x00000000 conflict with (bo ffff8800db841000 0x00100000 0x00200000)
[    5.696640] radeon 0000:01:00.0: still active bo inside vm
[    5.696663] radeon 0000:01:00.0: bo ffff8800db841000 va 0x00000000 conflict with (bo ffff8800db841000 0x00100000 0x00200000)
[    5.696665] radeon 0000:01:00.0: still active bo inside vm
[    5.802864] radeon 0000:01:00.0: bo ffff8800db841000 va 0x00000000 conflict with (bo ffff8800db841000 0x00100000 0x00200000)
[    5.802869] radeon 0000:01:00.0: still active bo inside vm
[    5.802904] radeon 0000:01:00.0: bo ffff8800db841000 va 0x00000000 conflict with (bo ffff8800db841000 0x00100000 0x00200000)
[    5.802906] radeon 0000:01:00.0: still active bo inside vm
[    5.802973] radeon 0000:01:00.0: bo ffff8800db841000 va 0x00000000 conflict with (bo ffff8800db841000 0x00100000 0x00200000)
[    5.802975] radeon 0000:01:00.0: still active bo inside vm
[    5.803003] radeon 0000:01:00.0: bo ffff8800db841000 va 0x00000000 conflict with (bo ffff8800db841000 0x00100000 0x00200000)
[    5.803006] radeon 0000:01:00.0: still active bo inside vm
[    5.917046] radeon 0000:01:00.0: bo ffff8800db841000 va 0x00000000 conflict with (bo ffff8800db841000 0x00100000 0x00200000)
[    5.917053] radeon 0000:01:00.0: still active bo inside vm
[    5.917109] radeon 0000:01:00.0: bo ffff8800db841000 va 0x00000000 conflict with (bo ffff8800db841000 0x00100000 0x00200000)
[    5.917113] radeon 0000:01:00.0: still active bo inside vm
[    5.917181] radeon 0000:01:00.0: bo ffff8800db841000 va 0x00000000 conflict with (bo ffff8800db841000 0x00100000 0x00200000)
[    5.917185] radeon 0000:01:00.0: still active bo inside vm
[    5.917221] radeon 0000:01:00.0: bo ffff8800db841000 va 0x00000000 conflict with (bo ffff8800db841000 0x00100000 0x00200000)
[    5.917225] radeon 0000:01:00.0: still active bo inside vm
[    6.025320] radeon 0000:01:00.0: bo ffff8800db841000 va 0x00000000 conflict with (bo ffff8800db841000 0x00100000 0x00200000)
[    6.025326] radeon 0000:01:00.0: still active bo inside vm
[    6.025372] radeon 0000:01:00.0: bo ffff8800db841000 va 0x00000000 conflict with (bo ffff8800db841000 0x00100000 0x00200000)
[    6.025376] radeon 0000:01:00.0: still active bo inside vm
[    6.025439] radeon 0000:01:00.0: bo ffff8800db841000 va 0x00000000 conflict with (bo ffff8800db841000 0x00100000 0x00200000)
[    6.025442] radeon 0000:01:00.0: still active bo inside vm
[    6.025479] radeon 0000:01:00.0: bo ffff8800db841000 va 0x00000000 conflict with (bo ffff8800db841000 0x00100000 0x00200000)
[    6.025482] radeon 0000:01:00.0: still active bo inside vm
[    6.084335] radeon 0000:01:00.0: bo ffff8800db841000 va 0x00000000 conflict with (bo ffff8800db841000 0x00100000 0x00200000)
[    6.084342] radeon 0000:01:00.0: still active bo inside vm
[    6.084398] radeon 0000:01:00.0: bo ffff8800db841000 va 0x00000000 conflict with (bo ffff8800db841000 0x00100000 0x00200000)
[    6.084404] radeon 0000:01:00.0: still active bo inside vm
[    6.084476] radeon 0000:01:00.0: bo ffff8800db841000 va 0x00000000 conflict with (bo ffff8800db841000 0x00100000 0x00200000)
[    6.084480] radeon 0000:01:00.0: still active bo inside vm
[    6.084520] radeon 0000:01:00.0: bo ffff8800db841000 va 0x00000000 conflict with (bo ffff8800db841000 0x00100000 0x00200000)
[    6.084525] radeon 0000:01:00.0: still active bo inside vm

[-- Attachment #3: Type: text/plain, Size: 159 bytes --]

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-06-24  6:49                         ` Michel Dänzer
@ 2014-06-24 10:14                           ` Christian König
  2014-06-25  3:59                             ` Michel Dänzer
  0 siblings, 1 reply; 36+ messages in thread
From: Christian König @ 2014-06-24 10:14 UTC (permalink / raw)
  To: Michel Dänzer, Alex Deucher; +Cc: dri-devel

[-- Attachment #1: Type: text/plain, Size: 1531 bytes --]

Am 24.06.2014 08:49, schrieb Michel Dänzer:
> On 23.06.2014 18:56, Christian König wrote:
>> Am 23.06.2014 10:15, schrieb Michel Dänzer:
>>> On 19.06.2014 18:45, Christian König wrote:
>>>
>>>> I think even when we revert to the old code we have a couple of unsolved
>>>> problems with the VM support or in the driver in general where we should
>>>> try to understand the underlying reason for it instead of applying more
>>>> workarounds.
>>> I'm not suggesting applying more workarounds but going back to a known
>>> more stable state. It seems like we've maneuvered ourselves to a rather
>>> uncomfortable position from there, with no clear way to a better place.
>>> But if we basically started from the 3.14 state again, we have a few
>>> known hurdles like mine and Marek's Bonaire etc. which we know any
>>> further improvements will have to pass before they can be considered for
>>> general consumption.
>> Yeah agree, especially on the uncomfortable position.
>>
>> Please try with the two attached patches applied on top of 3.15 and
>> retest. They should revert back to the old implementation.
> Unfortunately, X fails to start with these, see the attached excerpt
> from dmesg.

My fault: I incorrectly resolved a merge conflict and then failed to test 
the right kernel.

BTW: Wasn't there an option to tell GRUB to use the latest installed 
kernel instead of the one with the highest version number? I can't seem to 
find it any more.

Please try attached patches instead,
Christian.

[-- Attachment #2: 0001-drm-radeon-Revert-drop-non-blocking-allocations-from.patch --]
[-- Type: text/x-diff, Size: 3415 bytes --]

>From 17300436e5598357cb9396d3d52c8c40064adc16 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Mon, 23 Jun 2014 11:07:29 +0200
Subject: [PATCH 1/2] drm/radeon: Revert drop non blocking allocations from sub
 allocator

The next revert needs this functionality.

This reverts commit 4d1526466296360f56f93c195848c1202b0cc10b.
---
 drivers/gpu/drm/radeon/radeon_object.h    | 2 +-
 drivers/gpu/drm/radeon/radeon_ring.c      | 2 +-
 drivers/gpu/drm/radeon/radeon_sa.c        | 7 +++++--
 drivers/gpu/drm/radeon/radeon_semaphore.c | 2 +-
 4 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index 9e7b25a..7dff64d 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -180,7 +180,7 @@ extern int radeon_sa_bo_manager_suspend(struct radeon_device *rdev,
 extern int radeon_sa_bo_new(struct radeon_device *rdev,
 			    struct radeon_sa_manager *sa_manager,
 			    struct radeon_sa_bo **sa_bo,
-			    unsigned size, unsigned align);
+			    unsigned size, unsigned align, bool block);
 extern void radeon_sa_bo_free(struct radeon_device *rdev,
 			      struct radeon_sa_bo **sa_bo,
 			      struct radeon_fence *fence);
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index f8050f5..62201db 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -63,7 +63,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
 {
 	int r;
 
-	r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256);
+	r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256, true);
 	if (r) {
 		dev_err(rdev->dev, "failed to get a new IB (%d)\n", r);
 		return r;
diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c
index adcf3e2..c062580 100644
--- a/drivers/gpu/drm/radeon/radeon_sa.c
+++ b/drivers/gpu/drm/radeon/radeon_sa.c
@@ -312,7 +312,7 @@ static bool radeon_sa_bo_next_hole(struct radeon_sa_manager *sa_manager,
 int radeon_sa_bo_new(struct radeon_device *rdev,
 		     struct radeon_sa_manager *sa_manager,
 		     struct radeon_sa_bo **sa_bo,
-		     unsigned size, unsigned align)
+		     unsigned size, unsigned align, bool block)
 {
 	struct radeon_fence *fences[RADEON_NUM_RINGS];
 	unsigned tries[RADEON_NUM_RINGS];
@@ -353,11 +353,14 @@ int radeon_sa_bo_new(struct radeon_device *rdev,
 		r = radeon_fence_wait_any(rdev, fences, false);
 		spin_lock(&sa_manager->wq.lock);
 		/* if we have nothing to wait for block */
-		if (r == -ENOENT) {
+		if (r == -ENOENT && block) {
 			r = wait_event_interruptible_locked(
 				sa_manager->wq, 
 				radeon_sa_event(sa_manager, size, align)
 			);
+
+		} else if (r == -ENOENT) {
+			r = -ENOMEM;
 		}
 
 	} while (!r);
diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c
index dbd6bcd..6140af6 100644
--- a/drivers/gpu/drm/radeon/radeon_semaphore.c
+++ b/drivers/gpu/drm/radeon/radeon_semaphore.c
@@ -42,7 +42,7 @@ int radeon_semaphore_create(struct radeon_device *rdev,
 		return -ENOMEM;
 	}
 	r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &(*semaphore)->sa_bo,
-			     8 * RADEON_NUM_SYNCS, 8);
+			     8 * RADEON_NUM_SYNCS, 8, true);
 	if (r) {
 		kfree(*semaphore);
 		*semaphore = NULL;
-- 
1.9.1


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #3: 0002-drm-radeon-Revert-use-normal-BOs-for-the-page-tables.patch --]
[-- Type: text/x-diff; name="0002-drm-radeon-Revert-use-normal-BOs-for-the-page-tables.patch", Size: 29046 bytes --]

>From 28cd0733ff4b91b917962e964255f0a12278b29a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Mon, 23 Jun 2014 11:08:24 +0200
Subject: [PATCH 2/2] drm/radeon: Revert use normal BOs for the page tables v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts the commit "use normal BOs for the page tables v4" and the following dependent bug fixes:

drm/radeon: sync page table updates
drm/radeon: fix vm buffer size estimation
drm/radeon: only allocate necessary size for vm bo list
drm/radeon: fix page directory update size estimation
drm/radeon: remove global vm lock

v2: fix incorrect merge conflict solving

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/radeon/radeon.h        |  24 +-
 drivers/gpu/drm/radeon/radeon_cs.c     |  48 ++-
 drivers/gpu/drm/radeon/radeon_device.c |   4 +-
 drivers/gpu/drm/radeon/radeon_kms.c    |   7 +-
 drivers/gpu/drm/radeon/radeon_ring.c   |   7 -
 drivers/gpu/drm/radeon/radeon_vm.c     | 513 ++++++++++++++++++---------------
 6 files changed, 309 insertions(+), 294 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 8149e7c..b390d79 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -854,22 +854,17 @@ struct radeon_mec {
 #define R600_PTE_READABLE	(1 << 5)
 #define R600_PTE_WRITEABLE	(1 << 6)
 
-struct radeon_vm_pt {
-	struct radeon_bo		*bo;
-	uint64_t			addr;
-};
-
 struct radeon_vm {
+	struct list_head		list;
 	struct list_head		va;
 	unsigned			id;
 
 	/* contains the page directory */
-	struct radeon_bo		*page_directory;
+	struct radeon_sa_bo		*page_directory;
 	uint64_t			pd_gpu_addr;
-	unsigned			max_pde_used;
 
 	/* array of page tables, one for each page directory entry */
-	struct radeon_vm_pt		*page_tables;
+	struct radeon_sa_bo		**page_tables;
 
 	struct mutex			mutex;
 	/* last fence for cs using this vm */
@@ -881,7 +876,10 @@ struct radeon_vm {
 };
 
 struct radeon_vm_manager {
+	struct mutex			lock;
+	struct list_head		lru_vm;
 	struct radeon_fence		*active[RADEON_NUM_VM];
+	struct radeon_sa_manager	sa_manager;
 	uint32_t			max_pfn;
 	/* number of VMIDs */
 	unsigned			nvm;
@@ -1013,7 +1011,6 @@ struct radeon_cs_parser {
 	unsigned		nrelocs;
 	struct radeon_cs_reloc	*relocs;
 	struct radeon_cs_reloc	**relocs_ptr;
-	struct radeon_cs_reloc	*vm_bos;
 	struct list_head	validated;
 	unsigned		dma_reloc_idx;
 	/* indices of various chunks */
@@ -2807,11 +2804,10 @@ extern void radeon_program_register_sequence(struct radeon_device *rdev,
  */
 int radeon_vm_manager_init(struct radeon_device *rdev);
 void radeon_vm_manager_fini(struct radeon_device *rdev);
-int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm);
+void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm);
 void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm);
-struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev,
-					  struct radeon_vm *vm,
-                                          struct list_head *head);
+int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm);
+void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm);
 struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
 				       struct radeon_vm *vm, int ring);
 void radeon_vm_flush(struct radeon_device *rdev,
@@ -2821,8 +2817,6 @@ void radeon_vm_fence(struct radeon_device *rdev,
 		     struct radeon_vm *vm,
 		     struct radeon_fence *fence);
 uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr);
-int radeon_vm_update_page_directory(struct radeon_device *rdev,
-				    struct radeon_vm *vm);
 int radeon_vm_bo_update(struct radeon_device *rdev,
 			struct radeon_vm *vm,
 			struct radeon_bo *bo,
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 41ecf8a..06a00a1 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -173,10 +173,6 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 
 	radeon_cs_buckets_get_list(&buckets, &p->validated);
 
-	if (p->cs_flags & RADEON_CS_USE_VM)
-		p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
-					      &p->validated);
-
 	return radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);
 }
 
@@ -417,7 +413,6 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo
 	kfree(parser->track);
 	kfree(parser->relocs);
 	kfree(parser->relocs_ptr);
-	kfree(parser->vm_bos);
 	for (i = 0; i < parser->nchunks; i++)
 		drm_free_large(parser->chunks[i].kdata);
 	kfree(parser->chunks);
@@ -457,32 +452,24 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev,
 	return r;
 }
 
-static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
+static int radeon_bo_vm_update_pte(struct radeon_cs_parser *parser,
 				   struct radeon_vm *vm)
 {
-	struct radeon_device *rdev = p->rdev;
-	int i, r;
-
-	r = radeon_vm_update_page_directory(rdev, vm);
-	if (r)
-		return r;
+	struct radeon_device *rdev = parser->rdev;
+	struct radeon_cs_reloc *lobj;
+	struct radeon_bo *bo;
+	int r;
 
-	r = radeon_vm_bo_update(rdev, vm, rdev->ring_tmp_bo.bo,
-				&rdev->ring_tmp_bo.bo->tbo.mem);
-	if (r)
+	r = radeon_vm_bo_update(rdev, vm, rdev->ring_tmp_bo.bo, &rdev->ring_tmp_bo.bo->tbo.mem);
+	if (r) {
 		return r;
-
-	for (i = 0; i < p->nrelocs; i++) {
-		struct radeon_bo *bo;
-
-		/* ignore duplicates */
-		if (p->relocs_ptr[i] != &p->relocs[i])
-			continue;
-
-		bo = p->relocs[i].robj;
-		r = radeon_vm_bo_update(rdev, vm, bo, &bo->tbo.mem);
-		if (r)
+	}
+	list_for_each_entry(lobj, &parser->validated, tv.head) {
+		bo = lobj->robj;
+		r = radeon_vm_bo_update(parser->rdev, vm, bo, &bo->tbo.mem);
+		if (r) {
 			return r;
+		}
 	}
 	return 0;
 }
@@ -514,13 +501,20 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
 	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
 		radeon_uvd_note_usage(rdev);
 
+	mutex_lock(&rdev->vm_manager.lock);
 	mutex_lock(&vm->mutex);
+	r = radeon_vm_alloc_pt(rdev, vm);
+	if (r) {
+		goto out;
+	}
 	r = radeon_bo_vm_update_pte(parser, vm);
 	if (r) {
 		goto out;
 	}
 	radeon_cs_sync_rings(parser);
 	radeon_semaphore_sync_to(parser->ib.semaphore, vm->fence);
+	radeon_semaphore_sync_to(parser->ib.semaphore,
+				 radeon_vm_grab_id(rdev, vm, parser->ring));
 
 	if ((rdev->family >= CHIP_TAHITI) &&
 	    (parser->chunk_const_ib_idx != -1)) {
@@ -530,7 +524,9 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
 	}
 
 out:
+	radeon_vm_add_to_lru(rdev, vm);
 	mutex_unlock(&vm->mutex);
+	mutex_unlock(&rdev->vm_manager.lock);
 	return r;
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 2cd144c..9ebd035 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1195,12 +1195,14 @@ int radeon_device_init(struct radeon_device *rdev,
 	r = radeon_gem_init(rdev);
 	if (r)
 		return r;
-
+	/* initialize vm here */
+	mutex_init(&rdev->vm_manager.lock);
 	/* Adjust VM size here.
 	 * Currently set to 4GB ((1 << 20) 4k pages).
 	 * Max GPUVM size for cayman and SI is 40 bits.
 	 */
 	rdev->vm_manager.max_pfn = 1 << 20;
+	INIT_LIST_HEAD(&rdev->vm_manager.lru_vm);
 
 	/* Set asic functions */
 	r = radeon_asic_init(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index eaaedba..cc47fa1 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -571,12 +571,7 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 			return -ENOMEM;
 		}
 
-		r = radeon_vm_init(rdev, &fpriv->vm);
-		if (r) {
-			kfree(fpriv);
-			return r;
-		}
-
+		radeon_vm_init(rdev, &fpriv->vm);
 		if (rdev->accel_working) {
 			r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false);
 			if (r) {
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 62201db..4ddc6d77 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -145,13 +145,6 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
 		return r;
 	}
 
-	/* grab a vm id if necessary */
-	if (ib->vm) {
-		struct radeon_fence *vm_id_fence;
-		vm_id_fence = radeon_vm_grab_id(rdev, ib->vm, ib->ring);
-        	radeon_semaphore_sync_to(ib->semaphore, vm_id_fence);
-	}
-
 	/* sync with other rings */
 	r = radeon_semaphore_sync_rings(rdev, ib->semaphore, ib->ring);
 	if (r) {
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index c11b71d..5160176 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -84,19 +84,85 @@ static unsigned radeon_vm_directory_size(struct radeon_device *rdev)
  */
 int radeon_vm_manager_init(struct radeon_device *rdev)
 {
+	struct radeon_vm *vm;
+	struct radeon_bo_va *bo_va;
 	int r;
+	unsigned size;
 
 	if (!rdev->vm_manager.enabled) {
+		/* allocate enough for 2 full VM pts */
+		size = radeon_vm_directory_size(rdev);
+		size += rdev->vm_manager.max_pfn * 8;
+		size *= 2;
+		r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager,
+					      RADEON_GPU_PAGE_ALIGN(size),
+					      RADEON_VM_PTB_ALIGN_SIZE,
+					      RADEON_GEM_DOMAIN_VRAM);
+		if (r) {
+			dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n",
+				(rdev->vm_manager.max_pfn * 8) >> 10);
+			return r;
+		}
+
 		r = radeon_asic_vm_init(rdev);
 		if (r)
 			return r;
 
 		rdev->vm_manager.enabled = true;
+
+		r = radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager);
+		if (r)
+			return r;
+	}
+
+	/* restore page table */
+	list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) {
+		if (vm->page_directory == NULL)
+			continue;
+
+		list_for_each_entry(bo_va, &vm->va, vm_list) {
+			bo_va->valid = false;
+		}
 	}
 	return 0;
 }
 
 /**
+ * radeon_vm_free_pt - free the page table for a specific vm
+ *
+ * @rdev: radeon_device pointer
+ * @vm: vm to unbind
+ *
+ * Free the page table of a specific vm (cayman+).
+ *
+ * Global and local mutex must be locked!
+ */
+static void radeon_vm_free_pt(struct radeon_device *rdev,
+				    struct radeon_vm *vm)
+{
+	struct radeon_bo_va *bo_va;
+	int i;
+
+	if (!vm->page_directory)
+		return;
+
+	list_del_init(&vm->list);
+	radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
+
+	list_for_each_entry(bo_va, &vm->va, vm_list) {
+		bo_va->valid = false;
+	}
+
+	if (vm->page_tables == NULL)
+		return;
+
+	for (i = 0; i < radeon_vm_num_pdes(rdev); i++)
+		radeon_sa_bo_free(rdev, &vm->page_tables[i], vm->fence);
+
+	kfree(vm->page_tables);
+}
+
+/**
  * radeon_vm_manager_fini - tear down the vm manager
  *
  * @rdev: radeon_device pointer
@@ -105,63 +171,155 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
  */
 void radeon_vm_manager_fini(struct radeon_device *rdev)
 {
+	struct radeon_vm *vm, *tmp;
 	int i;
 
 	if (!rdev->vm_manager.enabled)
 		return;
 
-	for (i = 0; i < RADEON_NUM_VM; ++i)
+	mutex_lock(&rdev->vm_manager.lock);
+	/* free all allocated page tables */
+	list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) {
+		mutex_lock(&vm->mutex);
+		radeon_vm_free_pt(rdev, vm);
+		mutex_unlock(&vm->mutex);
+	}
+	for (i = 0; i < RADEON_NUM_VM; ++i) {
 		radeon_fence_unref(&rdev->vm_manager.active[i]);
+	}
 	radeon_asic_vm_fini(rdev);
+	mutex_unlock(&rdev->vm_manager.lock);
+
+	radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager);
+	radeon_sa_bo_manager_fini(rdev, &rdev->vm_manager.sa_manager);
 	rdev->vm_manager.enabled = false;
 }
 
 /**
- * radeon_vm_get_bos - add the vm BOs to a validation list
+ * radeon_vm_evict - evict page table to make room for new one
+ *
+ * @rdev: radeon_device pointer
+ * @vm: VM we want to allocate something for
  *
- * @vm: vm providing the BOs
- * @head: head of validation list
+ * Evict a VM from the lru, making sure that it isn't @vm. (cayman+).
+ * Returns 0 for success, -ENOMEM for failure.
  *
- * Add the page directory to the list of BOs to
- * validate for command submission (cayman+).
+ * Global and local mutex must be locked!
  */
-struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev,
-					  struct radeon_vm *vm,
-					  struct list_head *head)
+static int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm)
 {
-	struct radeon_cs_reloc *list;
-	unsigned i, idx;
+	struct radeon_vm *vm_evict;
 
-	list = kmalloc_array(vm->max_pde_used + 2,
-			     sizeof(struct radeon_cs_reloc), GFP_KERNEL);
-	if (!list)
-		return NULL;
+	if (list_empty(&rdev->vm_manager.lru_vm))
+		return -ENOMEM;
 
-	/* add the vm page table to the list */
-	list[0].gobj = NULL;
-	list[0].robj = vm->page_directory;
-	list[0].domain = RADEON_GEM_DOMAIN_VRAM;
-	list[0].alt_domain = RADEON_GEM_DOMAIN_VRAM;
-	list[0].tv.bo = &vm->page_directory->tbo;
-	list[0].tiling_flags = 0;
-	list[0].handle = 0;
-	list_add(&list[0].tv.head, head);
-
-	for (i = 0, idx = 1; i <= vm->max_pde_used; i++) {
-		if (!vm->page_tables[i].bo)
-			continue;
+	vm_evict = list_first_entry(&rdev->vm_manager.lru_vm,
+				    struct radeon_vm, list);
+	if (vm_evict == vm)
+		return -ENOMEM;
+
+	mutex_lock(&vm_evict->mutex);
+	radeon_vm_free_pt(rdev, vm_evict);
+	mutex_unlock(&vm_evict->mutex);
+	return 0;
+}
 
-		list[idx].gobj = NULL;
-		list[idx].robj = vm->page_tables[i].bo;
-		list[idx].domain = RADEON_GEM_DOMAIN_VRAM;
-		list[idx].alt_domain = RADEON_GEM_DOMAIN_VRAM;
-		list[idx].tv.bo = &list[idx].robj->tbo;
-		list[idx].tiling_flags = 0;
-		list[idx].handle = 0;
-		list_add(&list[idx++].tv.head, head);
+/**
+ * radeon_vm_alloc_pt - allocates a page table for a VM
+ *
+ * @rdev: radeon_device pointer
+ * @vm: vm to bind
+ *
+ * Allocate a page table for the requested vm (cayman+).
+ * Returns 0 for success, error for failure.
+ *
+ * Global and local mutex must be locked!
+ */
+int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm)
+{
+	unsigned pd_size, pd_entries, pts_size;
+	struct radeon_ib ib;
+	int r;
+
+	if (vm == NULL) {
+		return -EINVAL;
+	}
+
+	if (vm->page_directory != NULL) {
+		return 0;
+	}
+
+	pd_size = radeon_vm_directory_size(rdev);
+	pd_entries = radeon_vm_num_pdes(rdev);
+
+retry:
+	r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
+			     &vm->page_directory, pd_size,
+			     RADEON_VM_PTB_ALIGN_SIZE, false);
+	if (r == -ENOMEM) {
+		r = radeon_vm_evict(rdev, vm);
+		if (r)
+			return r;
+		goto retry;
+
+	} else if (r) {
+		return r;
 	}
 
-	return list;
+	vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->page_directory);
+
+	/* Initially clear the page directory */
+	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib,
+			  NULL, pd_entries * 2 + 64);
+	if (r) {
+		radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
+		return r;
+	}
+
+	ib.length_dw = 0;
+
+	radeon_asic_vm_set_page(rdev, &ib, vm->pd_gpu_addr,
+				0, pd_entries, 0, 0);
+
+	radeon_semaphore_sync_to(ib.semaphore, vm->fence);
+	r = radeon_ib_schedule(rdev, &ib, NULL);
+	if (r) {
+		radeon_ib_free(rdev, &ib);
+		radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
+		return r;
+	}
+	radeon_fence_unref(&vm->fence);
+	vm->fence = radeon_fence_ref(ib.fence);
+	radeon_ib_free(rdev, &ib);
+	radeon_fence_unref(&vm->last_flush);
+
+	/* allocate page table array */
+	pts_size = radeon_vm_num_pdes(rdev) * sizeof(struct radeon_sa_bo *);
+	vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
+
+	if (vm->page_tables == NULL) {
+		DRM_ERROR("Cannot allocate memory for page table array\n");
+		radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * radeon_vm_add_to_lru - add VMs page table to LRU list
+ *
+ * @rdev: radeon_device pointer
+ * @vm: vm to add to LRU
+ *
+ * Add the allocated page table to the LRU list (cayman+).
+ *
+ * Global mutex must be locked!
+ */
+void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm)
+{
+	list_del_init(&vm->list);
+	list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
 }
 
 /**
@@ -235,14 +393,10 @@ void radeon_vm_flush(struct radeon_device *rdev,
 		     struct radeon_vm *vm,
 		     int ring)
 {
-	uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory);
-
 	/* if we can't remember our last VM flush then flush now! */
 	/* XXX figure out why we have to flush all the time */
-	if (!vm->last_flush || true || pd_addr != vm->pd_gpu_addr) {
-		vm->pd_gpu_addr = pd_addr;
+	if (!vm->last_flush || true)
 		radeon_ring_vm_flush(rdev, ring, vm);
-	}
 }
 
 /**
@@ -342,63 +496,6 @@ struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
 }
 
 /**
- * radeon_vm_clear_bo - initially clear the page dir/table
- *
- * @rdev: radeon_device pointer
- * @bo: bo to clear
- */
-static int radeon_vm_clear_bo(struct radeon_device *rdev,
-			      struct radeon_bo *bo)
-{
-        struct ttm_validate_buffer tv;
-        struct ww_acquire_ctx ticket;
-        struct list_head head;
-	struct radeon_ib ib;
-	unsigned entries;
-	uint64_t addr;
-	int r;
-
-        memset(&tv, 0, sizeof(tv));
-        tv.bo = &bo->tbo;
-
-        INIT_LIST_HEAD(&head);
-        list_add(&tv.head, &head);
-
-        r = ttm_eu_reserve_buffers(&ticket, &head);
-        if (r)
-		return r;
-
-        r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-        if (r)
-                goto error;
-
-	addr = radeon_bo_gpu_offset(bo);
-	entries = radeon_bo_size(bo) / 8;
-
-	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib,
-			  NULL, entries * 2 + 64);
-	if (r)
-                goto error;
-
-	ib.length_dw = 0;
-
-	radeon_asic_vm_set_page(rdev, &ib, addr, 0, entries, 0, 0);
-
-	r = radeon_ib_schedule(rdev, &ib, NULL);
-	if (r)
-                goto error;
-
-	ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence);
-	radeon_ib_free(rdev, &ib);
-
-	return 0;
-
-error:
-	ttm_eu_backoff_reservation(&ticket, &head);
-	return r;
-}
-
-/**
  * radeon_vm_bo_set_addr - set bos virtual address inside a vm
  *
  * @rdev: radeon_device pointer
@@ -422,8 +519,7 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
 	struct radeon_vm *vm = bo_va->vm;
 	struct radeon_bo_va *tmp;
 	struct list_head *head;
-	unsigned last_pfn, pt_idx;
-	int r;
+	unsigned last_pfn;
 
 	if (soffset) {
 		/* make sure object fit at this offset */
@@ -474,53 +570,8 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
 	bo_va->valid = false;
 	list_move(&bo_va->vm_list, head);
 
-	soffset = (soffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
-	eoffset = (eoffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
-
-	if (eoffset > vm->max_pde_used)
-		vm->max_pde_used = eoffset;
-
-	radeon_bo_unreserve(bo_va->bo);
-
-	/* walk over the address space and allocate the page tables */
-	for (pt_idx = soffset; pt_idx <= eoffset; ++pt_idx) {
-		struct radeon_bo *pt;
-
-		if (vm->page_tables[pt_idx].bo)
-			continue;
-
-		/* drop mutex to allocate and clear page table */
-		mutex_unlock(&vm->mutex);
-
-		r = radeon_bo_create(rdev, RADEON_VM_PTE_COUNT * 8,
-				     RADEON_GPU_PAGE_SIZE, false, 
-				     RADEON_GEM_DOMAIN_VRAM, NULL, &pt);
-		if (r)
-			return r;
-
-		r = radeon_vm_clear_bo(rdev, pt);
-		if (r) {
-			radeon_bo_unref(&pt);
-			radeon_bo_reserve(bo_va->bo, false);
-			return r;
-		}
-
-		/* aquire mutex again */
-		mutex_lock(&vm->mutex);
-		if (vm->page_tables[pt_idx].bo) {
-			/* someone else allocated the pt in the meantime */
-			mutex_unlock(&vm->mutex);
-			radeon_bo_unref(&pt);
-			mutex_lock(&vm->mutex);
-			continue;
-		}
-
-		vm->page_tables[pt_idx].addr = 0;
-		vm->page_tables[pt_idx].bo = pt;
-	}
-
 	mutex_unlock(&vm->mutex);
-	return radeon_bo_reserve(bo_va->bo, false);
+	return 0;
 }
 
 /**
@@ -580,54 +631,58 @@ static uint32_t radeon_vm_page_flags(uint32_t flags)
  *
  * Global and local mutex must be locked!
  */
-int radeon_vm_update_page_directory(struct radeon_device *rdev,
-				    struct radeon_vm *vm)
+static int radeon_vm_update_pdes(struct radeon_device *rdev,
+				 struct radeon_vm *vm,
+				 struct radeon_ib *ib,
+				 uint64_t start, uint64_t end)
 {
 	static const uint32_t incr = RADEON_VM_PTE_COUNT * 8;
 
-	struct radeon_bo *pd = vm->page_directory;
-	uint64_t pd_addr = radeon_bo_gpu_offset(pd);
 	uint64_t last_pde = ~0, last_pt = ~0;
-	unsigned count = 0, pt_idx, ndw;
-	struct radeon_ib ib;
+	unsigned count = 0;
+	uint64_t pt_idx;
 	int r;
 
-	/* padding, etc. */
-	ndw = 64;
-
-	/* assume the worst case */
-	ndw += vm->max_pde_used * 16;
-
-	/* update too big for an IB */
-	if (ndw > 0xfffff)
-		return -ENOMEM;
-
-	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
-	if (r)
-		return r;
-	ib.length_dw = 0;
+	start = (start / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
+	end = (end / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
 
 	/* walk over the address space and update the page directory */
-	for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
-		struct radeon_bo *bo = vm->page_tables[pt_idx].bo;
+	for (pt_idx = start; pt_idx <= end; ++pt_idx) {
 		uint64_t pde, pt;
 
-		if (bo == NULL)
+		if (vm->page_tables[pt_idx])
 			continue;
 
-		pt = radeon_bo_gpu_offset(bo);
-		if (vm->page_tables[pt_idx].addr == pt)
-			continue;
-		vm->page_tables[pt_idx].addr = pt;
+retry:
+		r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
+				     &vm->page_tables[pt_idx],
+				     RADEON_VM_PTE_COUNT * 8,
+				     RADEON_GPU_PAGE_SIZE, false);
+
+		if (r == -ENOMEM) {
+			r = radeon_vm_evict(rdev, vm);
+			if (r)
+				return r;
+			goto retry;
+		} else if (r) {
+			return r;
+		}
+
+		pde = vm->pd_gpu_addr + pt_idx * 8;
+
+		pt = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);
 
-		pde = pd_addr + pt_idx * 8;
 		if (((last_pde + 8 * count) != pde) ||
 		    ((last_pt + incr * count) != pt)) {
 
 			if (count) {
-				radeon_asic_vm_set_page(rdev, &ib, last_pde,
+				radeon_asic_vm_set_page(rdev, ib, last_pde,
 							last_pt, count, incr,
 							R600_PTE_VALID);
+
+				count *= RADEON_VM_PTE_COUNT;
+				radeon_asic_vm_set_page(rdev, ib, last_pt, 0,
+							count, 0, 0);
 			}
 
 			count = 1;
@@ -638,23 +693,14 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev,
 		}
 	}
 
-	if (count)
-		radeon_asic_vm_set_page(rdev, &ib, last_pde, last_pt, count,
+	if (count) {
+		radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count,
 					incr, R600_PTE_VALID);
 
-	if (ib.length_dw != 0) {
-		radeon_semaphore_sync_to(ib.semaphore, pd->tbo.sync_obj);
-		radeon_semaphore_sync_to(ib.semaphore, vm->last_id_use);
-		r = radeon_ib_schedule(rdev, &ib, NULL);
-		if (r) {
-			radeon_ib_free(rdev, &ib);
-			return r;
-		}
-		radeon_fence_unref(&vm->fence);
-		vm->fence = radeon_fence_ref(ib.fence);
-		radeon_fence_unref(&vm->last_flush);
+		count *= RADEON_VM_PTE_COUNT;
+		radeon_asic_vm_set_page(rdev, ib, last_pt, 0,
+					count, 0, 0);
 	}
-	radeon_ib_free(rdev, &ib);
 
 	return 0;
 }
@@ -691,18 +737,15 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
 	/* walk over the address space and update the page tables */
 	for (addr = start; addr < end; ) {
 		uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE;
-		struct radeon_bo *pt = vm->page_tables[pt_idx].bo;
 		unsigned nptes;
 		uint64_t pte;
 
-		radeon_semaphore_sync_to(ib->semaphore, pt->tbo.sync_obj);
-
 		if ((addr & ~mask) == (end & ~mask))
 			nptes = end - addr;
 		else
 			nptes = RADEON_VM_PTE_COUNT - (addr & mask);
 
-		pte = radeon_bo_gpu_offset(pt);
+		pte = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);
 		pte += (addr & mask) * 8;
 
 		if ((last_pte + 8 * count) != pte) {
@@ -743,7 +786,7 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
  * Fill in the page table entries for @bo (cayman+).
  * Returns 0 for success, -EINVAL for failure.
  *
- * Object have to be reserved and mutex must be locked!
+ * Object has to be reserved, and the global and local mutex must be locked!
  */
 int radeon_vm_bo_update(struct radeon_device *rdev,
 			struct radeon_vm *vm,
@@ -752,10 +795,14 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 {
 	struct radeon_ib ib;
 	struct radeon_bo_va *bo_va;
-	unsigned nptes, ndw;
+	unsigned nptes, npdes, ndw;
 	uint64_t addr;
 	int r;
 
+	/* nothing to do if vm isn't bound */
+	if (vm->page_directory == NULL)
+		return 0;
+
 	bo_va = radeon_vm_bo_find(vm, bo);
 	if (bo_va == NULL) {
 		dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
@@ -793,6 +840,9 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 
 	nptes = radeon_bo_ngpu_pages(bo);
 
+	/* assume two extra pdes in case the mapping overlaps the borders */
+	npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2;
+
 	/* padding, etc. */
 	ndw = 64;
 
@@ -807,6 +857,15 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 	/* reserve space for pte addresses */
 	ndw += nptes * 2;
 
+	/* reserve space for one header for every 2k dwords */
+	ndw += (npdes >> 11) * 4;
+
+	/* reserve space for pde addresses */
+	ndw += npdes * 2;
+
+	/* reserve space for clearing new page tables */
+	ndw += npdes * 2 * RADEON_VM_PTE_COUNT;
+
 	/* update too big for an IB */
 	if (ndw > 0xfffff)
 		return -ENOMEM;
@@ -816,6 +875,12 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 		return r;
 	ib.length_dw = 0;
 
+	r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset);
+	if (r) {
+		radeon_ib_free(rdev, &ib);
+		return r;
+	}
+
 	radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset,
 			      addr, radeon_vm_page_flags(bo_va->flags));
 
@@ -851,10 +916,12 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev,
 {
 	int r = 0;
 
+	mutex_lock(&rdev->vm_manager.lock);
 	mutex_lock(&bo_va->vm->mutex);
-	if (bo_va->soffset)
+	if (bo_va->soffset) {
 		r = radeon_vm_bo_update(rdev, bo_va->vm, bo_va->bo, NULL);
-
+	}
+	mutex_unlock(&rdev->vm_manager.lock);
 	list_del(&bo_va->vm_list);
 	mutex_unlock(&bo_va->vm->mutex);
 	list_del(&bo_va->bo_list);
@@ -890,43 +957,15 @@ void radeon_vm_bo_invalidate(struct radeon_device *rdev,
  *
  * Init @vm fields (cayman+).
  */
-int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
+void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
 {
-	unsigned pd_size, pd_entries, pts_size;
-	int r;
-
 	vm->id = 0;
 	vm->fence = NULL;
 	vm->last_flush = NULL;
 	vm->last_id_use = NULL;
 	mutex_init(&vm->mutex);
+	INIT_LIST_HEAD(&vm->list);
 	INIT_LIST_HEAD(&vm->va);
-
-	pd_size = radeon_vm_directory_size(rdev);
-	pd_entries = radeon_vm_num_pdes(rdev);
-
-	/* allocate page table array */
-	pts_size = pd_entries * sizeof(struct radeon_vm_pt);
-	vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
-	if (vm->page_tables == NULL) {
-		DRM_ERROR("Cannot allocate memory for page table array\n");
-		return -ENOMEM;
-	}
-
-	r = radeon_bo_create(rdev, pd_size, RADEON_VM_PTB_ALIGN_SIZE, false,
-			     RADEON_GEM_DOMAIN_VRAM, NULL,
-			     &vm->page_directory);
-	if (r)
-		return r;
-
-	r = radeon_vm_clear_bo(rdev, vm->page_directory);
-	if (r) {
-		radeon_bo_unref(&vm->page_directory);
-		vm->page_directory = NULL;
-		return r;
-	}
-
-	return 0;
 }
 
 /**
@@ -941,7 +980,12 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
 void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
 {
 	struct radeon_bo_va *bo_va, *tmp;
-	int i, r;
+	int r;
+
+	mutex_lock(&rdev->vm_manager.lock);
+	mutex_lock(&vm->mutex);
+	radeon_vm_free_pt(rdev, vm);
+	mutex_unlock(&rdev->vm_manager.lock);
 
 	if (!list_empty(&vm->va)) {
 		dev_err(rdev->dev, "still active bo inside vm\n");
@@ -955,17 +999,8 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
 			kfree(bo_va);
 		}
 	}
-
-
-	for (i = 0; i < radeon_vm_num_pdes(rdev); i++)
-		radeon_bo_unref(&vm->page_tables[i].bo);
-	kfree(vm->page_tables);
-
-	radeon_bo_unref(&vm->page_directory);
-
 	radeon_fence_unref(&vm->fence);
 	radeon_fence_unref(&vm->last_flush);
 	radeon_fence_unref(&vm->last_id_use);
-
-	mutex_destroy(&vm->mutex);
+	mutex_unlock(&vm->mutex);
 }
-- 
1.9.1


[-- Attachment #4: Type: text/plain, Size: 159 bytes --]

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-06-24 10:14                           ` Christian König
@ 2014-06-25  3:59                             ` Michel Dänzer
  2014-06-26 12:25                               ` Dieter Nützel
  2014-06-27  2:31                               ` Michel Dänzer
  0 siblings, 2 replies; 36+ messages in thread
From: Michel Dänzer @ 2014-06-25  3:59 UTC (permalink / raw)
  To: Christian König, Alex Deucher; +Cc: dri-devel

On 24.06.2014 19:14, Christian König wrote:
> Am 24.06.2014 08:49, schrieb Michel Dänzer:
>> On 23.06.2014 18:56, Christian König wrote:
>>> Am 23.06.2014 10:15, schrieb Michel Dänzer:
>>>> On 19.06.2014 18:45, Christian König wrote:
>>>>
>>>>> I think even when we revert to the old code we have a couple of
>>>>> unsolved
>>>>> problems with the VM support or in the driver in general where we
>>>>> should
>>>>> try to understand the underlying reason for it instead of applying
>>>>> more
>>>>> workarounds.
>>>> I'm not suggesting applying more workarounds but going back to a known
>>>> more stable state. It seems like we've maneuvered ourselves to a rather
>>>> uncomfortable position from there, with no clear way to a better place.
>>>> But if we basically started from the 3.14 state again, we have a few
>>>> known hurdles like mine and Marek's Bonaire etc. which we know any
>>>> further improvements will have to pass before they can be considered
>>>> for
>>>> general consumption.
>>> Yeah agree, especially on the uncomfortable position.
>>>
>>> Please try with the two attached patches applied on top of 3.15 and
>>> retest. They should revert back to the old implementation.
>> Unfortunately, X fails to start with these, see the attached excerpt
>> from dmesg.
> 
> My fault, incorrectly solved a merge conflict and then failed to test
> the right kernel.
> 
> BTW: Wasn't there an option to tell GRUB to use the latest installed
> kernel instead of the one with the highest version number? Can't seem to
> find that any more.

No idea, unfortunately.


> Please try attached patches instead,

With these patches, 3.15 just survived two piglit runs on my Bonaire,
one with the GART poisoning fix and one without. It never survived a
single run before.

Acked-and-Tested-by: Michel Dänzer <michel.daenzer@amd.com>


-- 
Earthling Michel Dänzer            |                  http://www.amd.com
Libre software enthusiast          |                Mesa and X developer

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-06-25  3:59                             ` Michel Dänzer
@ 2014-06-26 12:25                               ` Dieter Nützel
  2014-06-27  2:31                               ` Michel Dänzer
  1 sibling, 0 replies; 36+ messages in thread
From: Dieter Nützel @ 2014-06-26 12:25 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

Am 25.06.2014 05:59, schrieb Michel Dänzer:
> On 24.06.2014 19:14, Christian König wrote:
>> Am 24.06.2014 08:49, schrieb Michel Dänzer:
>>> On 23.06.2014 18:56, Christian König wrote:
>>>> Am 23.06.2014 10:15, schrieb Michel Dänzer:
>>>>> On 19.06.2014 18:45, Christian König wrote:
>>>>> 
>>>>>> I think even when we revert to the old code we have a couple of
>>>>>> unsolved
>>>>>> problems with the VM support or in the driver in general where we
>>>>>> should
>>>>>> try to understand the underlying reason for it instead of applying
>>>>>> more
>>>>>> workarounds.
>>>>> I'm not suggesting applying more workarounds but going back to a 
>>>>> known
>>>>> more stable state. It seems like we've maneuvered ourselves to a 
>>>>> rather
>>>>> uncomfortable position from there, with no clear way to a better 
>>>>> place.
>>>>> But if we basically started from the 3.14 state again, we have a 
>>>>> few
>>>>> known hurdles like mine and Marek's Bonaire etc. which we know any
>>>>> further improvements will have to pass before they can be 
>>>>> considered
>>>>> for
>>>>> general consumption.
>>>> Yeah agree, especially on the uncomfortable position.
>>>> 
>>>> Please try with the two attached patches applied on top of 3.15 and
>>>> retest. They should revert back to the old implementation.
>>> Unfortunately, X fails to start with these, see the attached excerpt
>>> from dmesg.
>> 
>> My fault, incorrectly solved a merge conflict and then failed to test
>> the right kernel.
>> 
>> BTW: Wasn't there an option to tell GRUB to use the latest installed
>> kernel instead of the one with the highest version number? Can't seem 
>> to
>> find that any more.

Maybe this helps (section 5. Grub 2 Files & Options).
http://ubuntuforums.org/showthread.php?t=1195275

GRUB_DEFAULT

Regards,
   Dieter
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-06-25  3:59                             ` Michel Dänzer
  2014-06-26 12:25                               ` Dieter Nützel
@ 2014-06-27  2:31                               ` Michel Dänzer
  2014-06-27  8:26                                 ` Christian König
  1 sibling, 1 reply; 36+ messages in thread
From: Michel Dänzer @ 2014-06-27  2:31 UTC (permalink / raw)
  To: Christian König, Alex Deucher; +Cc: dri-devel

On 25.06.2014 12:59, Michel Dänzer wrote:
> On 24.06.2014 19:14, Christian König wrote:
>> Am 24.06.2014 08:49, schrieb Michel Dänzer:
>>> On 23.06.2014 18:56, Christian König wrote:
>>>> Am 23.06.2014 10:15, schrieb Michel Dänzer:
>>>>> On 19.06.2014 18:45, Christian König wrote:
>>>>>
>>>>>> I think even when we revert to the old code we have a couple of
>>>>>> unsolved
>>>>>> problems with the VM support or in the driver in general where we
>>>>>> should
>>>>>> try to understand the underlying reason for it instead of applying
>>>>>> more
>>>>>> workarounds.
>>>>> I'm not suggesting applying more workarounds but going back to a known
>>>>> more stable state. It seems like we've maneuvered ourselves to a rather
>>>>> uncomfortable position from there, with no clear way to a better place.
>>>>> But if we basically started from the 3.14 state again, we have a few
>>>>> known hurdles like mine and Marek's Bonaire etc. which we know any
>>>>> further improvements will have to pass before they can be considered
>>>>> for
>>>>> general consumption.
>>>> Yeah agree, especially on the uncomfortable position.
>>>>
>>>> Please try with the two attached patches applied on top of 3.15 and
>>>> retest. They should revert back to the old implementation.
>>> Unfortunately, X fails to start with these, see the attached excerpt
>>> from dmesg.
>>
>> My fault, incorrectly solved a merge conflict and then failed to test
>> the right kernel.
>>
>> [...]
>> 
>> Please try attached patches instead,
> 
> With these patches, 3.15 just survived two piglit runs on my Bonaire,
> one with the GART poisoning fix and one without. It never survived a
> single run before.
> 
> Acked-and-Tested-by: Michel Dänzer <michel.daenzer@amd.com>

So, are these patches going to 3.16 and 3.15?


-- 
Earthling Michel Dänzer            |                  http://www.amd.com
Libre software enthusiast          |                Mesa and X developer

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-06-27  2:31                               ` Michel Dänzer
@ 2014-06-27  8:26                                 ` Christian König
  2014-06-27  8:59                                   ` Michel Dänzer
  0 siblings, 1 reply; 36+ messages in thread
From: Christian König @ 2014-06-27  8:26 UTC (permalink / raw)
  To: Michel Dänzer, Alex Deucher; +Cc: dri-devel

Am 27.06.2014 04:31, schrieb Michel Dänzer:
> On 25.06.2014 12:59, Michel Dänzer wrote:
>> On 24.06.2014 19:14, Christian König wrote:
>>> Am 24.06.2014 08:49, schrieb Michel Dänzer:
>>>> On 23.06.2014 18:56, Christian König wrote:
>>>>> Am 23.06.2014 10:15, schrieb Michel Dänzer:
>>>>>> On 19.06.2014 18:45, Christian König wrote:
>>>>>>
>>>>>>> I think even when we revert to the old code we have a couple of
>>>>>>> unsolved
>>>>>>> problems with the VM support or in the driver in general where we
>>>>>>> should
>>>>>>> try to understand the underlying reason for it instead of applying
>>>>>>> more
>>>>>>> workarounds.
>>>>>> I'm not suggesting applying more workarounds but going back to a known
>>>>>> more stable state. It seems like we've maneuvered ourselves to a rather
>>>>>> uncomfortable position from there, with no clear way to a better place.
>>>>>> But if we basically started from the 3.14 state again, we have a few
>>>>>> known hurdles like mine and Marek's Bonaire etc. which we know any
>>>>>> further improvements will have to pass before they can be considered
>>>>>> for
>>>>>> general consumption.
>>>>> Yeah agree, especially on the uncomfortable position.
>>>>>
>>>>> Please try with the two attached patches applied on top of 3.15 and
>>>>> retest. They should revert back to the old implementation.
>>>> Unfortunately, X fails to start with these, see the attached excerpt
>>>> from dmesg.
>>> My fault, incorrectly solved a merge conflict and then failed to test
>>> the right kernel.
>>>
>>> [...]
>>>
>>> Please try attached patches instead,
>> With these patches, 3.15 just survived two piglit runs on my Bonaire,
>> one with the GART poisoning fix and one without. It never survived a
>> single run before.
>>
>> Acked-and-Tested-by: Michel Dänzer <michel.daenzer@amd.com>
> So, are these patches going to 3.16 and 3.15?

We could send them in for 3.15, but for 3.16 we have some new features 
that depend on the new code.

We could backport them to the old code, but I really want to work on 
figuring out what's wrong with the new approach instead.

Going to prepare a branch for you to test over the weekend, would be 
nice if you could give it a try on Monday and see if that fixes the 
issues as well.

Thanks,
Christian.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-06-27  8:26                                 ` Christian König
@ 2014-06-27  8:59                                   ` Michel Dänzer
  2014-06-29 10:34                                     ` Christian König
  0 siblings, 1 reply; 36+ messages in thread
From: Michel Dänzer @ 2014-06-27  8:59 UTC (permalink / raw)
  To: Christian König, Alex Deucher; +Cc: dri-devel

On 27.06.2014 17:26, Christian König wrote:
> Am 27.06.2014 04:31, schrieb Michel Dänzer:
>> On 25.06.2014 12:59, Michel Dänzer wrote:
>>> 
>>> With these patches, 3.15 just survived two piglit runs on my Bonaire,
>>> one with the GART poisoning fix and one without. It never survived a
>>> single run before.
>>>
>>> Acked-and-Tested-by: Michel Dänzer <michel.daenzer@amd.com>
>> So, are these patches going to 3.16 and 3.15?
> 
> We could send them in for 3.15,

What's the alternative for 3.15?

Looks like e.g. https://bugs.freedesktop.org/show_bug.cgi?id=80141 is
confirmed to be this.


> but for 3.16 we have some new features that depend on the new code.
> 
> We could backport them to the old code, but I really want to work on
> figuring out what's wrong with the new approach instead.
> 
> Going to prepare a branch for you to test over the weekend, would be
> nice if you could give it a try on Monday and see if that fixes the
> issues as well.

Sure, will do.


-- 
Earthling Michel Dänzer            |                  http://www.amd.com
Libre software enthusiast          |                Mesa and X developer

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-06-27  8:59                                   ` Michel Dänzer
@ 2014-06-29 10:34                                     ` Christian König
  2014-06-30  6:10                                       ` Michel Dänzer
  0 siblings, 1 reply; 36+ messages in thread
From: Christian König @ 2014-06-29 10:34 UTC (permalink / raw)
  To: Michel Dänzer, Alex Deucher; +Cc: dri-devel

[-- Attachment #1: Type: text/plain, Size: 2531 bytes --]

Am 27.06.2014 10:59, schrieb Michel Dänzer:
> On 27.06.2014 17:26, Christian König wrote:
>> Am 27.06.2014 04:31, schrieb Michel Dänzer:
>>> On 25.06.2014 12:59, Michel Dänzer wrote:
>>>> With these patches, 3.15 just survived two piglit runs on my Bonaire,
>>>> one with the GART poisoning fix and one without. It never survived a
>>>> single run before.
>>>>
>>>> Acked-and-Tested-by: Michel Dänzer <michel.daenzer@amd.com>
>>> So, are these patches going to 3.16 and 3.15?
>> We could send them in for 3.15,
> What's the alternative for 3.15?

Well, figuring out what's the real reason behind those lockups would be 
a good start :)

> Looks like e.g. https://bugs.freedesktop.org/show_bug.cgi?id=80141 is
> confirmed to be this.
>
>
>> but for 3.16 we have some new features that depend on the new code.
>>
>> We could backport them to the old code, but I really want to work on
>> figuring out what's wrong with the new approach instead.
>>
>> Going to prepare a branch for you to test over the weekend, would be
>> nice if you could give it a try on Monday and see if that fixes the
>> issues as well.
> Sure, will do.

I've just pushed the branch testing-3.15 to 
git://people.freedesktop.org/~deathsimple/linux. It's based on 3.15.2 
and contains the "stop poisoning the GART TLB" patch backported to 3.15 
and a couple of things that I would like to try.

I've disabled the redirection of page faults to the dummy page for now,
so the system should lock up on the first page fault it encounters.
Apart from that, the page directory and page tables are now completely
over-allocated and over-aligned.

Setting the READABLE bit on invalid entries shouldn't have any effect
other than making those entries non-zero. So please try to lock up your
Bonaire with this branch, and as soon as you encounter the first page
fault, take a look at VM_CONTEXT1_PROTECTION_FAULT_STATUS and figure out
which VMID caused the lockup.

Then use the attached script to make a dump of the complete page
directory and page table of the VMID in question, e.g. "./dump_vm.sh 1"
if the lockup was caused by VMID 1. Make sure you've got a
radeontool that supports CIK, otherwise it will only return zeros as
the page directory address.

Since even the invalid page table entries should now have at least the
READABLE bit set, there shouldn't be anything zero in this dump. Please
look out for anything else suspicious as well (0xdeadbeef etc.).

Thanks for the help,
Christian.

[-- Attachment #2: dump_vm.sh --]
[-- Type: application/x-shellscript, Size: 562 bytes --]

[-- Attachment #3: Type: text/plain, Size: 159 bytes --]

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-06-29 10:34                                     ` Christian König
@ 2014-06-30  6:10                                       ` Michel Dänzer
  2014-06-30  7:43                                         ` Christian König
  0 siblings, 1 reply; 36+ messages in thread
From: Michel Dänzer @ 2014-06-30  6:10 UTC (permalink / raw)
  To: Christian König, Alex Deucher; +Cc: dri-devel

On 29.06.2014 19:34, Christian König wrote:
> 
> I've just pushed the branch testing-3.15 to
> git://people.freedesktop.org/~deathsimple/linux. It's based on 3.15.2
> and contains the "stop poisoning the GART TLB" patch backported to 3.15
> and a couple of things that I would like to try.

Running that branch, my Bonaire just survived a piglit run without
lockup. I hope that's an interesting result. :)


-- 
Earthling Michel Dänzer            |                  http://www.amd.com
Libre software enthusiast          |                Mesa and X developer

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-06-30  6:10                                       ` Michel Dänzer
@ 2014-06-30  7:43                                         ` Christian König
  2014-07-01  6:48                                           ` Michel Dänzer
  0 siblings, 1 reply; 36+ messages in thread
From: Christian König @ 2014-06-30  7:43 UTC (permalink / raw)
  To: Michel Dänzer, Alex Deucher; +Cc: dri-devel

Am 30.06.2014 08:10, schrieb Michel Dänzer:
> On 29.06.2014 19:34, Christian König wrote:
>> I've just pushed the branch testing-3.15 to
>> git://people.freedesktop.org/~deathsimple/linux. It's based on 3.15.2
>> and contains the "stop poisoning the GART TLB" patch backported to 3.15
>> and a couple of things that I would like to try.
> Running that branch, my Bonaire just survived a piglit run without
> lockup. I hope that's an interesting result. :)

That's indeed an interesting result. Can you try to figure out which of 
the patches on the branch did the trick for you?

Thanks,
Christian.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-06-30  7:43                                         ` Christian König
@ 2014-07-01  6:48                                           ` Michel Dänzer
  2014-07-01 12:16                                             ` Christian König
  0 siblings, 1 reply; 36+ messages in thread
From: Michel Dänzer @ 2014-07-01  6:48 UTC (permalink / raw)
  To: Christian König, Alex Deucher; +Cc: dri-devel

On 30.06.2014 16:43, Christian König wrote:
> Am 30.06.2014 08:10, schrieb Michel Dänzer:
>> On 29.06.2014 19:34, Christian König wrote:
>>> I've just pushed the branch testing-3.15 to
>>> git://people.freedesktop.org/~deathsimple/linux. It's based on 3.15.2
>>> and contains the "stop poisoning the GART TLB" patch backported to 3.15
>>> and a couple of things that I would like to try.
>> Running that branch, my Bonaire just survived a piglit run without
>> lockup. I hope that's an interesting result. :)
> 
> That's indeed an interesting result. Can you try to figure out which of
> the patches on the branch did the trick for you?

The winner is 'drm/radeon: completely over allocate PD and PTs'. That
patch alone on top of 3.15.2 makes piglit survive on my Bonaire.


-- 
Earthling Michel Dänzer            |                  http://www.amd.com
Libre software enthusiast          |                Mesa and X developer

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-07-01  6:48                                           ` Michel Dänzer
@ 2014-07-01 12:16                                             ` Christian König
  2014-07-02  6:57                                               ` Michel Dänzer
  0 siblings, 1 reply; 36+ messages in thread
From: Christian König @ 2014-07-01 12:16 UTC (permalink / raw)
  To: Michel Dänzer, Alex Deucher; +Cc: dri-devel

Am 01.07.2014 08:48, schrieb Michel Dänzer:
> On 30.06.2014 16:43, Christian König wrote:
>> Am 30.06.2014 08:10, schrieb Michel Dänzer:
>>> On 29.06.2014 19:34, Christian König wrote:
>>>> I've just pushed the branch testing-3.15 to
>>>> git://people.freedesktop.org/~deathsimple/linux. It's based on 3.15.2
>>>> and contains the "stop poisoning the GART TLB" patch backported to 3.15
>>>> and a couple of things that I would like to try.
>>> Running that branch, my Bonaire just survived a piglit run without
>>> lockup. I hope that's an interesting result. :)
>> That's indeed an interesting result. Can you try to figure out which of
>> the patches on the branch did the trick for you?
> The winner is 'drm/radeon: completely over allocate PD and PTs'. That
> patch alone on top of 3.15.2 makes piglit survive on my Bonaire.

Sounds like we either need to align the buffers a bit more, are
accidentally overwriting parts of them, or have indeed messed up their
size calculation somewhere.

I've just pushed a new branch testing-3.15-v2 to 
git://people.freedesktop.org/~deathsimple/linux. It only contains the 
two patches already submitted for 3.15 inclusion and the "drm/radeon: 
completely over allocate PD and PTs" patch split into four separate changes.

Please retest, and if it still works, try once more to figure out which
change fixed it. I'm going to try to purposely un-align the buffers on my
Bonaire in the meantime; maybe I can get it to crash as well.

Thanks,
Christian.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-07-01 12:16                                             ` Christian König
@ 2014-07-02  6:57                                               ` Michel Dänzer
  2014-07-02 19:31                                                 ` Christian König
  0 siblings, 1 reply; 36+ messages in thread
From: Michel Dänzer @ 2014-07-02  6:57 UTC (permalink / raw)
  To: Christian König, Alex Deucher; +Cc: dri-devel

On 01.07.2014 21:16, Christian König wrote:
> Am 01.07.2014 08:48, schrieb Michel Dänzer:
>> On 30.06.2014 16:43, Christian König wrote:
>>> Am 30.06.2014 08:10, schrieb Michel Dänzer:
>>>> On 29.06.2014 19:34, Christian König wrote:
>>>>> I've just pushed the branch testing-3.15 to
>>>>> git://people.freedesktop.org/~deathsimple/linux. It's based on 3.15.2
>>>>> and contains the "stop poisoning the GART TLB" patch backported to
>>>>> 3.15
>>>>> and a couple of things that I would like to try.
>>>> Running that branch, my Bonaire just survived a piglit run without
>>>> lockup. I hope that's an interesting result. :)
>>> That's indeed an interesting result. Can you try to figure out which of
>>> the patches on the branch did the trick for you?
>> The winner is 'drm/radeon: completely over allocate PD and PTs'. That
>> patch alone on top of 3.15.2 makes piglit survive on my Bonaire.
> 
> Sounds like we either need to align the buffers a bit more, accidentally
> overwrite parts of them or indeed messed up their size calculation
> somewhere.
> 
> I've just pushed a new branch testing-3.15-v2 to
> git://people.freedesktop.org/~deathsimple/linux. It only contains the
> two patches already submitted for 3.15 inclusion and the "drm/radeon:
> completely over allocate PD and PTs" patch split into four separate
> changes.
> 
> Please retest and if it still works try once more which change fixed it.

It's hard to say, I'm afraid. I had a successful run with only the first
two of the split up changes, but then after both of them failing by
themselves, another run with both of them failed as well. So it seems
like both of those are required, but maybe not sufficient.

FWIW, I've also had successful runs with the first three of the split
changes, and with all of them.


-- 
Earthling Michel Dänzer            |                  http://www.amd.com
Libre software enthusiast          |                Mesa and X developer

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-07-02  6:57                                               ` Michel Dänzer
@ 2014-07-02 19:31                                                 ` Christian König
  2014-07-03  3:48                                                   ` Michel Dänzer
  0 siblings, 1 reply; 36+ messages in thread
From: Christian König @ 2014-07-02 19:31 UTC (permalink / raw)
  To: Michel Dänzer, Alex Deucher; +Cc: dri-devel

> FWIW, I've also had successful runs with the first three of the split
> changes, and with all of them.
Ok I've just pushed a branch testing-3.15-v3 to fdo which moves all page 
table allocation to the end of VRAM. Please try with this memory layout, 
it should give us a good idea if it's indeed a memory corruption or 
something else.

Apart from that, please try to lock up your system with
radeon.lockup_timeout=0 on the kernel command line and then try to get a
dump of the VM page tables with the script I've sent to you in one of
the mails.

Thanks for the help,
Christian.

Am 02.07.2014 08:57, schrieb Michel Dänzer:
> On 01.07.2014 21:16, Christian König wrote:
>> Am 01.07.2014 08:48, schrieb Michel Dänzer:
>>> On 30.06.2014 16:43, Christian König wrote:
>>>> Am 30.06.2014 08:10, schrieb Michel Dänzer:
>>>>> On 29.06.2014 19:34, Christian König wrote:
>>>>>> I've just pushed the branch testing-3.15 to
>>>>>> git://people.freedesktop.org/~deathsimple/linux. It's based on 3.15.2
>>>>>> and contains the "stop poisoning the GART TLB" patch backported to
>>>>>> 3.15
>>>>>> and a couple of things that I would like to try.
>>>>> Running that branch, my Bonaire just survived a piglit run without
>>>>> lockup. I hope that's an interesting result. :)
>>>> That's indeed an interesting result. Can you try to figure out which of
>>>> the patches on the branch did the trick for you?
>>> The winner is 'drm/radeon: completely over allocate PD and PTs'. That
>>> patch alone on top of 3.15.2 makes piglit survive on my Bonaire.
>> Sounds like we either need to align the buffers a bit more, accidentally
>> overwrite parts of them or indeed messed up their size calculation
>> somewhere.
>>
>> I've just pushed a new branch testing-3.15-v2 to
>> git://people.freedesktop.org/~deathsimple/linux. It only contains the
>> two patches already submitted for 3.15 inclusion and the "drm/radeon:
>> completely over allocate PD and PTs" patch split into four separate
>> changes.
>>
>> Please retest and if it still works try once more which change fixed it.
> It's hard to say, I'm afraid. I had a successful run with only the first
> two of the split up changes, but then after both of them failing by
> themselves, another run with both of them failed as well. So it seems
> like both of those are required, but maybe not sufficient.
>
> FWIW, I've also had successful runs with the first three of the split
> changes, and with all of them.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-07-02 19:31                                                 ` Christian König
@ 2014-07-03  3:48                                                   ` Michel Dänzer
  2014-07-03  6:36                                                     ` Christian König
  0 siblings, 1 reply; 36+ messages in thread
From: Michel Dänzer @ 2014-07-03  3:48 UTC (permalink / raw)
  To: Christian König, Alex Deucher; +Cc: dri-devel

On 03.07.2014 04:31, Christian König wrote:
>> FWIW, I've also had successful runs with the first three of the split
>> changes, and with all of them.
> Ok I've just pushed a branch testing-3.15-v3 to fdo which moves all page
> table allocation to the end of VRAM. Please try with this memory layout,
> it should give us a good idea if it's indeed a memory corruption or
> something else.

That branch just survived piglit as well.


> Apart from that please try to lockup your system with
> radeon.lockup_timeout=0 on the kernel commandline and then try to get a
> dump of the vm page tables with the script I've send to you in one of
> the mails.

Any preference for which changes of which branch I should try this with?
E.g. with the two overalignment changes from testing-3.15-v2?


-- 
Earthling Michel Dänzer            |                  http://www.amd.com
Libre software enthusiast          |                Mesa and X developer

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/radeon: stop poisoning the GART TLB
  2014-07-03  3:48                                                   ` Michel Dänzer
@ 2014-07-03  6:36                                                     ` Christian König
  0 siblings, 0 replies; 36+ messages in thread
From: Christian König @ 2014-07-03  6:36 UTC (permalink / raw)
  To: Michel Dänzer, Alex Deucher; +Cc: dri-devel

Am 03.07.2014 05:48, schrieb Michel Dänzer:
> On 03.07.2014 04:31, Christian König wrote:
>>> FWIW, I've also had successful runs with the first three of the split
>>> changes, and with all of them.
>> Ok I've just pushed a branch testing-3.15-v3 to fdo which moves all page
>> table allocation to the end of VRAM. Please try with this memory layout,
>> it should give us a good idea if it's indeed a memory corruption or
>> something else.
> That branch just survived piglit as well.

Ok, so it's probably not an alignment issue but indeed a memory 
corruption (crap, the former would be easier to fix).

>> Apart from that please try to lockup your system with
>> radeon.lockup_timeout=0 on the kernel commandline and then try to get a
>> dump of the vm page tables with the script I've send to you in one of
>> the mails.
> Any preference for which changes of which branch I should try this with?
> E.g. with the two overalignment changes from testing-3.15-v2?

Just a blank 3.15 should be sufficient, I just want to take a look at 
the hexdump of the page tables to figure out what kind of memory 
corruption we have here.

Thanks,
Christian.

^ permalink raw reply	[flat|nested] 36+ messages in thread

end of thread, other threads:[~2014-07-03  6:37 UTC | newest]

Thread overview: 36+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-06-04 13:29 [PATCH 1/3] drm/radeon: stop poisoning the GART TLB Christian König
2014-06-04 13:29 ` [PATCH 2/3] drm/radeon: remove range check from *_gart_set_page Christian König
2014-06-04 13:29 ` [PATCH 3/3] drm/radeon: use the SDMA on for buffer moves on CIK again Christian König
2014-06-04 13:46 ` [PATCH 1/3] drm/radeon: stop poisoning the GART TLB Alex Deucher
2014-06-04 13:50   ` Christian König
2014-06-10 23:30 ` Marek Olšák
2014-06-11  9:29   ` Christian König
2014-06-11 10:56     ` Marek Olšák
2014-06-12 11:23       ` Christian König
2014-06-13 13:19         ` Marek Olšák
2014-06-13 15:45           ` Christian König
2014-06-13 21:31             ` Alex Deucher
2014-06-15 12:48               ` Christian König
2014-06-19  1:48                 ` Michel Dänzer
2014-06-19  9:45                   ` Christian König
2014-06-23  8:15                     ` Michel Dänzer
2014-06-23  9:56                       ` Christian König
2014-06-24  6:49                         ` Michel Dänzer
2014-06-24 10:14                           ` Christian König
2014-06-25  3:59                             ` Michel Dänzer
2014-06-26 12:25                               ` Dieter Nützel
2014-06-27  2:31                               ` Michel Dänzer
2014-06-27  8:26                                 ` Christian König
2014-06-27  8:59                                   ` Michel Dänzer
2014-06-29 10:34                                     ` Christian König
2014-06-30  6:10                                       ` Michel Dänzer
2014-06-30  7:43                                         ` Christian König
2014-07-01  6:48                                           ` Michel Dänzer
2014-07-01 12:16                                             ` Christian König
2014-07-02  6:57                                               ` Michel Dänzer
2014-07-02 19:31                                                 ` Christian König
2014-07-03  3:48                                                   ` Michel Dänzer
2014-07-03  6:36                                                     ` Christian König
2014-06-19 10:20                   ` Marek Olšák
2014-06-19 10:25                     ` Christian König
2014-06-20  1:10                     ` Michel Dänzer

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.