All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] radeon: use vmalloc instead of kmalloc
@ 2009-06-22 17:26 Jerome Glisse
  2009-06-23 11:35 ` Peter Zijlstra
  0 siblings, 1 reply; 6+ messages in thread
From: Jerome Glisse @ 2009-06-22 17:26 UTC (permalink / raw)
  To: airlied; +Cc: dri-devel, linux-kernel, Jerome Glisse

We don't need to allocated contiguous pages in cs codepath
so use vmalloc instead.

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
---
 drivers/gpu/drm/radeon/radeon_cs.c |   20 ++++++++++----------
 1 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index b843f9b..54a9740 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -45,11 +45,11 @@ int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 	chunk = &p->chunks[p->chunk_relocs_idx];
 	/* FIXME: we assume that each relocs use 4 dwords */
 	p->nrelocs = chunk->length_dw / 4;
-	p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL);
+	p->relocs_ptr = vmalloc(p->nrelocs * sizeof(void *));
 	if (p->relocs_ptr == NULL) {
 		return -ENOMEM;
 	}
-	p->relocs = kcalloc(p->nrelocs, sizeof(struct radeon_cs_reloc), GFP_KERNEL);
+	p->relocs = vmalloc(p->nrelocs * sizeof(struct radeon_cs_reloc));
 	if (p->relocs == NULL) {
 		return -ENOMEM;
 	}
@@ -103,7 +103,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
 	p->idx = 0;
 	p->chunk_ib_idx = -1;
 	p->chunk_relocs_idx = -1;
-	p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
+	p->chunks_array = vmalloc(cs->num_chunks * sizeof(uint64_t));
 	if (p->chunks_array == NULL) {
 		return -ENOMEM;
 	}
@@ -113,7 +113,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
 		return -EFAULT;
 	}
 	p->nchunks = cs->num_chunks;
-	p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
+	p->chunks = vmalloc(p->nchunks * sizeof(struct radeon_cs_chunk));
 	if (p->chunks == NULL) {
 		return -ENOMEM;
 	}
@@ -139,7 +139,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
 
 		p->chunks[i].kdata = NULL;
 		size = p->chunks[i].length_dw * sizeof(uint32_t);
-		p->chunks[i].kdata = kzalloc(size, GFP_KERNEL);
+		p->chunks[i].kdata = vmalloc(size);
 		if (p->chunks[i].kdata == NULL) {
 			return -ENOMEM;
 		}
@@ -179,13 +179,13 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error)
 			mutex_unlock(&parser->rdev->ddev->struct_mutex);
 		}
 	}
-	kfree(parser->relocs);
-	kfree(parser->relocs_ptr);
+	vfree(parser->relocs);
+	vfree(parser->relocs_ptr);
 	for (i = 0; i < parser->nchunks; i++) {
-		kfree(parser->chunks[i].kdata);
+		vfree(parser->chunks[i].kdata);
 	}
-	kfree(parser->chunks);
-	kfree(parser->chunks_array);
+	vfree(parser->chunks);
+	vfree(parser->chunks_array);
 	radeon_ib_free(parser->rdev, &parser->ib);
 }
 
-- 
1.6.2.2


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] radeon: use vmalloc instead of kmalloc
  2009-06-22 17:26 [PATCH] radeon: use vmalloc instead of kmalloc Jerome Glisse
@ 2009-06-23 11:35 ` Peter Zijlstra
  2009-06-23 11:42   ` Dave Airlie
  0 siblings, 1 reply; 6+ messages in thread
From: Peter Zijlstra @ 2009-06-23 11:35 UTC (permalink / raw)
  To: Jerome Glisse; +Cc: airlied, dri-devel, linux-kernel

On Mon, 2009-06-22 at 19:26 +0200, Jerome Glisse wrote:
> We don't need to allocated contiguous pages in cs codepath
> so use vmalloc instead.

Best would be to not require >PAGE_SIZE allocations at all of course.
But barring that, it would be great to have something like:

  ptr = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);
  if (!ptr)
    ptr = vmalloc(size);

Also, how long do these allocations live? vmalloc space can be quite
limited (i386) and it can fragment too.

> Signed-off-by: Jerome Glisse <jglisse@redhat.com>
> ---
>  drivers/gpu/drm/radeon/radeon_cs.c |   20 ++++++++++----------
>  1 files changed, 10 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
> index b843f9b..54a9740 100644
> --- a/drivers/gpu/drm/radeon/radeon_cs.c
> +++ b/drivers/gpu/drm/radeon/radeon_cs.c
> @@ -45,11 +45,11 @@ int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
>  	chunk = &p->chunks[p->chunk_relocs_idx];
>  	/* FIXME: we assume that each relocs use 4 dwords */
>  	p->nrelocs = chunk->length_dw / 4;
> -	p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL);
> +	p->relocs_ptr = vmalloc(p->nrelocs * sizeof(void *));
>  	if (p->relocs_ptr == NULL) {
>  		return -ENOMEM;
>  	}
> -	p->relocs = kcalloc(p->nrelocs, sizeof(struct radeon_cs_reloc), GFP_KERNEL);
> +	p->relocs = vmalloc(p->nrelocs * sizeof(struct radeon_cs_reloc));
>  	if (p->relocs == NULL) {
>  		return -ENOMEM;
>  	}
> @@ -103,7 +103,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
>  	p->idx = 0;
>  	p->chunk_ib_idx = -1;
>  	p->chunk_relocs_idx = -1;
> -	p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
> +	p->chunks_array = vmalloc(cs->num_chunks * sizeof(uint64_t));
>  	if (p->chunks_array == NULL) {
>  		return -ENOMEM;
>  	}
> @@ -113,7 +113,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
>  		return -EFAULT;
>  	}
>  	p->nchunks = cs->num_chunks;
> -	p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
> +	p->chunks = vmalloc(p->nchunks * sizeof(struct radeon_cs_chunk));
>  	if (p->chunks == NULL) {
>  		return -ENOMEM;
>  	}
> @@ -139,7 +139,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
>  
>  		p->chunks[i].kdata = NULL;
>  		size = p->chunks[i].length_dw * sizeof(uint32_t);
> -		p->chunks[i].kdata = kzalloc(size, GFP_KERNEL);
> +		p->chunks[i].kdata = vmalloc(size);
>  		if (p->chunks[i].kdata == NULL) {
>  			return -ENOMEM;
>  		}
> @@ -179,13 +179,13 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error)
>  			mutex_unlock(&parser->rdev->ddev->struct_mutex);
>  		}
>  	}
> -	kfree(parser->relocs);
> -	kfree(parser->relocs_ptr);
> +	vfree(parser->relocs);
> +	vfree(parser->relocs_ptr);
>  	for (i = 0; i < parser->nchunks; i++) {
> -		kfree(parser->chunks[i].kdata);
> +		vfree(parser->chunks[i].kdata);
>  	}
> -	kfree(parser->chunks);
> -	kfree(parser->chunks_array);
> +	vfree(parser->chunks);
> +	vfree(parser->chunks_array);
>  	radeon_ib_free(parser->rdev, &parser->ib);
>  }
>  

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] radeon: use vmalloc instead of kmalloc
  2009-06-23 11:35 ` Peter Zijlstra
@ 2009-06-23 11:42   ` Dave Airlie
  2009-06-23 12:23     ` Jerome Glisse
                       ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Dave Airlie @ 2009-06-23 11:42 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Jerome Glisse, dri-devel, linux-kernel

On Tue, Jun 23, 2009 at 9:35 PM, Peter Zijlstra<peterz@infradead.org> wrote:
> On Mon, 2009-06-22 at 19:26 +0200, Jerome Glisse wrote:
>> We don't need to allocated contiguous pages in cs codepath
>> so use vmalloc instead.
>
> Best would be to not require >PAGE_SIZE allocations at all of course.

It gets messy when you have copy from user and spinlocks, it would be nice
to just parse the userspace PAGE_SIZE at a time, but it would mean dropping
a lock we probably need to hold.

> But barring that, it would be great to have something like:
>
>  ptr = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);
>  if (!ptr)
>    ptr = vmalloc(size);

we have a drm_calloc_large workaround already for the Intel driver which also
need this.
>
> Also, how long do these allocations live? vmalloc space can be quite
> limited (i386) and it can fragment too.

Only an ioctl lifetime so they aren't that bad. We however do have some vmaps
that might be quite large, mainly the fbcon framebuffer (up to 8MB in
some cases)

Dave.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] radeon: use vmalloc instead of kmalloc
  2009-06-23 11:42   ` Dave Airlie
@ 2009-06-23 12:23     ` Jerome Glisse
  2009-06-23 12:34     ` Peter Zijlstra
  2009-06-23 15:14     ` Thomas Hellström
  2 siblings, 0 replies; 6+ messages in thread
From: Jerome Glisse @ 2009-06-23 12:23 UTC (permalink / raw)
  To: Dave Airlie; +Cc: Peter Zijlstra, Jerome Glisse, dri-devel, linux-kernel

On Tue, 2009-06-23 at 21:42 +1000, Dave Airlie wrote:
> On Tue, Jun 23, 2009 at 9:35 PM, Peter Zijlstra<peterz@infradead.org> wrote:
> > On Mon, 2009-06-22 at 19:26 +0200, Jerome Glisse wrote:
> >> We don't need to allocated contiguous pages in cs codepath
> >> so use vmalloc instead.
> >
> > Best would be to not require >PAGE_SIZE allocations at all of course.
> 
> It gets messy when you have copy from user and spinlocks, it would be nice
> to just parse the userspace PAGE_SIZE at a time, but it would mean dropping
> a lock we probably need to hold.
> 
> > But barring that, it would be great to have something like:
> >
> >  ptr = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);
> >  if (!ptr)
> >    ptr = vmalloc(size);
> 
> we have a drm_calloc_large workaround already for the Intel driver which also
> need this.
> >
> > Also, how long do these allocations live? vmalloc space can be quite
> > limited (i386) and it can fragment too.
> 
> Only an ioctl lifetime so they aren't that bad. We however do have some vmaps
> that might be quite large, mainly the fbcon framebuffer (up to 8MB in
> some cases)
> 
> Dave.


I will rework cs parsing a bit see with what i can come up with. Forget
this patch, i will get back with a new one.

Jerome


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] radeon: use vmalloc instead of kmalloc
  2009-06-23 11:42   ` Dave Airlie
  2009-06-23 12:23     ` Jerome Glisse
@ 2009-06-23 12:34     ` Peter Zijlstra
  2009-06-23 15:14     ` Thomas Hellström
  2 siblings, 0 replies; 6+ messages in thread
From: Peter Zijlstra @ 2009-06-23 12:34 UTC (permalink / raw)
  To: Dave Airlie; +Cc: Jerome Glisse, dri-devel, linux-kernel

On Tue, 2009-06-23 at 21:42 +1000, Dave Airlie wrote:
> On Tue, Jun 23, 2009 at 9:35 PM, Peter Zijlstra<peterz@infradead.org> wrote:
> > On Mon, 2009-06-22 at 19:26 +0200, Jerome Glisse wrote:
> >> We don't need to allocated contiguous pages in cs codepath
> >> so use vmalloc instead.
> >
> > Best would be to not require >PAGE_SIZE allocations at all of course.
> 
> It gets messy when you have copy from user and spinlocks, it would be nice
> to just parse the userspace PAGE_SIZE at a time, but it would mean dropping
> a lock we probably need to hold.

Can't you play tricks with refcounts?

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] radeon: use vmalloc instead of kmalloc
  2009-06-23 11:42   ` Dave Airlie
  2009-06-23 12:23     ` Jerome Glisse
  2009-06-23 12:34     ` Peter Zijlstra
@ 2009-06-23 15:14     ` Thomas Hellström
  2 siblings, 0 replies; 6+ messages in thread
From: Thomas Hellström @ 2009-06-23 15:14 UTC (permalink / raw)
  To: Dave Airlie; +Cc: Peter Zijlstra, Jerome Glisse, linux-kernel, dri-devel

Dave Airlie skrev:
> On Tue, Jun 23, 2009 at 9:35 PM, Peter Zijlstra<peterz@infradead.org> wrote:
>   
>> On Mon, 2009-06-22 at 19:26 +0200, Jerome Glisse wrote:
>>     
>>> We don't need to allocated contiguous pages in cs codepath
>>> so use vmalloc instead.
>>>       
>> Best would be to not require >PAGE_SIZE allocations at all of course.
>>     
>
> It gets messy when you have copy from user and spinlocks, it would be nice
> to just parse the userspace PAGE_SIZE at a time, but it would mean dropping
> a lock we probably need to hold.
>
>   
>> But barring that, it would be great to have something like:
>>
>>  ptr = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);
>>  if (!ptr)
>>    ptr = vmalloc(size);
>>     
>
> we have a drm_calloc_large workaround already for the Intel driver which also
> need this.
>   
One problem with multiple vmallocs per command submission is 
performance. Judging from previous work, drivers that are doing this 
tend to get very cpu-hungry. Since Radeon is only allowing a single 
process into the command submission path at once, what about 
pre-allocating a single larger vmalloced buffer at first command 
submission and take care to flush user-space before the submitted 
command stream gets too big.

>> Also, how long do these allocations live? vmalloc space can be quite
>> limited (i386) and it can fragment too.
>>     
>
> Only an ioctl lifetime so they aren't that bad. We however do have some vmaps
> that might be quite large, mainly the fbcon framebuffer (up to 8MB in
> some cases)
>   
That one would be ioremapped, not vmapped right? Not that it matters 
because it's using vmalloc space anyway, but it wouldn't be worse than a 
traditionally ioremapped framebuffer.

Thomas

> Dave.
>
> ------------------------------------------------------------------------------
> Are you an open source citizen? Join us for the Open Source Bridge conference!
> Portland, OR, June 17-19. Two days of sessions, one day of unconference: $250.
> Need another reason to go? 24-hour hacker lounge. Register today!
> http://ad.doubleclick.net/clk;215844324;13503038;v?http://opensourcebridge.org
> --
> _______________________________________________
> Dri-devel mailing list
> Dri-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/dri-devel
>   


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2009-06-23 15:14 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-06-22 17:26 [PATCH] radeon: use vmalloc instead of kmalloc Jerome Glisse
2009-06-23 11:35 ` Peter Zijlstra
2009-06-23 11:42   ` Dave Airlie
2009-06-23 12:23     ` Jerome Glisse
2009-06-23 12:34     ` Peter Zijlstra
2009-06-23 15:14     ` Thomas Hellström

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.