All of lore.kernel.org
 help / color / mirror / Atom feed
From: Szymon Andrzejuk <s.andrzejuk@samsung.com>
To: Andrzej Hajda <a.hajda@samsung.com>, dri-devel@lists.freedesktop.org
Cc: p.rak@samsung.com, Deepak Sharma <deepak.sharma@amd.com>,
	David Airlie <airlied@linux.ie>,
	andrzejuk.szymon@gmail.com, Sean Paul <seanpaul@chromium.org>,
	Emil Velikov <emil.velikov@collabora.com>
Subject: Re: [PATCH] drm/vgem: Added page prefaulting
Date: Thu, 5 Sep 2019 14:44:05 +0200	[thread overview]
Message-ID: <a1aa12d9-e66c-07bb-b441-60451dc33265@samsung.com> (raw)
In-Reply-To: <d6792ae4-e4e4-f585-10f8-af9bfa232b23@samsung.com>

Thank you for the review Andrzej. I'll update the patch to v2 shortly, it should
cover all your comments.

FYI, I'll be on holiday until September 16 so I might not be able to respond in the
following days.

Regards,
Szymon

On 03.09.2019 15:19, Andrzej Hajda wrote:
> +CC: $(./script/get_maintainers.pl drivers/gpu/drm/vgem/vgem_drv.c)
>
>
> On 20.08.2019 08:58, Szymon Andrzejuk wrote:
>> Page fault handler inside vgem driver now preallocates pages in advance
>> when fault occurs for the first time. Pages can be allocated in
>> direction of increasing/decreasing addresses, depending on memory access
>> profile. In case of random access no preallocation occurs.
>>
>> Synthetic benchmark showed over 8x bandwidth increase when copying data
>> from mmapped vgem buffer with memcpy and ~160 times when accessing mapped
>> buffer sequentially. Compiled with gcc 8.2.0 with -O2 flag.
>> Unigine Heaven running on custom virgl vtest virtual GPU with vgem buffers
>> sees ~17% FPS increase.
>>
>> This performance increase only occurs when accessing vgem buffer mapped
>> using DRM_IOCTL_MODE_MAP_DUMB ioctl. When accessing buffer imported
>> from prime fd the vgem page fault handler is not invoked. It's advised
>> to use vector streaming copy instructions and avoid sequential accesses
>> in this case. Streaming copy brings the performance to be on par with
>> similar buffer allocated with memfd_create(2) syscall.
>>
>> Signed-off-by: Szymon Andrzejuk <s.andrzejuk@samsung.com>
>> ---
>>  drivers/gpu/drm/vgem/vgem_drv.c | 177 ++++++++++++++++++++++++++------
>>  1 file changed, 143 insertions(+), 34 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
>> index 11a8f99ba18c..739ba841e89c 100644
>> --- a/drivers/gpu/drm/vgem/vgem_drv.c
>> +++ b/drivers/gpu/drm/vgem/vgem_drv.c
>> @@ -34,6 +34,7 @@
>>  #include <linux/ramfs.h>
>>  #include <linux/shmem_fs.h>
>>  #include <linux/dma-buf.h>
>> +#include <linux/pfn_t.h>
>>  #include "vgem_drv.h"
>>  
>>  #define DRIVER_NAME	"vgem"
>> @@ -50,8 +51,21 @@ static struct vgem_device {
>>  static void vgem_gem_free_object(struct drm_gem_object *obj)
>>  {
>>  	struct drm_vgem_gem_object *vgem_obj = to_vgem_bo(obj);
>> +	int i;
>> +
>> +	mutex_lock(&vgem_obj->pages_lock);
>> +	if (vgem_obj->pages) {
>> +		int num_pages = obj->size >> PAGE_SHIFT;
>> +
>> +		for (i = 0; i < num_pages; i++) {
>> +			if (vgem_obj->pages[i])
>> +				put_page(vgem_obj->pages[i]);
>> +		}
>> +		kvfree(vgem_obj->pages);
>> +		vgem_obj->pages = NULL;
>> +	}
>> +	mutex_unlock(&vgem_obj->pages_lock);
>>  
>> -	kvfree(vgem_obj->pages);
>>  	mutex_destroy(&vgem_obj->pages_lock);
>>  
>>  	if (obj->import_attach)
>> @@ -61,6 +75,72 @@ static void vgem_gem_free_object(struct drm_gem_object *obj)
>>  	kfree(vgem_obj);
>>  }
>>  
>> +static int __vgem_alloc_page(struct page *page, struct vm_area_struct *vma,
>> +							 unsigned long vaddr, int page_num)
>
> unused page_num
>
>
>> +{
>> +	unsigned long pfn;
>> +	int insert_ret;
>> +
>> +	pfn = page_to_pfn(page);
>> +	insert_ret = vmf_insert_mixed(vma, vaddr, __pfn_to_pfn_t(pfn, PFN_DEV));
>> +
>> +	if (insert_ret & VM_FAULT_ERROR)
>> +		return VM_FAULT_ERROR;
>
> Is it OK to return mask? instead of insert_ret.
>
>
>> +
>> +	return 0;
>> +}
>> +
>> +static int __vgem_read_mapping_page(struct drm_vgem_gem_object *obj,
>> +									int page_num, struct page **page)
>> +{
>> +	int ret;
>> +	struct page *mapped_page;
>> +
>> +	mapped_page = shmem_read_mapping_page(file_inode(obj->base.filp)->i_mapping,
>> +										  page_num);
>> +	if (IS_ERR(page)) {
>> +		switch (PTR_ERR(page)) {
>> +		case -ENOSPC:
>> +		case -ENOMEM:
>> +			ret = VM_FAULT_OOM;
>> +			break;
>> +		case -EBUSY:
>> +			ret = VM_FAULT_RETRY;
>> +			break;
>> +		case -EFAULT:
>> +		case -EINVAL:
>> +			ret = VM_FAULT_SIGBUS;
>> +			break;
>> +		default:
>> +			WARN_ON(PTR_ERR(page));
>> +			ret = VM_FAULT_SIGBUS;
>> +			break;
>> +		}
>> +
>> +		return ret;
>> +	}
>> +
>> +	*page = mapped_page;
>> +	return 0;
>> +}
>> +
>> +static int __vgem_prepare_single_page(struct drm_vgem_gem_object *obj,
>> +									  struct vm_area_struct *vma,
>> +									  int page_num, unsigned long vaddr)
>> +{
>> +	int ret;
>> +
>> +	ret = __vgem_read_mapping_page(obj, page_num, &obj->pages[page_num]);
>> +	if (ret)
>> +		return ret;
>> +
>> +	ret = __vgem_alloc_page(obj->pages[page_num], vma, vaddr, page_num);
>> +	if (ret)
>> +		return ret;
>
> One can use return __vgem_alloc_page(...), up to you.
>
>
>> +
>> +	return 0;
>> +}
>> +
>>  static vm_fault_t vgem_gem_fault(struct vm_fault *vmf)
>>  {
>>  	struct vm_area_struct *vma = vmf->vma;
>> @@ -70,6 +150,8 @@ static vm_fault_t vgem_gem_fault(struct vm_fault *vmf)
>>  	vm_fault_t ret = VM_FAULT_SIGBUS;
>>  	loff_t num_pages;
>>  	pgoff_t page_offset;
>> +	int page_num, page_prep_ret;
>> +	const int PREFAULT_PAGES = 8;
>>  	page_offset = (vaddr - vma->vm_start) >> PAGE_SHIFT;
>>  
>>  	num_pages = DIV_ROUND_UP(obj->base.size, PAGE_SIZE);
>> @@ -77,41 +159,60 @@ static vm_fault_t vgem_gem_fault(struct vm_fault *vmf)
>>  	if (page_offset >= num_pages)
>>  		return VM_FAULT_SIGBUS;
>>  
>> +	ret = VM_FAULT_NOPAGE;
>> +
>>  	mutex_lock(&obj->pages_lock);
>> -	if (obj->pages) {
>> -		get_page(obj->pages[page_offset]);
>> -		vmf->page = obj->pages[page_offset];
>> -		ret = 0;
>> -	}
>> -	mutex_unlock(&obj->pages_lock);
>> -	if (ret) {
>> -		struct page *page;
>> -
>> -		page = shmem_read_mapping_page(
>> -					file_inode(obj->base.filp)->i_mapping,
>> -					page_offset);
>> -		if (!IS_ERR(page)) {
>> -			vmf->page = page;
>> -			ret = 0;
>> -		} else switch (PTR_ERR(page)) {
>> -			case -ENOSPC:
>> -			case -ENOMEM:
>> -				ret = VM_FAULT_OOM;
>> -				break;
>> -			case -EBUSY:
>> -				ret = VM_FAULT_RETRY;
>> -				break;
>> -			case -EFAULT:
>> -			case -EINVAL:
>> -				ret = VM_FAULT_SIGBUS;
>> -				break;
>> -			default:
>> -				WARN_ON(PTR_ERR(page));
>> -				ret = VM_FAULT_SIGBUS;
>> -				break;
>> +
>> +	if (num_pages > 1) {
>> +		bool forward = true;
>> +		bool random = false;
>> +
>> +		// Determine prefaulting direction. If adjacent pages are both
>> +		// allocated/not allocated then we have random access.
>> +		// Always try to prefault on first and last page.
>> +		if (page_offset != 0 && page_offset != num_pages - 1) {
>> +			struct page *next, *prev;
>> +
>> +			next = obj->pages[page_offset + 1];
>> +			prev = obj->pages[page_offset - 1];
>> +			if (!((uintptr_t)next ^ (uintptr_t)prev))
>> +				random = true;
>> +			else if (!prev)
>> +				forward = false;
>> +		} else {
>> +			forward = (page_offset == 0);
>>  		}
>
> Quite complicated,  maybe sth like this:
>
>     bool next, prev;
>
>     next = obj->pages[page_offset + 1];
>
>     prev = obj->pages[page_offset - 1];
>
>     if (prev == next)
>
>         random = true;
>
>
>>  
>> +		if (!random) {
>> +			for (page_num = page_offset;
>> +				forward ? page_num < page_offset + PREFAULT_PAGES && page_num < num_pages :
>> +				page_offset - page_num < PREFAULT_PAGES && page_num >= 0;
>> +				forward ? page_num++ : page_num--) {
>
> Again complicated, try pre-calculate boundaries and:
>
> start_page = ...;
>
> end_page = ...;
>
> step = forward ? 1 : -1;
>
> for (page_num = start_page; page_num < end_page; page_num += step)
>
>     ...
>
>
>> +				if (!obj->pages[page_num]) {
>> +					page_prep_ret = __vgem_prepare_single_page(obj, vma, page_num, vaddr);
>> +					if (page_prep_ret) {
>> +						ret = page_prep_ret;
>> +						break;
>> +					}
>> +				} else {
>> +					// random access, exit loop
>> +					break;
>> +				}
>> +
>> +				vaddr = vaddr + (forward ? 1 : -1) * PAGE_SIZE;
>
> vaddr += step * PAGE_SIZE;
>
>
> Regards
>
> Andrzej
>
>
>> +			}
>> +		} else {
>> +			page_prep_ret = __vgem_prepare_single_page(obj, vma, page_offset, vaddr);
>> +			if (page_prep_ret)
>> +				ret = page_prep_ret;
>> +		}
>> +	} else {
>> +		page_prep_ret = __vgem_prepare_single_page(obj, vma, page_offset, vaddr);
>> +		if (page_prep_ret)
>> +			ret = page_prep_ret;
>>  	}
>> +
>> +	mutex_unlock(&obj->pages_lock);
>>  	return ret;
>>  }
>>  
>> @@ -182,7 +283,7 @@ static struct drm_gem_object *vgem_gem_create(struct drm_device *dev,
>>  					      unsigned long size)
>>  {
>>  	struct drm_vgem_gem_object *obj;
>> -	int ret;
>> +	int ret, num_pages;
>>  
>>  	obj = __vgem_gem_create(dev, size);
>>  	if (IS_ERR(obj))
>> @@ -193,6 +294,13 @@ static struct drm_gem_object *vgem_gem_create(struct drm_device *dev,
>>  	if (ret)
>>  		return ERR_PTR(ret);
>>  
>> +	mutex_lock(&obj->pages_lock);
>> +
>> +	num_pages = obj->base.size >> PAGE_SHIFT;
>> +	obj->pages = kvcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
>> +
>> +	mutex_unlock(&obj->pages_lock);
>> +
>>  	return &obj->base;
>>  }
>>  
>> @@ -262,7 +370,8 @@ static int vgem_mmap(struct file *filp, struct vm_area_struct *vma)
>>  	/* Keep the WC mmaping set by drm_gem_mmap() but our pages
>>  	 * are ordinary and not special.
>>  	 */
>> -	vma->vm_flags = flags | VM_DONTEXPAND | VM_DONTDUMP;
>> +	vma->vm_flags = flags | VM_DONTEXPAND | VM_DONTDUMP | VM_MIXEDMAP;
>> +
>>  	return 0;
>>  }
>>  
>>
>>
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
>
>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

      reply	other threads:[~2019-09-05 12:44 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <CGME20190820065832eucas1p182359d7501af8c027e7f3a4f4ad4d5e3@eucas1p1.samsung.com>
     [not found] ` <20190820065830.GA11255@AMDC3964.digital.local>
2019-09-03 13:19   ` [PATCH] drm/vgem: Added page prefaulting Andrzej Hajda
2019-09-05 12:44     ` Szymon Andrzejuk [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=a1aa12d9-e66c-07bb-b441-60451dc33265@samsung.com \
    --to=s.andrzejuk@samsung.com \
    --cc=a.hajda@samsung.com \
    --cc=airlied@linux.ie \
    --cc=andrzejuk.szymon@gmail.com \
    --cc=deepak.sharma@amd.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=emil.velikov@collabora.com \
    --cc=p.rak@samsung.com \
    --cc=seanpaul@chromium.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.