From: Alistair Popple <apopple@nvidia.com>
To: <akpm@linux-foundation.org>, <Felix.Kuehling@amd.com>,
	<linux-mm@kvack.org>, <rcampbell@nvidia.com>,
	<linux-ext4@vger.kernel.org>, <linux-xfs@vger.kernel.org>,
	Alex Sierra <alex.sierra@amd.com>
Cc: <amd-gfx@lists.freedesktop.org>,
	<dri-devel@lists.freedesktop.org>, <hch@lst.de>, <jgg@nvidia.com>,
	<jglisse@redhat.com>
Subject: Re: [PATCH v2 10/12] lib: add support for device public type in test_hmm
Date: Fri, 1 Oct 2021 11:32:07 +1000	[thread overview]
Message-ID: <5085060.BnOf7i5VvV@nvdebian> (raw)
In-Reply-To: <20210913161604.31981-11-alex.sierra@amd.com>

On Tuesday, 14 September 2021 2:16:02 AM AEST Alex Sierra wrote:
> Device Public type uses device memory that is coherently accessible by
> the CPU. This can show up as an SP (special purpose) memory range
> in the BIOS-e820 memory enumeration. If no SP memory is supported in
> the system, it can be faked by setting CONFIG_EFI_FAKE_MEMMAP.
> 
> Currently, test_hmm only supports two different SP ranges of at least
> 256MB in size. These can be specified through the efi_fake_mem kernel
> parameter. For example, two 1GB SP ranges starting at physical
> addresses 0x100000000 and 0x140000000:
> efi_fake_mem=1G@0x100000000:0x40000,1G@0x140000000:0x40000
> 
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
> ---
>  lib/test_hmm.c      | 166 +++++++++++++++++++++++++++-----------------
>  lib/test_hmm_uapi.h |  10 ++-
>  2 files changed, 113 insertions(+), 63 deletions(-)
> 
> diff --git a/lib/test_hmm.c b/lib/test_hmm.c
> index ef27e355738a..e346a48e2509 100644
> --- a/lib/test_hmm.c
> +++ b/lib/test_hmm.c
> @@ -469,6 +469,7 @@ static int dmirror_allocate_chunk(struct dmirror_device *mdevice,
>  	unsigned long pfn_first;
>  	unsigned long pfn_last;
>  	void *ptr;
> +	int ret = -ENOMEM;
>  
>  	devmem = kzalloc(sizeof(*devmem), GFP_KERNEL);
>  	if (!devmem)
> @@ -551,7 +552,7 @@ static int dmirror_allocate_chunk(struct dmirror_device *mdevice,
>  	}
>  	spin_unlock(&mdevice->lock);
>  
> -	return true;
> +	return 0;
>  
>  err_release:
>  	mutex_unlock(&mdevice->devmem_lock);
> @@ -560,7 +561,7 @@ static int dmirror_allocate_chunk(struct dmirror_device *mdevice,
>  err_devmem:
>  	kfree(devmem);
>  
> -	return false;
> +	return ret;
>  }
>  
>  static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
> @@ -569,8 +570,10 @@ static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
>  	struct page *rpage;
>  
>  	/*
> -	 * This is a fake device so we alloc real system memory to store
> -	 * our device memory.
> +	 * For ZONE_DEVICE private type, this is a fake device so we alloc real
> +	 * system memory to store our device memory.
> +	 * For ZONE_DEVICE public type we use the actual dpage to store the data
> +	 * and ignore rpage.
>  	 */
>  	rpage = alloc_page(GFP_HIGHUSER);
>  	if (!rpage)
> @@ -603,7 +606,7 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
>  					   struct dmirror *dmirror)
>  {
>  	struct dmirror_device *mdevice = dmirror->mdevice;
> -	const unsigned long *src = args->src;
> +	unsigned long *src = args->src;
>  	unsigned long *dst = args->dst;
>  	unsigned long addr;
>  
> @@ -621,12 +624,18 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
>  		 * unallocated pte_none() or read-only zero page.
>  		 */
>  		spage = migrate_pfn_to_page(*src);
> -
> +		if (spage && is_zone_device_page(spage)) {
> +			pr_debug("page already in device spage pfn: 0x%lx\n",
> +				  page_to_pfn(spage));
> +			*src &= ~MIGRATE_PFN_MIGRATE;

I don't think this is quite correct; callers shouldn't modify the src array. To
mark a page as not migrating, callers need to set *dst = 0.

However, I think this should be considered a test failure anyway. If we are
migrating from system to device memory we would have set
MIGRATE_VMA_SELECT_SYSTEM, meaning no device private pages should be returned.
Therefore I don't think we can reach this unless there is a bug, right?
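
Something like the following is what I had in mind - completely untested, and
the WARN_ON_ONCE() is just one way of flagging the unexpected case:

	spage = migrate_pfn_to_page(*src);
	if (spage && is_zone_device_page(spage)) {
		/*
		 * Should not happen when only MIGRATE_VMA_SELECT_SYSTEM was
		 * requested, so flag it. Set *dst to 0 so the core code
		 * treats the page as not migrating, and skip the entry
		 * without touching *src.
		 */
		WARN_ON_ONCE(1);
		*dst = 0;
		continue;
	}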

> +			continue;
> +		}
>  		dpage = dmirror_devmem_alloc_page(mdevice);
>  		if (!dpage)
>  			continue;
>  
> -		rpage = dpage->zone_device_data;
> +		rpage = is_device_private_page(dpage) ? dpage->zone_device_data :
> +							dpage;
>  		if (spage)
>  			copy_highpage(rpage, spage);
>  		else
> @@ -638,8 +647,10 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
>  		 * the simulated device memory and that page holds the pointer
>  		 * to the mirror.
>  		 */
> +		rpage = dpage->zone_device_data;
>  		rpage->zone_device_data = dmirror;
> -
> +		pr_debug("migrating from sys to dev pfn src: 0x%lx pfn dst: 0x%lx\n",
> +			 page_to_pfn(spage), page_to_pfn(dpage));
>  		*dst = migrate_pfn(page_to_pfn(dpage)) |
>  			    MIGRATE_PFN_LOCKED;
>  		if ((*src & MIGRATE_PFN_WRITE) ||
> @@ -673,10 +684,13 @@ static int dmirror_migrate_finalize_and_map(struct migrate_vma *args,
>  			continue;
>  
>  		/*
> -		 * Store the page that holds the data so the page table
> -		 * doesn't have to deal with ZONE_DEVICE private pages.
> +		 * For ZONE_DEVICE private pages we store the page that
> +		 * holds the data so the page table doesn't have to deal with it.
> +		 * For ZONE_DEVICE public pages we store the actual page, since
> +		 * the CPU has coherent access to the page.
>  		 */
> -		entry = dpage->zone_device_data;
> +		entry = is_device_private_page(dpage) ? dpage->zone_device_data :
> +							dpage;
>  		if (*dst & MIGRATE_PFN_WRITE)
>  			entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
>  		entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
> @@ -690,6 +704,47 @@ static int dmirror_migrate_finalize_and_map(struct migrate_vma *args,
>  	return 0;
>  }
>  
> +static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
> +						      struct dmirror *dmirror)
> +{
> +	unsigned long *src = args->src;
> +	unsigned long *dst = args->dst;
> +	unsigned long start = args->start;
> +	unsigned long end = args->end;
> +	unsigned long addr;
> +
> +	for (addr = start; addr < end; addr += PAGE_SIZE,
> +				       src++, dst++) {
> +		struct page *dpage, *spage;
> +
> +		spage = migrate_pfn_to_page(*src);
> +		if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
> +			continue;
> +		if (is_device_private_page(spage)) {
> +			spage = spage->zone_device_data;
> +		} else {
> +			pr_debug("page already in system or SPM spage pfn: 0x%lx\n",
> +				  page_to_pfn(spage));
> +			*src &= ~MIGRATE_PFN_MIGRATE;

Same comment as above - shouldn't touch *src and also shouldn't be able to get
here anyway.

> +			continue;
> +		}
> +		dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
> +		if (!dpage)
> +			continue;
> +		pr_debug("migrating from dev to sys pfn src: 0x%lx pfn dst: 0x%lx\n",
> +			 page_to_pfn(spage), page_to_pfn(dpage));
> +
> +		lock_page(dpage);
> +		xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
> +		copy_highpage(dpage, spage);
> +		*dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
> +		if (*src & MIGRATE_PFN_WRITE)
> +			*dst |= MIGRATE_PFN_WRITE;
> +	}
> +	return 0;
> +}
> +
> +
>  static int dmirror_migrate(struct dmirror *dmirror,
>  			   struct hmm_dmirror_cmd *cmd)
>  {
> @@ -731,33 +786,46 @@ static int dmirror_migrate(struct dmirror *dmirror,
>  		args.start = addr;
>  		args.end = next;
>  		args.pgmap_owner = dmirror->mdevice;
> -		args.flags = MIGRATE_VMA_SELECT_SYSTEM;
> +		args.flags = (!cmd->alloc_to_devmem &&
> +			     dmirror->mdevice->zone_device_type ==
> +			     HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ?
> +			     MIGRATE_VMA_SELECT_DEVICE_PRIVATE :
> +			     MIGRATE_VMA_SELECT_SYSTEM;
>  		ret = migrate_vma_setup(&args);
>  		if (ret)
>  			goto out;
>  
> -		dmirror_migrate_alloc_and_copy(&args, dmirror);
> +		if (cmd->alloc_to_devmem) {
> +			pr_debug("Migrating from sys mem to device mem\n");
> +			dmirror_migrate_alloc_and_copy(&args, dmirror);
> +		} else {
> +			pr_debug("Migrating from device mem to sys mem\n");
> +			dmirror_devmem_fault_alloc_and_copy(&args, dmirror);
> +		}
>  		migrate_vma_pages(&args);
> -		dmirror_migrate_finalize_and_map(&args, dmirror);
> +		if (cmd->alloc_to_devmem)
> +			dmirror_migrate_finalize_and_map(&args, dmirror);
>  		migrate_vma_finalize(&args);
>  	}
>  	mmap_read_unlock(mm);
>  	mmput(mm);
>  
> -	/* Return the migrated data for verification. */
> -	ret = dmirror_bounce_init(&bounce, start, size);
> -	if (ret)
> -		return ret;
> -	mutex_lock(&dmirror->mutex);
> -	ret = dmirror_do_read(dmirror, start, end, &bounce);
> -	mutex_unlock(&dmirror->mutex);
> -	if (ret == 0) {
> -		if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
> -				 bounce.size))
> -			ret = -EFAULT;
> +	/* Return the migrated data for verification. Only for pages in the device zone. */
> +	if (cmd->alloc_to_devmem) {
> +		ret = dmirror_bounce_init(&bounce, start, size);
> +		if (ret)
> +			return ret;
> +		mutex_lock(&dmirror->mutex);
> +		ret = dmirror_do_read(dmirror, start, end, &bounce);
> +		mutex_unlock(&dmirror->mutex);
> +		if (ret == 0) {
> +			if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
> +					 bounce.size))
> +				ret = -EFAULT;
> +		}
> +		cmd->cpages = bounce.cpages;
> +		dmirror_bounce_fini(&bounce);
>  	}
> -	cmd->cpages = bounce.cpages;
> -	dmirror_bounce_fini(&bounce);
>  	return ret;

Rather than passing a flag (alloc_to_devmem), can you split this into two
functions - one to migrate to device memory and one to migrate to system
memory?
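
Roughly what I have in mind - the function names below are just a suggestion,
not something from the patch:

	static int dmirror_migrate_to_device(struct dmirror *dmirror,
					     struct hmm_dmirror_cmd *cmd);
	static int dmirror_migrate_to_system(struct dmirror *dmirror,
					     struct hmm_dmirror_cmd *cmd);

	/*
	 * dmirror_migrate_to_device(): MIGRATE_VMA_SELECT_SYSTEM,
	 * dmirror_migrate_alloc_and_copy() + dmirror_migrate_finalize_and_map()
	 * and the bounce buffer read-back for verification.
	 *
	 * dmirror_migrate_to_system(): MIGRATE_VMA_SELECT_DEVICE_PRIVATE,
	 * dmirror_devmem_fault_alloc_and_copy(), with no finalize step and no
	 * bounce buffer needed.
	 */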

>  out:
> @@ -781,9 +849,15 @@ static void dmirror_mkentry(struct dmirror *dmirror, struct hmm_range *range,
>  	}
>  
>  	page = hmm_pfn_to_page(entry);
> -	if (is_device_private_page(page)) {
> -		/* Is the page migrated to this device or some other? */
> -		if (dmirror->mdevice == dmirror_page_to_device(page))
> +	if (is_device_page(page)) {
> +		/* Is page ZONE_DEVICE public? */
> +		if (!is_device_private_page(page))
> +			*perm = HMM_DMIRROR_PROT_DEV_PUBLIC;
> +		/*
> +		 * Is page ZONE_DEVICE private migrated to
> +		 * this device or some other?
> +		 */
> +		else if (dmirror->mdevice == dmirror_page_to_device(page))
>  			*perm = HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL;
>  		else
>  			*perm = HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE;
> @@ -1030,38 +1104,6 @@ static void dmirror_devmem_free(struct page *page)
>  	spin_unlock(&mdevice->lock);
>  }
>  
> -static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
> -						      struct dmirror *dmirror)
> -{
> -	const unsigned long *src = args->src;
> -	unsigned long *dst = args->dst;
> -	unsigned long start = args->start;
> -	unsigned long end = args->end;
> -	unsigned long addr;
> -
> -	for (addr = start; addr < end; addr += PAGE_SIZE,
> -				       src++, dst++) {
> -		struct page *dpage, *spage;
> -
> -		spage = migrate_pfn_to_page(*src);
> -		if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
> -			continue;
> -		spage = spage->zone_device_data;
> -
> -		dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
> -		if (!dpage)
> -			continue;
> -
> -		lock_page(dpage);
> -		xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
> -		copy_highpage(dpage, spage);
> -		*dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
> -		if (*src & MIGRATE_PFN_WRITE)
> -			*dst |= MIGRATE_PFN_WRITE;
> -	}
> -	return 0;
> -}
> -
>  static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
>  {
>  	struct migrate_vma args;
> diff --git a/lib/test_hmm_uapi.h b/lib/test_hmm_uapi.h
> index 00259d994410..b6cb8a7d2470 100644
> --- a/lib/test_hmm_uapi.h
> +++ b/lib/test_hmm_uapi.h
> @@ -17,8 +17,12 @@
>   * @addr: (in) user address the device will read/write
>   * @ptr: (in) user address where device data is copied to/from
>   * @npages: (in) number of pages to read/write
> + * @alloc_to_devmem: (in) desired allocation destination during migration.
> + * True if allocation is to device memory.
> + * False if allocation is to system memory.
>   * @cpages: (out) number of pages copied
>   * @faults: (out) number of device page faults seen
> + * @zone_device_type: (out) zone device memory type
>   */
>  struct hmm_dmirror_cmd {
>  	__u64		addr;
> @@ -26,7 +30,8 @@ struct hmm_dmirror_cmd {
>  	__u64		npages;
>  	__u64		cpages;
>  	__u64		faults;
> -	__u64		zone_device_type;
> +	__u32		zone_device_type;
> +	__u32		alloc_to_devmem;

Similar comment here. Rather than adding a boolean flag to every command, could
you rename the existing command to HMM_DMIRROR_MIGRATE_TO_DEV and add another
(HMM_DMIRROR_MIGRATE_TO_SYS) for this new operation? I think that would end up
being a bit cleaner and would match how this actually gets used in hmm-test.c,
where you end up defining hmm_migrate_sys_to_dev() and hmm_migrate_dev_to_sys()
anyway.
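
For example - the ioctl numbers below are purely illustrative, and
dmirror_migrate_to_device()/dmirror_migrate_to_system() are the hypothetical
split functions suggested earlier, not anything in the patch:

	/* test_hmm_uapi.h */
	#define HMM_DMIRROR_MIGRATE_TO_DEV	_IOWR('H', 0x02, struct hmm_dmirror_cmd)
	#define HMM_DMIRROR_MIGRATE_TO_SYS	_IOWR('H', 0x05, struct hmm_dmirror_cmd)

	/* test_hmm.c, in the ioctl handler's switch statement */
	case HMM_DMIRROR_MIGRATE_TO_DEV:
		ret = dmirror_migrate_to_device(dmirror, &cmd);
		break;
	case HMM_DMIRROR_MIGRATE_TO_SYS:
		ret = dmirror_migrate_to_system(dmirror, &cmd);
		break;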

 - Alistair

>  };
>  
>  /* Expose the address space of the calling process through hmm device file */
> @@ -49,6 +54,8 @@ struct hmm_dmirror_cmd {
>   *					device the ioctl() is made
>   * HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE: Migrated device private page on some
>   *					other device
> + * HMM_DMIRROR_PROT_DEV_PUBLIC: Migrated device public page on the device
> + *				 the ioctl() is made
>   */
>  enum {
>  	HMM_DMIRROR_PROT_ERROR			= 0xFF,
> @@ -60,6 +67,7 @@ enum {
>  	HMM_DMIRROR_PROT_ZERO			= 0x10,
>  	HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL	= 0x20,
>  	HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE	= 0x30,
> +	HMM_DMIRROR_PROT_DEV_PUBLIC		= 0x40,
>  };
>  
>  enum {
> 





Thread overview: 21+ messages
2021-09-13 16:15 [PATCH v2 00/12] MEMORY_DEVICE_PUBLIC for CPU-accessible coherent device memory Alex Sierra
2021-09-13 16:15 ` [PATCH v2 01/12] ext4/xfs: add page refcount helper Alex Sierra
2021-09-13 16:15 ` [PATCH v2 02/12] mm: remove extra ZONE_DEVICE struct page refcount Alex Sierra
2021-09-15  5:32   ` Ralph Campbell
2021-09-15  5:32     ` Ralph Campbell
2021-09-13 16:15 ` [PATCH v2 03/12] mm: add zone device public type memory support Alex Sierra
2021-09-13 16:15 ` [PATCH v2 04/12] mm: add device public vma selection for memory migration Alex Sierra
2021-09-13 16:15 ` [PATCH v2 05/12] drm/amdkfd: ref count init for device pages Alex Sierra
2021-09-13 16:15 ` [PATCH v2 06/12] drm/amdkfd: add SPM support for SVM Alex Sierra
2021-09-13 16:15 ` [PATCH v2 07/12] drm/amdkfd: public type as sys mem on migration to ram Alex Sierra
2021-09-13 16:16 ` [PATCH v2 08/12] lib: test_hmm add ioctl to get zone device type Alex Sierra
2021-09-13 16:16 ` [PATCH v2 09/12] lib: test_hmm add module param for " Alex Sierra
2021-09-20  8:53   ` Alistair Popple
2021-09-20 20:05     ` Sierra Guiza, Alejandro (Alex)
2021-09-21  5:14       ` Alistair Popple
2021-09-23 15:52         ` Sierra Guiza, Alejandro (Alex)
2021-10-01  0:28           ` Alistair Popple
2021-09-13 16:16 ` [PATCH v2 10/12] lib: add support for device public type in test_hmm Alex Sierra
2021-10-01  1:32   ` Alistair Popple [this message]
2021-09-13 16:16 ` [PATCH v2 11/12] tools: update hmm-test to support device public type Alex Sierra
2021-09-13 16:16 ` [PATCH v2 12/12] tools: update test_hmm script to support SP config Alex Sierra
