From: Alex Sierra <alex.sierra@amd.com>
To: akpm@linux-foundation.org, Felix.Kuehling@amd.com,
linux-mm@kvack.org, rcampbell@nvidia.com,
linux-ext4@vger.kernel.org, linux-xfs@vger.kernel.org
Cc: amd-gfx@lists.freedesktop.org, dri-devel@lists.freedesktop.org,
hch@lst.de, jgg@nvidia.com, jglisse@redhat.com
Subject: [PATCH v1 12/14] lib: add support for device public type in test_hmm
Date: Tue, 24 Aug 2021 22:48:26 -0500 [thread overview]
Message-ID: <20210825034828.12927-13-alex.sierra@amd.com> (raw)
In-Reply-To: <20210825034828.12927-1-alex.sierra@amd.com>
Device Public type uses device memory that is coherently accesible by
the CPU. This could be shown as SP (special purpose) memory range
at the BIOS-e820 memory enumeration. If no SP memory is supported in
system, this could be faked by setting CONFIG_EFI_FAKE_MEMMAP.
Currently, test_hmm only supports two different SP ranges of at least
256MB size. This could be specified in the kernel parameter variable
efi_fake_mem. Ex. Two SP ranges of 1GB starting at 0x100000000 &
0x140000000 physical address. Ex.
efi_fake_mem=1G@0x100000000:0x40000,1G@0x140000000:0x40000
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
---
lib/test_hmm.c | 166 +++++++++++++++++++++++++++-----------------
lib/test_hmm_uapi.h | 10 ++-
2 files changed, 113 insertions(+), 63 deletions(-)
diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index ef27e355738a..e346a48e2509 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -469,6 +469,7 @@ static int dmirror_allocate_chunk(struct dmirror_device *mdevice,
unsigned long pfn_first;
unsigned long pfn_last;
void *ptr;
+ int ret = -ENOMEM;
devmem = kzalloc(sizeof(*devmem), GFP_KERNEL);
if (!devmem)
@@ -551,7 +552,7 @@ static int dmirror_allocate_chunk(struct dmirror_device *mdevice,
}
spin_unlock(&mdevice->lock);
- return true;
+ return 0;
err_release:
mutex_unlock(&mdevice->devmem_lock);
@@ -560,7 +561,7 @@ static int dmirror_allocate_chunk(struct dmirror_device *mdevice,
err_devmem:
kfree(devmem);
- return false;
+ return ret;
}
static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
@@ -569,8 +570,10 @@ static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
struct page *rpage;
/*
- * This is a fake device so we alloc real system memory to store
- * our device memory.
+ * For ZONE_DEVICE private type, this is a fake device so we alloc real
+ * system memory to store our device memory.
+ * For ZONE_DEVICE public type we use the actual dpage to store the data
+ * and ignore rpage.
*/
rpage = alloc_page(GFP_HIGHUSER);
if (!rpage)
@@ -603,7 +606,7 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
struct dmirror *dmirror)
{
struct dmirror_device *mdevice = dmirror->mdevice;
- const unsigned long *src = args->src;
+ unsigned long *src = args->src;
unsigned long *dst = args->dst;
unsigned long addr;
@@ -621,12 +624,18 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
* unallocated pte_none() or read-only zero page.
*/
spage = migrate_pfn_to_page(*src);
-
+ if (spage && is_zone_device_page(spage)) {
+ pr_debug("page already in device spage pfn: 0x%lx\n",
+ page_to_pfn(spage));
+ *src &= ~MIGRATE_PFN_MIGRATE;
+ continue;
+ }
dpage = dmirror_devmem_alloc_page(mdevice);
if (!dpage)
continue;
- rpage = dpage->zone_device_data;
+ rpage = is_device_private_page(dpage) ? dpage->zone_device_data :
+ dpage;
if (spage)
copy_highpage(rpage, spage);
else
@@ -638,8 +647,10 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
* the simulated device memory and that page holds the pointer
* to the mirror.
*/
+ rpage = dpage->zone_device_data;
rpage->zone_device_data = dmirror;
-
+ pr_debug("migrating from sys to dev pfn src: 0x%lx pfn dst: 0x%lx\n",
+ page_to_pfn(spage), page_to_pfn(dpage));
*dst = migrate_pfn(page_to_pfn(dpage)) |
MIGRATE_PFN_LOCKED;
if ((*src & MIGRATE_PFN_WRITE) ||
@@ -673,10 +684,13 @@ static int dmirror_migrate_finalize_and_map(struct migrate_vma *args,
continue;
/*
- * Store the page that holds the data so the page table
- * doesn't have to deal with ZONE_DEVICE private pages.
+ * For ZONE_DEVICE private pages we store the page that
+ * holds the data so the page table doesn't have to deal it.
+ * For ZONE_DEVICE public pages we store the actual page, since
+ * the CPU has coherent access to the page.
*/
- entry = dpage->zone_device_data;
+ entry = is_device_private_page(dpage) ? dpage->zone_device_data :
+ dpage;
if (*dst & MIGRATE_PFN_WRITE)
entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
@@ -690,6 +704,47 @@ static int dmirror_migrate_finalize_and_map(struct migrate_vma *args,
return 0;
}
+static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
+ struct dmirror *dmirror)
+{
+ unsigned long *src = args->src;
+ unsigned long *dst = args->dst;
+ unsigned long start = args->start;
+ unsigned long end = args->end;
+ unsigned long addr;
+
+ for (addr = start; addr < end; addr += PAGE_SIZE,
+ src++, dst++) {
+ struct page *dpage, *spage;
+
+ spage = migrate_pfn_to_page(*src);
+ if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
+ continue;
+ if (is_device_private_page(spage)) {
+ spage = spage->zone_device_data;
+ } else {
+ pr_debug("page already in system or SPM spage pfn: 0x%lx\n",
+ page_to_pfn(spage));
+ *src &= ~MIGRATE_PFN_MIGRATE;
+ continue;
+ }
+ dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
+ if (!dpage)
+ continue;
+ pr_debug("migrating from dev to sys pfn src: 0x%lx pfn dst: 0x%lx\n",
+ page_to_pfn(spage), page_to_pfn(dpage));
+
+ lock_page(dpage);
+ xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
+ copy_highpage(dpage, spage);
+ *dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
+ if (*src & MIGRATE_PFN_WRITE)
+ *dst |= MIGRATE_PFN_WRITE;
+ }
+ return 0;
+}
+
+
static int dmirror_migrate(struct dmirror *dmirror,
struct hmm_dmirror_cmd *cmd)
{
@@ -731,33 +786,46 @@ static int dmirror_migrate(struct dmirror *dmirror,
args.start = addr;
args.end = next;
args.pgmap_owner = dmirror->mdevice;
- args.flags = MIGRATE_VMA_SELECT_SYSTEM;
+ args.flags = (!cmd->alloc_to_devmem &&
+ dmirror->mdevice->zone_device_type ==
+ HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ?
+ MIGRATE_VMA_SELECT_DEVICE_PRIVATE :
+ MIGRATE_VMA_SELECT_SYSTEM;
ret = migrate_vma_setup(&args);
if (ret)
goto out;
- dmirror_migrate_alloc_and_copy(&args, dmirror);
+ if (cmd->alloc_to_devmem) {
+ pr_debug("Migrating from sys mem to device mem\n");
+ dmirror_migrate_alloc_and_copy(&args, dmirror);
+ } else {
+ pr_debug("Migrating from device mem to sys mem\n");
+ dmirror_devmem_fault_alloc_and_copy(&args, dmirror);
+ }
migrate_vma_pages(&args);
- dmirror_migrate_finalize_and_map(&args, dmirror);
+ if (cmd->alloc_to_devmem)
+ dmirror_migrate_finalize_and_map(&args, dmirror);
migrate_vma_finalize(&args);
}
mmap_read_unlock(mm);
mmput(mm);
- /* Return the migrated data for verification. */
- ret = dmirror_bounce_init(&bounce, start, size);
- if (ret)
- return ret;
- mutex_lock(&dmirror->mutex);
- ret = dmirror_do_read(dmirror, start, end, &bounce);
- mutex_unlock(&dmirror->mutex);
- if (ret == 0) {
- if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
- bounce.size))
- ret = -EFAULT;
+ /* Return the migrated data for verification. only for pages in device zone */
+ if (cmd->alloc_to_devmem) {
+ ret = dmirror_bounce_init(&bounce, start, size);
+ if (ret)
+ return ret;
+ mutex_lock(&dmirror->mutex);
+ ret = dmirror_do_read(dmirror, start, end, &bounce);
+ mutex_unlock(&dmirror->mutex);
+ if (ret == 0) {
+ if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
+ bounce.size))
+ ret = -EFAULT;
+ }
+ cmd->cpages = bounce.cpages;
+ dmirror_bounce_fini(&bounce);
}
- cmd->cpages = bounce.cpages;
- dmirror_bounce_fini(&bounce);
return ret;
out:
@@ -781,9 +849,15 @@ static void dmirror_mkentry(struct dmirror *dmirror, struct hmm_range *range,
}
page = hmm_pfn_to_page(entry);
- if (is_device_private_page(page)) {
- /* Is the page migrated to this device or some other? */
- if (dmirror->mdevice == dmirror_page_to_device(page))
+ if (is_device_page(page)) {
+ /* Is page ZONE_DEVICE public? */
+ if (!is_device_private_page(page))
+ *perm = HMM_DMIRROR_PROT_DEV_PUBLIC;
+ /*
+ * Is page ZONE_DEVICE private migrated to
+ * this device or some other?
+ */
+ else if (dmirror->mdevice == dmirror_page_to_device(page))
*perm = HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL;
else
*perm = HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE;
@@ -1030,38 +1104,6 @@ static void dmirror_devmem_free(struct page *page)
spin_unlock(&mdevice->lock);
}
-static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
- struct dmirror *dmirror)
-{
- const unsigned long *src = args->src;
- unsigned long *dst = args->dst;
- unsigned long start = args->start;
- unsigned long end = args->end;
- unsigned long addr;
-
- for (addr = start; addr < end; addr += PAGE_SIZE,
- src++, dst++) {
- struct page *dpage, *spage;
-
- spage = migrate_pfn_to_page(*src);
- if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
- continue;
- spage = spage->zone_device_data;
-
- dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
- if (!dpage)
- continue;
-
- lock_page(dpage);
- xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
- copy_highpage(dpage, spage);
- *dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
- if (*src & MIGRATE_PFN_WRITE)
- *dst |= MIGRATE_PFN_WRITE;
- }
- return 0;
-}
-
static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
{
struct migrate_vma args;
diff --git a/lib/test_hmm_uapi.h b/lib/test_hmm_uapi.h
index 00259d994410..b6cb8a7d2470 100644
--- a/lib/test_hmm_uapi.h
+++ b/lib/test_hmm_uapi.h
@@ -17,8 +17,12 @@
* @addr: (in) user address the device will read/write
* @ptr: (in) user address where device data is copied to/from
* @npages: (in) number of pages to read/write
+ * @alloc_to_devmem: (in) desired allocation destination during migration.
+ * True if allocation is to device memory.
+ * False if allocation is to system memory.
* @cpages: (out) number of pages copied
* @faults: (out) number of device page faults seen
+ * @zone_device_type: (out) zone device memory type
*/
struct hmm_dmirror_cmd {
__u64 addr;
@@ -26,7 +30,8 @@ struct hmm_dmirror_cmd {
__u64 npages;
__u64 cpages;
__u64 faults;
- __u64 zone_device_type;
+ __u32 zone_device_type;
+ __u32 alloc_to_devmem;
};
/* Expose the address space of the calling process through hmm device file */
@@ -49,6 +54,8 @@ struct hmm_dmirror_cmd {
* device the ioctl() is made
* HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE: Migrated device private page on some
* other device
+ * HMM_DMIRROR_PROT_DEV_PUBLIC: Migrate device public page on the device
+ * the ioctl() is made
*/
enum {
HMM_DMIRROR_PROT_ERROR = 0xFF,
@@ -60,6 +67,7 @@ enum {
HMM_DMIRROR_PROT_ZERO = 0x10,
HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL = 0x20,
HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE = 0x30,
+ HMM_DMIRROR_PROT_DEV_PUBLIC = 0x40,
};
enum {
--
2.32.0
next prev parent reply other threads:[~2021-08-25 3:49 UTC|newest]
Thread overview: 42+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-08-25 3:48 [PATCH v1 00/14] Add MEMORY_DEVICE_PUBLIC for CPU-accessible coherent device memory Alex Sierra
2021-08-25 3:48 ` [PATCH v1 01/14] ext4/xfs: add page refcount helper Alex Sierra
2021-08-25 7:35 ` Christoph Hellwig
2021-08-25 15:09 ` Theodore Ts'o
2021-08-25 15:49 ` Darrick J. Wong
2021-08-25 3:48 ` [PATCH v1 02/14] mm: remove extra ZONE_DEVICE struct page refcount Alex Sierra
2021-08-25 7:35 ` Christoph Hellwig
2021-08-25 11:15 ` Vlastimil Babka
2021-08-25 17:49 ` Ralph Campbell
2021-08-27 11:26 ` Vlastimil Babka
2021-08-25 3:48 ` [PATCH v1 03/14] mm: add iomem vma selection for memory migration Alex Sierra
2021-08-25 7:40 ` Christoph Hellwig
2021-08-25 7:46 ` Christoph Hellwig
2021-08-25 18:24 ` Sierra Guiza, Alejandro (Alex)
2021-08-26 22:27 ` Felix Kuehling
2021-08-30 8:28 ` Christoph Hellwig
2021-08-30 17:04 ` Felix Kuehling
2021-09-01 8:29 ` Christoph Hellwig
2021-09-01 15:40 ` Felix Kuehling
2021-09-01 22:03 ` Dave Chinner
2021-09-01 23:07 ` Felix Kuehling
2021-09-02 1:14 ` Dave Chinner
2021-09-09 4:55 ` Felix Kuehling
2021-09-02 8:18 ` Christoph Hellwig
2021-09-02 18:07 ` Dan Williams
2021-09-09 4:02 ` Felix Kuehling
2021-08-25 14:24 ` Felix Kuehling
2021-08-25 3:48 ` [PATCH v1 04/14] mm: add zone device public type memory support Alex Sierra
2021-08-25 3:48 ` [PATCH v1 05/14] drm/amdkfd: ref count init for device pages Alex Sierra
2021-08-25 14:34 ` Felix Kuehling
2021-08-25 3:48 ` [PATCH v1 06/14] drm/amdkfd: add SPM support for SVM Alex Sierra
2021-08-25 14:45 ` Felix Kuehling
2021-08-25 3:48 ` [PATCH v1 07/14] drm/amdkfd: public type as sys mem on migration to ram Alex Sierra
2021-08-25 3:48 ` [PATCH v1 08/14] mm: add public type support to migrate_vma helpers Alex Sierra
2021-08-25 7:47 ` Christoph Hellwig
2021-08-25 3:48 ` [PATCH v1 09/14] mm: call pgmap->ops->page_free for DEVICE_PUBLIC pages Alex Sierra
2021-08-25 7:46 ` Christoph Hellwig
2021-08-25 3:48 ` [PATCH v1 10/14] lib: test_hmm add ioctl to get zone device type Alex Sierra
2021-08-25 3:48 ` [PATCH v1 11/14] lib: test_hmm add module param for " Alex Sierra
2021-08-25 3:48 ` Alex Sierra [this message]
2021-08-25 3:48 ` [PATCH v1 13/14] tools: update hmm-test to support device public type Alex Sierra
2021-08-25 3:48 ` [PATCH v1 14/14] tools: update test_hmm script to support SP config Alex Sierra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210825034828.12927-13-alex.sierra@amd.com \
--to=alex.sierra@amd.com \
--cc=Felix.Kuehling@amd.com \
--cc=akpm@linux-foundation.org \
--cc=amd-gfx@lists.freedesktop.org \
--cc=dri-devel@lists.freedesktop.org \
--cc=hch@lst.de \
--cc=jgg@nvidia.com \
--cc=jglisse@redhat.com \
--cc=linux-ext4@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-xfs@vger.kernel.org \
--cc=rcampbell@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).