* [PATCH kernel 0/4] vfio/spapr_tce: Reworks for NVIDIA V100 + P9 passthrough (part 1)
@ 2018-10-15 9:24 Alexey Kardashevskiy
2018-10-15 9:24 ` [PATCH kernel 1/4] powerpc/mm/iommu: Rename mm_iommu_get Alexey Kardashevskiy
` (3 more replies)
0 siblings, 4 replies; 10+ messages in thread
From: Alexey Kardashevskiy @ 2018-10-15 9:24 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Alexey Kardashevskiy, Alex Williamson, kvm-ppc, David Gibson
This is a first set of patches required for passing through NVIDIA V100
with coherent memory. The full patchset is here:
https://github.com/aik/linux/tree/nv2
The matching QEMU is here:
https://github.com/aik/qemu/tree/nv2
This particular patchset prepares for having device memory which is not
backed with page structs.
Does 4/4 really need splitting?
Please comment. Thanks.
Alexey Kardashevskiy (4):
powerpc/mm/iommu: Rename mm_iommu_get
powerpc/mm/iommu/vfio_spapr_tce: Change mm_iommu_get to reference a
region
powerpc/mm/iommu: Make mm_iommu_new() fail on existing regions
powerpc/vfio/iommu/kvm: Do not pin device memory
arch/powerpc/include/asm/iommu.h | 5 +-
arch/powerpc/include/asm/mmu_context.h | 9 +++-
arch/powerpc/kernel/iommu.c | 9 ++--
arch/powerpc/kvm/book3s_64_vio.c | 18 ++++---
arch/powerpc/mm/mmu_context_iommu.c | 97 +++++++++++++++++++++++++++++-----
drivers/vfio/vfio_iommu_spapr_tce.c | 65 +++++++++++++++--------
6 files changed, 151 insertions(+), 52 deletions(-)
--
2.11.0
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH kernel 1/4] powerpc/mm/iommu: Rename mm_iommu_get
2018-10-15 9:24 [PATCH kernel 0/4] vfio/spapr_tce: Reworks for NVIDIA V100 + P9 passthrough (part 1) Alexey Kardashevskiy
@ 2018-10-15 9:24 ` Alexey Kardashevskiy
2018-10-17 0:44 ` David Gibson
2018-10-15 9:24 ` [PATCH kernel 2/4] powerpc/mm/iommu/vfio_spapr_tce: Change mm_iommu_get to reference a region Alexey Kardashevskiy
` (2 subsequent siblings)
3 siblings, 1 reply; 10+ messages in thread
From: Alexey Kardashevskiy @ 2018-10-15 9:24 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Alexey Kardashevskiy, Alex Williamson, kvm-ppc, David Gibson
Normally mm_iommu_get() is supposed to add a reference and
mm_iommu_put() to remove it. However historically mm_iommu_find() does
the referencing and mm_iommu_get() is doing allocation and referencing.
This is step 1 towards simpler mm_iommu_get().
This renames:
- mm_iommu_get to mm_iommu_new;
- mm_iommu_find to mm_iommu_get.
This should cause no behavioural change.
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
arch/powerpc/include/asm/mmu_context.h | 4 ++--
arch/powerpc/mm/mmu_context_iommu.c | 8 ++++----
drivers/vfio/vfio_iommu_spapr_tce.c | 6 +++---
3 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index b694d6a..59d4941 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -21,7 +21,7 @@ struct mm_iommu_table_group_mem_t;
extern int isolate_lru_page(struct page *page); /* from internal.h */
extern bool mm_iommu_preregistered(struct mm_struct *mm);
-extern long mm_iommu_get(struct mm_struct *mm,
+extern long mm_iommu_new(struct mm_struct *mm,
unsigned long ua, unsigned long entries,
struct mm_iommu_table_group_mem_t **pmem);
extern long mm_iommu_put(struct mm_struct *mm,
@@ -32,7 +32,7 @@ extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
unsigned long ua, unsigned long size);
extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(
struct mm_struct *mm, unsigned long ua, unsigned long size);
-extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
+extern struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
unsigned long ua, unsigned long entries);
extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
unsigned long ua, unsigned int pageshift, unsigned long *hpa);
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index 56c2234..8eeb99d 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -126,7 +126,7 @@ static int mm_iommu_move_page_from_cma(struct page *page)
return 0;
}
-long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
+long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
struct mm_iommu_table_group_mem_t **pmem)
{
struct mm_iommu_table_group_mem_t *mem;
@@ -252,7 +252,7 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
return ret;
}
-EXPORT_SYMBOL_GPL(mm_iommu_get);
+EXPORT_SYMBOL_GPL(mm_iommu_new);
static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
{
@@ -368,7 +368,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
return ret;
}
-struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
+struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
unsigned long ua, unsigned long entries)
{
struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
@@ -382,7 +382,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
return ret;
}
-EXPORT_SYMBOL_GPL(mm_iommu_find);
+EXPORT_SYMBOL_GPL(mm_iommu_get);
long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
unsigned long ua, unsigned int pageshift, unsigned long *hpa)
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index ad63725..1701798 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -156,7 +156,7 @@ static long tce_iommu_unregister_pages(struct tce_container *container,
if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK))
return -EINVAL;
- mem = mm_iommu_find(container->mm, vaddr, size >> PAGE_SHIFT);
+ mem = mm_iommu_get(container->mm, vaddr, size >> PAGE_SHIFT);
if (!mem)
return -ENOENT;
@@ -185,7 +185,7 @@ static long tce_iommu_register_pages(struct tce_container *container,
((vaddr + size) < vaddr))
return -EINVAL;
- mem = mm_iommu_find(container->mm, vaddr, entries);
+ mem = mm_iommu_get(container->mm, vaddr, entries);
if (mem) {
list_for_each_entry(tcemem, &container->prereg_list, next) {
if (tcemem->mem == mem)
@@ -193,7 +193,7 @@ static long tce_iommu_register_pages(struct tce_container *container,
}
}
- ret = mm_iommu_get(container->mm, vaddr, entries, &mem);
+ ret = mm_iommu_new(container->mm, vaddr, entries, &mem);
if (ret)
return ret;
--
2.11.0
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH kernel 2/4] powerpc/mm/iommu/vfio_spapr_tce: Change mm_iommu_get to reference a region
2018-10-15 9:24 [PATCH kernel 0/4] vfio/spapr_tce: Reworks for NVIDIA V100 + P9 passthrough (part 1) Alexey Kardashevskiy
2018-10-15 9:24 ` [PATCH kernel 1/4] powerpc/mm/iommu: Rename mm_iommu_get Alexey Kardashevskiy
@ 2018-10-15 9:24 ` Alexey Kardashevskiy
2018-10-17 0:46 ` David Gibson
2018-10-15 9:24 ` [PATCH kernel 3/4] powerpc/mm/iommu: Make mm_iommu_new() fail on existing regions Alexey Kardashevskiy
2018-10-15 9:24 ` [PATCH kernel 4/4] powerpc/vfio/iommu/kvm: Do not pin device memory Alexey Kardashevskiy
3 siblings, 1 reply; 10+ messages in thread
From: Alexey Kardashevskiy @ 2018-10-15 9:24 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Alexey Kardashevskiy, Alex Williamson, kvm-ppc, David Gibson
We are going to add another helper to preregister device memory so
instead of having mm_iommu_new() which pre-registers the normal memory
and references the region, we need separate helpers for pre-registering
and referencing.
To make the mm_iommu_get name reflect what it is supposed to do, this
changes mm_iommu_get() to reference the region so from now on for every
mm_iommu_get() we need a matching mm_iommu_put().
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
arch/powerpc/mm/mmu_context_iommu.c | 5 +++++
drivers/vfio/vfio_iommu_spapr_tce.c | 33 ++++++++++++++++++++++-----------
2 files changed, 27 insertions(+), 11 deletions(-)
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index 8eeb99d..a8c4a3c 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -373,13 +373,18 @@ struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
{
struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
+ mutex_lock(&mem_list_mutex);
+
list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
if ((mem->ua == ua) && (mem->entries == entries)) {
ret = mem;
+ ++mem->used;
break;
}
}
+ mutex_unlock(&mem_list_mutex);
+
return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_get);
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index 1701798..56db071 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -151,7 +151,8 @@ static long tce_iommu_unregister_pages(struct tce_container *container,
{
struct mm_iommu_table_group_mem_t *mem;
struct tce_iommu_prereg *tcemem;
- bool found = false;
+ bool found;
+ long ret;
if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK))
return -EINVAL;
@@ -168,9 +169,13 @@ static long tce_iommu_unregister_pages(struct tce_container *container,
}
if (!found)
- return -ENOENT;
+ ret = -ENOENT;
+ else
+ ret = tce_iommu_prereg_free(container, tcemem);
- return tce_iommu_prereg_free(container, tcemem);
+ mm_iommu_put(container->mm, mem);
+
+ return ret;
}
static long tce_iommu_register_pages(struct tce_container *container,
@@ -188,19 +193,21 @@ static long tce_iommu_register_pages(struct tce_container *container,
mem = mm_iommu_get(container->mm, vaddr, entries);
if (mem) {
list_for_each_entry(tcemem, &container->prereg_list, next) {
- if (tcemem->mem == mem)
- return -EBUSY;
+ if (tcemem->mem == mem) {
+ ret = -EBUSY;
+ goto put_exit;
+ }
}
+ } else {
+ ret = mm_iommu_new(container->mm, vaddr, entries, &mem);
+ if (ret)
+ return ret;
}
- ret = mm_iommu_new(container->mm, vaddr, entries, &mem);
- if (ret)
- return ret;
-
tcemem = kzalloc(sizeof(*tcemem), GFP_KERNEL);
if (!tcemem) {
- mm_iommu_put(container->mm, mem);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto put_exit;
}
tcemem->mem = mem;
@@ -209,6 +216,10 @@ static long tce_iommu_register_pages(struct tce_container *container,
container->enabled = true;
return 0;
+
+put_exit:
+ mm_iommu_put(container->mm, mem);
+ return ret;
}
static bool tce_page_is_contained(struct page *page, unsigned page_shift)
--
2.11.0
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH kernel 3/4] powerpc/mm/iommu: Make mm_iommu_new() fail on existing regions
2018-10-15 9:24 [PATCH kernel 0/4] vfio/spapr_tce: Reworks for NVIDIA V100 + P9 passthrough (part 1) Alexey Kardashevskiy
2018-10-15 9:24 ` [PATCH kernel 1/4] powerpc/mm/iommu: Rename mm_iommu_get Alexey Kardashevskiy
2018-10-15 9:24 ` [PATCH kernel 2/4] powerpc/mm/iommu/vfio_spapr_tce: Change mm_iommu_get to reference a region Alexey Kardashevskiy
@ 2018-10-15 9:24 ` Alexey Kardashevskiy
2018-10-17 1:00 ` David Gibson
2018-10-15 9:24 ` [PATCH kernel 4/4] powerpc/vfio/iommu/kvm: Do not pin device memory Alexey Kardashevskiy
3 siblings, 1 reply; 10+ messages in thread
From: Alexey Kardashevskiy @ 2018-10-15 9:24 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Alexey Kardashevskiy, Alex Williamson, kvm-ppc, David Gibson
Since we are going to have 2 different preregistering helpers, let's
make it clear that mm_iommu_new() is only for the normal (i.e. not device)
memory and for existing areas mm_iommu_get() should be used instead.
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
arch/powerpc/mm/mmu_context_iommu.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index a8c4a3c..839dbce 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -141,8 +141,7 @@ long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list,
next) {
if ((mem->ua == ua) && (mem->entries == entries)) {
- ++mem->used;
- *pmem = mem;
+ ret = -EBUSY;
goto unlock_exit;
}
--
2.11.0
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH kernel 4/4] powerpc/vfio/iommu/kvm: Do not pin device memory
2018-10-15 9:24 [PATCH kernel 0/4] vfio/spapr_tce: Reworks for NVIDIA V100 + P9 passthrough (part 1) Alexey Kardashevskiy
` (2 preceding siblings ...)
2018-10-15 9:24 ` [PATCH kernel 3/4] powerpc/mm/iommu: Make mm_iommu_new() fail on existing regions Alexey Kardashevskiy
@ 2018-10-15 9:24 ` Alexey Kardashevskiy
3 siblings, 0 replies; 10+ messages in thread
From: Alexey Kardashevskiy @ 2018-10-15 9:24 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Alexey Kardashevskiy, Alex Williamson, kvm-ppc, David Gibson
This new memory does not have page structs as it is not plugged to
the host so gup() will fail anyway.
This adds 2 helpers:
- mm_iommu_newdev() to preregister the "memory device" memory so
the rest of API can still be used;
- mm_iommu_is_devmem() to know if the physical address is one of these
new regions, which we must avoid unpinning.
This adds @mm to tce_page_is_contained() and iommu_tce_xchg() to test
if the memory is device memory to avoid pfn_to_page().
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
arch/powerpc/include/asm/iommu.h | 5 +-
arch/powerpc/include/asm/mmu_context.h | 5 ++
arch/powerpc/kernel/iommu.c | 9 ++--
arch/powerpc/kvm/book3s_64_vio.c | 18 ++++----
arch/powerpc/mm/mmu_context_iommu.c | 83 ++++++++++++++++++++++++++++++----
drivers/vfio/vfio_iommu_spapr_tce.c | 28 ++++++++----
6 files changed, 116 insertions(+), 32 deletions(-)
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 35db0cb..a8aeac0 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -218,8 +218,9 @@ extern void iommu_register_group(struct iommu_table_group *table_group,
extern int iommu_add_device(struct device *dev);
extern void iommu_del_device(struct device *dev);
extern int __init tce_iommu_bus_notifier_init(void);
-extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
- unsigned long *hpa, enum dma_data_direction *direction);
+extern long iommu_tce_xchg(struct mm_struct *mm, struct iommu_table *tbl,
+ unsigned long entry, unsigned long *hpa,
+ enum dma_data_direction *direction);
#else
static inline void iommu_register_group(struct iommu_table_group *table_group,
int pci_domain_number,
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 59d4941..45330ff 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -24,6 +24,9 @@ extern bool mm_iommu_preregistered(struct mm_struct *mm);
extern long mm_iommu_new(struct mm_struct *mm,
unsigned long ua, unsigned long entries,
struct mm_iommu_table_group_mem_t **pmem);
+extern long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua,
+ unsigned long entries, unsigned long dev_hpa,
+ struct mm_iommu_table_group_mem_t **pmem);
extern long mm_iommu_put(struct mm_struct *mm,
struct mm_iommu_table_group_mem_t *mem);
extern void mm_iommu_init(struct mm_struct *mm);
@@ -39,6 +42,8 @@ extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
unsigned long ua, unsigned int pageshift, unsigned long *hpa);
extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua);
+extern bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
+ unsigned int pageshift);
extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem);
#endif
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 19b4c62..ab88b0f 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -47,6 +47,7 @@
#include <asm/fadump.h>
#include <asm/vio.h>
#include <asm/tce.h>
+#include <asm/mmu_context.h>
#define DBG(...)
@@ -993,15 +994,17 @@ int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa)
}
EXPORT_SYMBOL_GPL(iommu_tce_check_gpa);
-long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
- unsigned long *hpa, enum dma_data_direction *direction)
+long iommu_tce_xchg(struct mm_struct *mm, struct iommu_table *tbl,
+ unsigned long entry, unsigned long *hpa,
+ enum dma_data_direction *direction)
{
long ret;
ret = tbl->it_ops->exchange(tbl, entry, hpa, direction);
if (!ret && ((*direction == DMA_FROM_DEVICE) ||
- (*direction == DMA_BIDIRECTIONAL)))
+ (*direction == DMA_BIDIRECTIONAL)) &&
+ !mm_iommu_is_devmem(mm, *hpa, tbl->it_page_shift))
SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT));
/* if (unlikely(ret))
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 62a8d03..532ab797 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -397,12 +397,13 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
return H_SUCCESS;
}
-static void kvmppc_clear_tce(struct iommu_table *tbl, unsigned long entry)
+static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl,
+ unsigned long entry)
{
unsigned long hpa = 0;
enum dma_data_direction dir = DMA_NONE;
- iommu_tce_xchg(tbl, entry, &hpa, &dir);
+ iommu_tce_xchg(mm, tbl, entry, &hpa, &dir);
}
static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
@@ -433,7 +434,7 @@ static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm,
unsigned long hpa = 0;
long ret;
- if (WARN_ON_ONCE(iommu_tce_xchg(tbl, entry, &hpa, &dir)))
+ if (WARN_ON_ONCE(iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir)))
return H_TOO_HARD;
if (dir == DMA_NONE)
@@ -441,7 +442,7 @@ static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm,
ret = kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);
if (ret != H_SUCCESS)
- iommu_tce_xchg(tbl, entry, &hpa, &dir);
+ iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir);
return ret;
}
@@ -487,7 +488,7 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
if (mm_iommu_mapped_inc(mem))
return H_TOO_HARD;
- ret = iommu_tce_xchg(tbl, entry, &hpa, &dir);
+ ret = iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir);
if (WARN_ON_ONCE(ret)) {
mm_iommu_mapped_dec(mem);
return H_TOO_HARD;
@@ -566,7 +567,7 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
entry, ua, dir);
if (ret != H_SUCCESS) {
- kvmppc_clear_tce(stit->tbl, entry);
+ kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry);
goto unlock_exit;
}
}
@@ -655,7 +656,8 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
iommu_tce_direction(tce));
if (ret != H_SUCCESS) {
- kvmppc_clear_tce(stit->tbl, entry);
+ kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl,
+ entry);
goto unlock_exit;
}
}
@@ -704,7 +706,7 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
return ret;
WARN_ON_ONCE(1);
- kvmppc_clear_tce(stit->tbl, entry);
+ kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry);
}
}
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index 839dbce..4835b4e 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -36,6 +36,8 @@ struct mm_iommu_table_group_mem_t {
u64 ua; /* userspace address */
u64 entries; /* number of entries in hpas[] */
u64 *hpas; /* vmalloc'ed */
+#define MM_IOMMU_TABLE_INVALID_HPA ((uint64_t)-1)
+ u64 dev_hpa; /* Device memory base address */
};
static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
@@ -126,7 +128,8 @@ static int mm_iommu_move_page_from_cma(struct page *page)
return 0;
}
-long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
+static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
+ unsigned long entries, unsigned long dev_hpa,
struct mm_iommu_table_group_mem_t **pmem)
{
struct mm_iommu_table_group_mem_t *mem;
@@ -155,11 +158,13 @@ long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
}
- ret = mm_iommu_adjust_locked_vm(mm, entries, true);
- if (ret)
- goto unlock_exit;
+ if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
+ ret = mm_iommu_adjust_locked_vm(mm, entries, true);
+ if (ret)
+ goto unlock_exit;
- locked_entries = entries;
+ locked_entries = entries;
+ }
mem = kzalloc(sizeof(*mem), GFP_KERNEL);
if (!mem) {
@@ -167,6 +172,13 @@ long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
goto unlock_exit;
}
+ if (dev_hpa != MM_IOMMU_TABLE_INVALID_HPA) {
+ mem->pageshift = __ffs(dev_hpa | (entries << PAGE_SHIFT));
+ mem->dev_hpa = dev_hpa;
+ goto good_exit;
+ }
+ mem->dev_hpa = MM_IOMMU_TABLE_INVALID_HPA;
+
/*
* For a starting point for a maximum page size calculation
* we use @ua and @entries natural alignment to allow IOMMU pages
@@ -235,6 +247,7 @@ long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
}
+good_exit:
atomic64_set(&mem->mapped, 1);
mem->used = 1;
mem->ua = ua;
@@ -251,13 +264,31 @@ long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
return ret;
}
+
+long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
+ struct mm_iommu_table_group_mem_t **pmem)
+{
+ return mm_iommu_do_alloc(mm, ua, entries, MM_IOMMU_TABLE_INVALID_HPA,
+ pmem);
+}
EXPORT_SYMBOL_GPL(mm_iommu_new);
+long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua,
+ unsigned long entries, unsigned long dev_hpa,
+ struct mm_iommu_table_group_mem_t **pmem)
+{
+ return mm_iommu_do_alloc(mm, ua, entries, dev_hpa, pmem);
+}
+EXPORT_SYMBOL_GPL(mm_iommu_newdev);
+
static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
{
long i;
struct page *page = NULL;
+ if (!mem->hpas)
+ return;
+
for (i = 0; i < mem->entries; ++i) {
if (!mem->hpas[i])
continue;
@@ -299,6 +330,7 @@ static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem)
long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
{
long ret = 0;
+ unsigned long entries, dev_hpa;
mutex_lock(&mem_list_mutex);
@@ -320,9 +352,12 @@ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
}
/* @mapped became 0 so now mappings are disabled, release the region */
+ entries = mem->entries;
+ dev_hpa = mem->dev_hpa;
mm_iommu_release(mem);
- mm_iommu_adjust_locked_vm(mm, mem->entries, false);
+ if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
+ mm_iommu_adjust_locked_vm(mm, entries, false);
unlock_exit:
mutex_unlock(&mem_list_mutex);
@@ -392,7 +427,7 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
const long entry = (ua - mem->ua) >> PAGE_SHIFT;
- u64 *va = &mem->hpas[entry];
+ u64 *va;
if (entry >= mem->entries)
return -EFAULT;
@@ -400,6 +435,12 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
if (pageshift > mem->pageshift)
return -EFAULT;
+ if (!mem->hpas) {
+ *hpa = mem->dev_hpa + (ua - mem->ua);
+ return 0;
+ }
+
+ va = &mem->hpas[entry];
*hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);
return 0;
@@ -410,7 +451,6 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
const long entry = (ua - mem->ua) >> PAGE_SHIFT;
- void *va = &mem->hpas[entry];
unsigned long *pa;
if (entry >= mem->entries)
@@ -419,7 +459,12 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
if (pageshift > mem->pageshift)
return -EFAULT;
- pa = (void *) vmalloc_to_phys(va);
+ if (!mem->hpas) {
+ *hpa = mem->dev_hpa + (ua - mem->ua);
+ return 0;
+ }
+
+ pa = (void *) vmalloc_to_phys(&mem->hpas[entry]);
if (!pa)
return -EFAULT;
@@ -449,6 +494,26 @@ extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
*pa |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY;
}
+extern bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
+ unsigned int pageshift)
+{
+ struct mm_iommu_table_group_mem_t *mem;
+ const unsigned long pagesize = 1UL << pageshift;
+
+ list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
+ if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
+ continue;
+
+ if ((mem->dev_hpa <= hpa) &&
+ (hpa + pagesize <= mem->dev_hpa +
+ (mem->entries << PAGE_SHIFT)))
+ return true;
+ }
+
+ return false;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_is_devmem);
+
long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
{
if (atomic64_inc_not_zero(&mem->mapped))
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index 56db071..ed89137 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -222,8 +222,15 @@ static long tce_iommu_register_pages(struct tce_container *container,
return ret;
}
-static bool tce_page_is_contained(struct page *page, unsigned page_shift)
+static bool tce_page_is_contained(struct mm_struct *mm, unsigned long hpa,
+ unsigned int page_shift)
{
+ struct page *page;
+
+ if (mm_iommu_is_devmem(mm, hpa, page_shift))
+ return true;
+
+ page = pfn_to_page(hpa >> PAGE_SHIFT);
/*
* Check that the TCE table granularity is not bigger than the size of
* a page we just found. Otherwise the hardware can get access to
@@ -499,7 +506,8 @@ static int tce_iommu_clear(struct tce_container *container,
direction = DMA_NONE;
oldhpa = 0;
- ret = iommu_tce_xchg(tbl, entry, &oldhpa, &direction);
+ ret = iommu_tce_xchg(container->mm, tbl, entry, &oldhpa,
+ &direction);
if (ret)
continue;
@@ -537,7 +545,6 @@ static long tce_iommu_build(struct tce_container *container,
enum dma_data_direction direction)
{
long i, ret = 0;
- struct page *page;
unsigned long hpa;
enum dma_data_direction dirtmp;
@@ -548,15 +555,16 @@ static long tce_iommu_build(struct tce_container *container,
if (ret)
break;
- page = pfn_to_page(hpa >> PAGE_SHIFT);
- if (!tce_page_is_contained(page, tbl->it_page_shift)) {
+ if (!tce_page_is_contained(container->mm, hpa,
+ tbl->it_page_shift)) {
ret = -EPERM;
break;
}
hpa |= offset;
dirtmp = direction;
- ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp);
+ ret = iommu_tce_xchg(container->mm, tbl, entry + i, &hpa,
+ &dirtmp);
if (ret) {
tce_iommu_unuse_page(container, hpa);
pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
@@ -583,7 +591,6 @@ static long tce_iommu_build_v2(struct tce_container *container,
enum dma_data_direction direction)
{
long i, ret = 0;
- struct page *page;
unsigned long hpa;
enum dma_data_direction dirtmp;
@@ -596,8 +603,8 @@ static long tce_iommu_build_v2(struct tce_container *container,
if (ret)
break;
- page = pfn_to_page(hpa >> PAGE_SHIFT);
- if (!tce_page_is_contained(page, tbl->it_page_shift)) {
+ if (!tce_page_is_contained(container->mm, hpa,
+ tbl->it_page_shift)) {
ret = -EPERM;
break;
}
@@ -610,7 +617,8 @@ static long tce_iommu_build_v2(struct tce_container *container,
if (mm_iommu_mapped_inc(mem))
break;
- ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp);
+ ret = iommu_tce_xchg(container->mm, tbl, entry + i, &hpa,
+ &dirtmp);
if (ret) {
/* dirtmp cannot be DMA_NONE here */
tce_iommu_unuse_page_v2(container, tbl, entry + i);
--
2.11.0
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH kernel 1/4] powerpc/mm/iommu: Rename mm_iommu_get
2018-10-15 9:24 ` [PATCH kernel 1/4] powerpc/mm/iommu: Rename mm_iommu_get Alexey Kardashevskiy
@ 2018-10-17 0:44 ` David Gibson
0 siblings, 0 replies; 10+ messages in thread
From: David Gibson @ 2018-10-17 0:44 UTC (permalink / raw)
To: Alexey Kardashevskiy; +Cc: Alex Williamson, linuxppc-dev, kvm-ppc
[-- Attachment #1: Type: text/plain, Size: 5232 bytes --]
On Mon, Oct 15, 2018 at 08:24:13PM +1100, Alexey Kardashevskiy wrote:
> Normally mm_iommu_get() is supposed to add a reference and
> mm_iommu_put() to remove it. However historically mm_iommu_find() does
> the referencing and mm_iommu_get() is doing allocation and referencing.
>
> This is step 1 towards simpler mm_iommu_get().
>
> This renames:
> - mm_iommu_get to mm_iommu_new;
> - mm_iommu_find to mm_iommu_get.
>
> This should cause no behavioural change.
>
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Erm.. as far as I can tell, mm_iommu_find() doesn't adjust any
reference counts, so renaming it to mm_iommu_get() doesn't really make
sense.
> ---
> arch/powerpc/include/asm/mmu_context.h | 4 ++--
> arch/powerpc/mm/mmu_context_iommu.c | 8 ++++----
> drivers/vfio/vfio_iommu_spapr_tce.c | 6 +++---
> 3 files changed, 9 insertions(+), 9 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
> index b694d6a..59d4941 100644
> --- a/arch/powerpc/include/asm/mmu_context.h
> +++ b/arch/powerpc/include/asm/mmu_context.h
> @@ -21,7 +21,7 @@ struct mm_iommu_table_group_mem_t;
>
> extern int isolate_lru_page(struct page *page); /* from internal.h */
> extern bool mm_iommu_preregistered(struct mm_struct *mm);
> -extern long mm_iommu_get(struct mm_struct *mm,
> +extern long mm_iommu_new(struct mm_struct *mm,
> unsigned long ua, unsigned long entries,
> struct mm_iommu_table_group_mem_t **pmem);
> extern long mm_iommu_put(struct mm_struct *mm,
> @@ -32,7 +32,7 @@ extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
> unsigned long ua, unsigned long size);
> extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(
> struct mm_struct *mm, unsigned long ua, unsigned long size);
> -extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
> +extern struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
> unsigned long ua, unsigned long entries);
> extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
> unsigned long ua, unsigned int pageshift, unsigned long *hpa);
> diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
> index 56c2234..8eeb99d 100644
> --- a/arch/powerpc/mm/mmu_context_iommu.c
> +++ b/arch/powerpc/mm/mmu_context_iommu.c
> @@ -126,7 +126,7 @@ static int mm_iommu_move_page_from_cma(struct page *page)
> return 0;
> }
>
> -long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
> +long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
> struct mm_iommu_table_group_mem_t **pmem)
> {
> struct mm_iommu_table_group_mem_t *mem;
> @@ -252,7 +252,7 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
>
> return ret;
> }
> -EXPORT_SYMBOL_GPL(mm_iommu_get);
> +EXPORT_SYMBOL_GPL(mm_iommu_new);
>
> static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
> {
> @@ -368,7 +368,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
> return ret;
> }
>
> -struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
> +struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
> unsigned long ua, unsigned long entries)
> {
> struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
> @@ -382,7 +382,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
>
> return ret;
> }
> -EXPORT_SYMBOL_GPL(mm_iommu_find);
> +EXPORT_SYMBOL_GPL(mm_iommu_get);
>
> long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
> unsigned long ua, unsigned int pageshift, unsigned long *hpa)
> diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
> index ad63725..1701798 100644
> --- a/drivers/vfio/vfio_iommu_spapr_tce.c
> +++ b/drivers/vfio/vfio_iommu_spapr_tce.c
> @@ -156,7 +156,7 @@ static long tce_iommu_unregister_pages(struct tce_container *container,
> if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK))
> return -EINVAL;
>
> - mem = mm_iommu_find(container->mm, vaddr, size >> PAGE_SHIFT);
> + mem = mm_iommu_get(container->mm, vaddr, size >> PAGE_SHIFT);
> if (!mem)
> return -ENOENT;
>
> @@ -185,7 +185,7 @@ static long tce_iommu_register_pages(struct tce_container *container,
> ((vaddr + size) < vaddr))
> return -EINVAL;
>
> - mem = mm_iommu_find(container->mm, vaddr, entries);
> + mem = mm_iommu_get(container->mm, vaddr, entries);
> if (mem) {
> list_for_each_entry(tcemem, &container->prereg_list, next) {
> if (tcemem->mem == mem)
> @@ -193,7 +193,7 @@ static long tce_iommu_register_pages(struct tce_container *container,
> }
> }
>
> - ret = mm_iommu_get(container->mm, vaddr, entries, &mem);
> + ret = mm_iommu_new(container->mm, vaddr, entries, &mem);
> if (ret)
> return ret;
>
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH kernel 2/4] powerpc/mm/iommu/vfio_spapr_tce: Change mm_iommu_get to reference a region
2018-10-15 9:24 ` [PATCH kernel 2/4] powerpc/mm/iommu/vfio_spapr_tce: Change mm_iommu_get to reference a region Alexey Kardashevskiy
@ 2018-10-17 0:46 ` David Gibson
0 siblings, 0 replies; 10+ messages in thread
From: David Gibson @ 2018-10-17 0:46 UTC (permalink / raw)
To: Alexey Kardashevskiy; +Cc: Alex Williamson, linuxppc-dev, kvm-ppc
[-- Attachment #1: Type: text/plain, Size: 3941 bytes --]
On Mon, Oct 15, 2018 at 08:24:14PM +1100, Alexey Kardashevskiy wrote:
> We are going to add another helper to preregister device memory so
> instead of having mm_iommu_new() which pre-registers the normal memory
> and references the region, we need separate helpers for pre-registerign
> and referencing.
>
> To make the mm_iommu_get name reflect what it is supposed to do, this
> changes mm_iommu_get() to reference the region so from now on for every
> mm_iommu_get() we need a matching mm_iommu_put().
>
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
.. ah, I see.
I think this should be folded with the first patch, so we don't have
an interim step where mm_iommu_get() has a misleading name.
> ---
> arch/powerpc/mm/mmu_context_iommu.c | 5 +++++
> drivers/vfio/vfio_iommu_spapr_tce.c | 33 ++++++++++++++++++++++-----------
> 2 files changed, 27 insertions(+), 11 deletions(-)
>
> diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
> index 8eeb99d..a8c4a3c 100644
> --- a/arch/powerpc/mm/mmu_context_iommu.c
> +++ b/arch/powerpc/mm/mmu_context_iommu.c
> @@ -373,13 +373,18 @@ struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
> {
> struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
>
> + mutex_lock(&mem_list_mutex);
> +
> list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
> if ((mem->ua == ua) && (mem->entries == entries)) {
> ret = mem;
> + ++mem->used;
> break;
> }
> }
>
> + mutex_unlock(&mem_list_mutex);
> +
> return ret;
> }
> EXPORT_SYMBOL_GPL(mm_iommu_get);
> diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
> index 1701798..56db071 100644
> --- a/drivers/vfio/vfio_iommu_spapr_tce.c
> +++ b/drivers/vfio/vfio_iommu_spapr_tce.c
> @@ -151,7 +151,8 @@ static long tce_iommu_unregister_pages(struct tce_container *container,
> {
> struct mm_iommu_table_group_mem_t *mem;
> struct tce_iommu_prereg *tcemem;
> - bool found = false;
> + bool found;
> + long ret;
>
> if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK))
> return -EINVAL;
> @@ -168,9 +169,13 @@ static long tce_iommu_unregister_pages(struct tce_container *container,
> }
>
> if (!found)
> - return -ENOENT;
> + ret = -ENOENT;
> + else
> + ret = tce_iommu_prereg_free(container, tcemem);
>
> - return tce_iommu_prereg_free(container, tcemem);
> + mm_iommu_put(container->mm, mem);
> +
> + return ret;
> }
>
> static long tce_iommu_register_pages(struct tce_container *container,
> @@ -188,19 +193,21 @@ static long tce_iommu_register_pages(struct tce_container *container,
> mem = mm_iommu_get(container->mm, vaddr, entries);
> if (mem) {
> list_for_each_entry(tcemem, &container->prereg_list, next) {
> - if (tcemem->mem == mem)
> - return -EBUSY;
> + if (tcemem->mem == mem) {
> + ret = -EBUSY;
> + goto put_exit;
> + }
> }
> + } else {
> + ret = mm_iommu_new(container->mm, vaddr, entries, &mem);
> + if (ret)
> + return ret;
> }
>
> - ret = mm_iommu_new(container->mm, vaddr, entries, &mem);
> - if (ret)
> - return ret;
> -
> tcemem = kzalloc(sizeof(*tcemem), GFP_KERNEL);
> if (!tcemem) {
> - mm_iommu_put(container->mm, mem);
> - return -ENOMEM;
> + ret = -ENOMEM;
> + goto put_exit;
> }
>
> tcemem->mem = mem;
> @@ -209,6 +216,10 @@ static long tce_iommu_register_pages(struct tce_container *container,
> container->enabled = true;
>
> return 0;
> +
> +put_exit:
> + mm_iommu_put(container->mm, mem);
> + return ret;
> }
>
> static bool tce_page_is_contained(struct page *page, unsigned page_shift)
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH kernel 3/4] powerpc/mm/iommu: Make mm_iommu_new() fail on existing regions
2018-10-15 9:24 ` [PATCH kernel 3/4] powerpc/mm/iommu: Make mm_iommu_new() fail on existing regions Alexey Kardashevskiy
@ 2018-10-17 1:00 ` David Gibson
2018-10-17 3:34 ` Alexey Kardashevskiy
0 siblings, 1 reply; 10+ messages in thread
From: David Gibson @ 2018-10-17 1:00 UTC (permalink / raw)
To: Alexey Kardashevskiy; +Cc: Alex Williamson, linuxppc-dev, kvm-ppc
[-- Attachment #1: Type: text/plain, Size: 1429 bytes --]
On Mon, Oct 15, 2018 at 08:24:15PM +1100, Alexey Kardashevskiy wrote:
> Since we are going to have 2 different preregistering helpers, let's
> make it clear that mm_iommu_new() is only for the normal (i.e. not device)
> memory and for existing areas mm_iommu_get() should be used instead.
>
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
I think the idea is sensible. However (and, yes, this is really an
existing bug) - shouldn't we check for a request to add anything
overlapping with an existing region, not just one that exactly
matches?
> ---
> arch/powerpc/mm/mmu_context_iommu.c | 3 +--
> 1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
> index a8c4a3c..839dbce 100644
> --- a/arch/powerpc/mm/mmu_context_iommu.c
> +++ b/arch/powerpc/mm/mmu_context_iommu.c
> @@ -141,8 +141,7 @@ long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
> list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list,
> next) {
> if ((mem->ua == ua) && (mem->entries == entries)) {
> - ++mem->used;
> - *pmem = mem;
> + ret = -EBUSY;
> goto unlock_exit;
> }
>
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH kernel 3/4] powerpc/mm/iommu: Make mm_iommu_new() fail on existing regions
2018-10-17 1:00 ` David Gibson
@ 2018-10-17 3:34 ` Alexey Kardashevskiy
2018-10-17 4:45 ` David Gibson
0 siblings, 1 reply; 10+ messages in thread
From: Alexey Kardashevskiy @ 2018-10-17 3:34 UTC (permalink / raw)
To: David Gibson; +Cc: Alex Williamson, linuxppc-dev, kvm-ppc
On 17/10/2018 12:00, David Gibson wrote:
> On Mon, Oct 15, 2018 at 08:24:15PM +1100, Alexey Kardashevskiy wrote:
>> Since we are going to have 2 different preregistering helpers, let's
>> make it clear that mm_iommu_new() is only for the normal (i.e. not device)
>> memory and for existing areas mm_iommu_get() should be used instead.
>>
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>
> I think the idea is sensible. However (and, yes, this is really an
> existing bug) - shouldn't we check for a request to add anything
> overlapping with an existing region, not just one that exactly
> matches?
The overlap check is below the changed hunk:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/powerpc/mm/mmu_context_iommu.c#n150
>
>> ---
>> arch/powerpc/mm/mmu_context_iommu.c | 3 +--
>> 1 file changed, 1 insertion(+), 2 deletions(-)
>>
>> diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
>> index a8c4a3c..839dbce 100644
>> --- a/arch/powerpc/mm/mmu_context_iommu.c
>> +++ b/arch/powerpc/mm/mmu_context_iommu.c
>> @@ -141,8 +141,7 @@ long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
>> list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list,
>> next) {
>> if ((mem->ua == ua) && (mem->entries == entries)) {
>> - ++mem->used;
>> - *pmem = mem;
>> + ret = -EBUSY;
>> goto unlock_exit;
>> }
>>
>
--
Alexey
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH kernel 3/4] powerpc/mm/iommu: Make mm_iommu_new() fail on existing regions
2018-10-17 3:34 ` Alexey Kardashevskiy
@ 2018-10-17 4:45 ` David Gibson
0 siblings, 0 replies; 10+ messages in thread
From: David Gibson @ 2018-10-17 4:45 UTC (permalink / raw)
To: Alexey Kardashevskiy; +Cc: Alex Williamson, linuxppc-dev, kvm-ppc
[-- Attachment #1: Type: text/plain, Size: 2109 bytes --]
On Wed, Oct 17, 2018 at 02:34:32PM +1100, Alexey Kardashevskiy wrote:
>
>
> On 17/10/2018 12:00, David Gibson wrote:
> > On Mon, Oct 15, 2018 at 08:24:15PM +1100, Alexey Kardashevskiy wrote:
> >> Since we are going to have 2 different preregistering helpers, let's
> >> make it clear that mm_iommu_new() is only for the normal (i.e. not device)
> >> memory and for existing areas mm_iommu_get() should be used instead.
> >>
> >> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> >
> > I think the idea is sensible. However (and, yes, this is really an
> > existing bug) - shouldn't we check for a request to add anything
> > overlapping with an existing region, not just one that exactly
> > matches?
>
> The overlap check is below the changed hunk:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/powerpc/mm/mmu_context_iommu.c#n150
Ah, right.
In that case can't you just drop this whole if. I don't see that
there's any use in giving different error codes for "tried to register
exactly a region you registered before" and "tried to register a
region overlapping one you registered before".
>
>
> >
> >> ---
> >> arch/powerpc/mm/mmu_context_iommu.c | 3 +--
> >> 1 file changed, 1 insertion(+), 2 deletions(-)
> >>
> >> diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
> >> index a8c4a3c..839dbce 100644
> >> --- a/arch/powerpc/mm/mmu_context_iommu.c
> >> +++ b/arch/powerpc/mm/mmu_context_iommu.c
> >> @@ -141,8 +141,7 @@ long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
> >> list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list,
> >> next) {
> >> if ((mem->ua == ua) && (mem->entries == entries)) {
> >> - ++mem->used;
> >> - *pmem = mem;
> >> + ret = -EBUSY;
> >> goto unlock_exit;
> >> }
> >>
> >
>
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2018-10-17 4:51 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-10-15 9:24 [PATCH kernel 0/4] vfio/spapr_tce: Reworks for NVIDIA V100 + P9 passthrough (part 1) Alexey Kardashevskiy
2018-10-15 9:24 ` [PATCH kernel 1/4] powerpc/mm/iommu: Rename mm_iommu_get Alexey Kardashevskiy
2018-10-17 0:44 ` David Gibson
2018-10-15 9:24 ` [PATCH kernel 2/4] powerpc/mm/iommu/vfio_spapr_tce: Change mm_iommu_get to reference a region Alexey Kardashevskiy
2018-10-17 0:46 ` David Gibson
2018-10-15 9:24 ` [PATCH kernel 3/4] powerpc/mm/iommu: Make mm_iommu_new() fail on existing regions Alexey Kardashevskiy
2018-10-17 1:00 ` David Gibson
2018-10-17 3:34 ` Alexey Kardashevskiy
2018-10-17 4:45 ` David Gibson
2018-10-15 9:24 ` [PATCH kernel 4/4] powerpc/vfio/iommu/kvm: Do not pin device memory Alexey Kardashevskiy
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).