* [PATCH 0/5] NestMMU pte upgrade workaround for mprotect and autonuma
@ 2018-10-11 3:52 Aneesh Kumar K.V
2018-10-11 3:52 ` [PATCH 1/5] mm: Update ptep_modify_prot_start/commit to take vm_area_struct as arg Aneesh Kumar K.V
` (4 more replies)
0 siblings, 5 replies; 6+ messages in thread
From: Aneesh Kumar K.V @ 2018-10-11 3:52 UTC (permalink / raw)
To: akpm, mpe, benh; +Cc: linux-mm, linux-kernel, linuxppc-dev, Aneesh Kumar K.V
We can upgrade pte access (R -> RW transition) via mprotect or autonuma. We need
to make sure we follow the recommended pte update sequence as outlined in
commit: bd5050e38aec ("powerpc/mm/radix: Change pte relax sequence to handle nest MMU hang")
for such updates. This patch series does that.
Aneesh Kumar K.V (5):
mm: Update ptep_modify_prot_start/commit to take vm_area_struct as arg
mm: update ptep_modify_prot_commit to take old pte value as arg
arch/powerpc/mm: Nest MMU workaround for mprotect/autonuma RW upgrade.
mm/hugetlb: Add prot_modify_start/commit sequence for hugetlb update
arch/powerpc/mm/hugetlb: NestMMU workaround for hugetlb mprotect RW
upgrade
arch/powerpc/include/asm/book3s/64/hugetlb.h | 8 +++++
arch/powerpc/include/asm/book3s/64/pgtable.h | 18 ++++++++++
arch/powerpc/include/asm/hugetlb.h | 2 +-
arch/powerpc/mm/hugetlbpage.c | 35 ++++++++++++++++++++
arch/powerpc/mm/pgtable-book3s64.c | 34 +++++++++++++++++++
arch/s390/include/asm/pgtable.h | 5 +--
arch/s390/mm/pgtable.c | 8 +++--
arch/x86/include/asm/paravirt.h | 9 +++--
fs/proc/task_mmu.c | 8 +++--
include/asm-generic/pgtable.h | 10 +++---
include/linux/hugetlb.h | 18 ++++++++++
mm/hugetlb.c | 8 +++--
mm/memory.c | 8 ++---
mm/mprotect.c | 6 ++--
14 files changed, 150 insertions(+), 27 deletions(-)
--
2.17.1
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH 1/5] mm: Update ptep_modify_prot_start/commit to take vm_area_struct as arg
2018-10-11 3:52 [PATCH 0/5] NestMMU pte upgrade workaround for mprotect and autonuma Aneesh Kumar K.V
@ 2018-10-11 3:52 ` Aneesh Kumar K.V
2018-10-11 3:52 ` [PATCH 2/5] mm: update ptep_modify_prot_commit to take old pte value " Aneesh Kumar K.V
` (3 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Aneesh Kumar K.V @ 2018-10-11 3:52 UTC (permalink / raw)
To: akpm, mpe, benh; +Cc: linux-mm, linux-kernel, linuxppc-dev, Aneesh Kumar K.V
Some architectures may want to call flush_tlb_range from these helpers.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
arch/s390/include/asm/pgtable.h | 4 ++--
arch/s390/mm/pgtable.c | 6 ++++--
arch/x86/include/asm/paravirt.h | 7 +++++--
fs/proc/task_mmu.c | 4 ++--
include/asm-generic/pgtable.h | 8 ++++----
mm/memory.c | 4 ++--
mm/mprotect.c | 4 ++--
7 files changed, 21 insertions(+), 16 deletions(-)
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 0e7cb0dc9c33..8e7f26dfedc6 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1035,8 +1035,8 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
}
#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
-pte_t ptep_modify_prot_start(struct mm_struct *, unsigned long, pte_t *);
-void ptep_modify_prot_commit(struct mm_struct *, unsigned long, pte_t *, pte_t);
+pte_t ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
+void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long, pte_t *, pte_t);
#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index f2cc7da473e4..29c0a21cd34a 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -301,12 +301,13 @@ pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
}
EXPORT_SYMBOL(ptep_xchg_lazy);
-pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
+pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
pte_t *ptep)
{
pgste_t pgste;
pte_t old;
int nodat;
+ struct mm_struct *mm = vma->vm_mm;
preempt_disable();
pgste = ptep_xchg_start(mm, addr, ptep);
@@ -320,10 +321,11 @@ pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
}
EXPORT_SYMBOL(ptep_modify_prot_start);
-void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
+void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
pte_t *ptep, pte_t pte)
{
pgste_t pgste;
+ struct mm_struct *mm = vma->vm_mm;
if (!MACHINE_HAS_NX)
pte_val(pte) &= ~_PAGE_NOEXEC;
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index e375d4266b53..c5d203a51e50 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -421,10 +421,11 @@ static inline pgdval_t pgd_val(pgd_t pgd)
}
#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
-static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
+static inline pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
pte_t *ptep)
{
pteval_t ret;
+ struct mm_struct *mm = vma->vm_mm;
ret = PVOP_CALL3(pteval_t, pv_mmu_ops.ptep_modify_prot_start,
mm, addr, ptep);
@@ -432,9 +433,11 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long a
return (pte_t) { .pte = ret };
}
-static inline void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
+static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
pte_t *ptep, pte_t pte)
{
+ struct mm_struct *mm = vma->vm_mm;
+
if (sizeof(pteval_t) > sizeof(long))
/* 5 arg words */
pv_mmu_ops.ptep_modify_prot_commit(mm, addr, ptep, pte);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 5ea1d64cb0b4..229df16e7ad0 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -938,10 +938,10 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
pte_t ptent = *pte;
if (pte_present(ptent)) {
- ptent = ptep_modify_prot_start(vma->vm_mm, addr, pte);
+ ptent = ptep_modify_prot_start(vma, addr, pte);
ptent = pte_wrprotect(ptent);
ptent = pte_clear_soft_dirty(ptent);
- ptep_modify_prot_commit(vma->vm_mm, addr, pte, ptent);
+ ptep_modify_prot_commit(vma, addr, pte, ptent);
} else if (is_swap_pte(ptent)) {
ptent = pte_swp_clear_soft_dirty(ptent);
set_pte_at(vma->vm_mm, addr, pte, ptent);
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 88ebc6102c7c..021b94cd3260 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -606,22 +606,22 @@ static inline void __ptep_modify_prot_commit(struct mm_struct *mm,
* queue the update to be done at some later time. The update must be
* actually committed before the pte lock is released, however.
*/
-static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
+static inline pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
unsigned long addr,
pte_t *ptep)
{
- return __ptep_modify_prot_start(mm, addr, ptep);
+ return __ptep_modify_prot_start(vma->vm_mm, addr, ptep);
}
/*
* Commit an update to a pte, leaving any hardware-controlled bits in
* the PTE unmodified.
*/
-static inline void ptep_modify_prot_commit(struct mm_struct *mm,
+static inline void ptep_modify_prot_commit(struct vm_area_struct *vma,
unsigned long addr,
pte_t *ptep, pte_t pte)
{
- __ptep_modify_prot_commit(mm, addr, ptep, pte);
+ __ptep_modify_prot_commit(vma->vm_mm, addr, ptep, pte);
}
#endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */
#endif /* CONFIG_MMU */
diff --git a/mm/memory.c b/mm/memory.c
index c467102a5cbc..261d30f51499 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3806,12 +3806,12 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
* Make it present again, Depending on how arch implementes non
* accessible ptes, some can allow access by kernel mode.
*/
- pte = ptep_modify_prot_start(vma->vm_mm, vmf->address, vmf->pte);
+ pte = ptep_modify_prot_start(vma, vmf->address, vmf->pte);
pte = pte_modify(pte, vma->vm_page_prot);
pte = pte_mkyoung(pte);
if (was_writable)
pte = pte_mkwrite(pte);
- ptep_modify_prot_commit(vma->vm_mm, vmf->address, vmf->pte, pte);
+ ptep_modify_prot_commit(vma, vmf->address, vmf->pte, pte);
update_mmu_cache(vma, vmf->address, vmf->pte);
page = vm_normal_page(vma, vmf->address, pte);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 6d331620b9e5..a301d4c83d3c 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -110,7 +110,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
continue;
}
- ptent = ptep_modify_prot_start(mm, addr, pte);
+ ptent = ptep_modify_prot_start(vma, addr, pte);
ptent = pte_modify(ptent, newprot);
if (preserve_write)
ptent = pte_mk_savedwrite(ptent);
@@ -121,7 +121,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
!(vma->vm_flags & VM_SOFTDIRTY))) {
ptent = pte_mkwrite(ptent);
}
- ptep_modify_prot_commit(mm, addr, pte, ptent);
+ ptep_modify_prot_commit(vma, addr, pte, ptent);
pages++;
} else if (IS_ENABLED(CONFIG_MIGRATION)) {
swp_entry_t entry = pte_to_swp_entry(oldpte);
--
2.17.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 2/5] mm: update ptep_modify_prot_commit to take old pte value as arg
2018-10-11 3:52 [PATCH 0/5] NestMMU pte upgrade workaround for mprotect and autonuma Aneesh Kumar K.V
2018-10-11 3:52 ` [PATCH 1/5] mm: Update ptep_modify_prot_start/commit to take vm_area_struct as arg Aneesh Kumar K.V
@ 2018-10-11 3:52 ` Aneesh Kumar K.V
2018-10-11 3:52 ` [PATCH 3/5] arch/powerpc/mm: Nest MMU workaround for mprotect/autonuma RW upgrade Aneesh Kumar K.V
` (2 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Aneesh Kumar K.V @ 2018-10-11 3:52 UTC (permalink / raw)
To: akpm, mpe, benh; +Cc: linux-mm, linux-kernel, linuxppc-dev, Aneesh Kumar K.V
Architectures like ppc64 need to do a conditional tlb flush based on the old
and new values of the pte. Enable that by passing the old pte value as an arg.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
arch/s390/include/asm/pgtable.h | 3 ++-
arch/s390/mm/pgtable.c | 2 +-
arch/x86/include/asm/paravirt.h | 2 +-
fs/proc/task_mmu.c | 8 +++++---
include/asm-generic/pgtable.h | 2 +-
mm/memory.c | 8 ++++----
mm/mprotect.c | 6 +++---
7 files changed, 17 insertions(+), 14 deletions(-)
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 8e7f26dfedc6..626250436897 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1036,7 +1036,8 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
pte_t ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
-void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long, pte_t *, pte_t);
+void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
+ pte_t *, pte_t, pte_t);
#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 29c0a21cd34a..b283b92722cc 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -322,7 +322,7 @@ pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
EXPORT_SYMBOL(ptep_modify_prot_start);
void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
- pte_t *ptep, pte_t pte)
+ pte_t *ptep, pte_t old_pte, pte_t pte)
{
pgste_t pgste;
struct mm_struct *mm = vma->vm_mm;
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index c5d203a51e50..17214e074286 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -434,7 +434,7 @@ static inline pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned
}
static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
- pte_t *ptep, pte_t pte)
+ pte_t *ptep, pte_t old_pte, pte_t pte)
{
struct mm_struct *mm = vma->vm_mm;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 229df16e7ad0..505aa21d04df 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -938,10 +938,12 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
pte_t ptent = *pte;
if (pte_present(ptent)) {
- ptent = ptep_modify_prot_start(vma, addr, pte);
- ptent = pte_wrprotect(ptent);
+ pte_t old_pte;
+
+ old_pte = ptep_modify_prot_start(vma, addr, pte);
+ ptent = pte_wrprotect(old_pte);
ptent = pte_clear_soft_dirty(ptent);
- ptep_modify_prot_commit(vma, addr, pte, ptent);
+ ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
} else if (is_swap_pte(ptent)) {
ptent = pte_swp_clear_soft_dirty(ptent);
set_pte_at(vma->vm_mm, addr, pte, ptent);
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 021b94cd3260..4e4723f6be5e 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -619,7 +619,7 @@ static inline pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
*/
static inline void ptep_modify_prot_commit(struct vm_area_struct *vma,
unsigned long addr,
- pte_t *ptep, pte_t pte)
+ pte_t *ptep, pte_t old_pte, pte_t pte)
{
__ptep_modify_prot_commit(vma->vm_mm, addr, ptep, pte);
}
diff --git a/mm/memory.c b/mm/memory.c
index 261d30f51499..211df764f232 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3786,7 +3786,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
int last_cpupid;
int target_nid;
bool migrated = false;
- pte_t pte;
+ pte_t pte, old_pte;
bool was_writable = pte_savedwrite(vmf->orig_pte);
int flags = 0;
@@ -3806,12 +3806,12 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
* Make it present again, Depending on how arch implementes non
* accessible ptes, some can allow access by kernel mode.
*/
- pte = ptep_modify_prot_start(vma, vmf->address, vmf->pte);
- pte = pte_modify(pte, vma->vm_page_prot);
+ old_pte = ptep_modify_prot_start(vma, vmf->address, vmf->pte);
+ pte = pte_modify(old_pte, vma->vm_page_prot);
pte = pte_mkyoung(pte);
if (was_writable)
pte = pte_mkwrite(pte);
- ptep_modify_prot_commit(vma, vmf->address, vmf->pte, pte);
+ ptep_modify_prot_commit(vma, vmf->address, vmf->pte, old_pte, pte);
update_mmu_cache(vma, vmf->address, vmf->pte);
page = vm_normal_page(vma, vmf->address, pte);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index a301d4c83d3c..1b46b1b1248d 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -110,8 +110,8 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
continue;
}
- ptent = ptep_modify_prot_start(vma, addr, pte);
- ptent = pte_modify(ptent, newprot);
+ oldpte = ptep_modify_prot_start(vma, addr, pte);
+ ptent = pte_modify(oldpte, newprot);
if (preserve_write)
ptent = pte_mk_savedwrite(ptent);
@@ -121,7 +121,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
!(vma->vm_flags & VM_SOFTDIRTY))) {
ptent = pte_mkwrite(ptent);
}
- ptep_modify_prot_commit(vma, addr, pte, ptent);
+ ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent);
pages++;
} else if (IS_ENABLED(CONFIG_MIGRATION)) {
swp_entry_t entry = pte_to_swp_entry(oldpte);
--
2.17.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 3/5] arch/powerpc/mm: Nest MMU workaround for mprotect/autonuma RW upgrade.
2018-10-11 3:52 [PATCH 0/5] NestMMU pte upgrade workaround for mprotect and autonuma Aneesh Kumar K.V
2018-10-11 3:52 ` [PATCH 1/5] mm: Update ptep_modify_prot_start/commit to take vm_area_struct as arg Aneesh Kumar K.V
2018-10-11 3:52 ` [PATCH 2/5] mm: update ptep_modify_prot_commit to take old pte value " Aneesh Kumar K.V
@ 2018-10-11 3:52 ` Aneesh Kumar K.V
2018-10-11 3:52 ` [PATCH 4/5] mm/hugetlb: Add prot_modify_start/commit sequence for hugetlb update Aneesh Kumar K.V
2018-10-11 3:52 ` [PATCH 5/5] arch/powerpc/mm/hugetlb: NestMMU workaround for hugetlb mprotect RW upgrade Aneesh Kumar K.V
4 siblings, 0 replies; 6+ messages in thread
From: Aneesh Kumar K.V @ 2018-10-11 3:52 UTC (permalink / raw)
To: akpm, mpe, benh; +Cc: linux-mm, linux-kernel, linuxppc-dev, Aneesh Kumar K.V
NestMMU requires us to mark the pte invalid and flush the tlb when we do an
RW upgrade of a pte. We fixed a variant of this in the fault path in commit
Fixes: bd5050e38aec ("powerpc/mm/radix: Change pte relax sequence to handle nest MMU hang")
Do the same for mprotect and autonuma upgrades.
Hugetlb is handled in the next patch.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
arch/powerpc/include/asm/book3s/64/pgtable.h | 18 +++++++++++
arch/powerpc/mm/pgtable-book3s64.c | 34 ++++++++++++++++++++
2 files changed, 52 insertions(+)
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index f108e2ce7f64..c55468eaedc7 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1324,6 +1324,24 @@ static inline const int pud_pfn(pud_t pud)
BUILD_BUG();
return 0;
}
+#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
+pte_t ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
+void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
+ pte_t *, pte_t, pte_t);
+
+/*
+ * Returns true for a Read or Write upgrade of pte.
+ */
+static inline bool is_pte_upgrade(unsigned long old_val, unsigned long new_val)
+{
+ if ((!(old_val & _PAGE_READ)) && (new_val & _PAGE_READ))
+ return true;
+
+ if ((!(old_val & _PAGE_WRITE)) && (new_val & _PAGE_WRITE))
+ return true;
+
+ return false;
+}
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 43e99e1d947b..43f71125249b 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -481,3 +481,37 @@ void arch_report_meminfo(struct seq_file *m)
atomic_long_read(&direct_pages_count[MMU_PAGE_1G]) << 20);
}
#endif /* CONFIG_PROC_FS */
+
+pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *ptep)
+{
+ unsigned long pte_val;
+
+ /*
+ * Clear the _PAGE_PRESENT so that no hardware parallel update is
+ * possible. Also keep the pte_present true so that we don't take
+ * wrong fault.
+ */
+ pte_val = pte_update(vma->vm_mm, addr, ptep, _PAGE_PRESENT, _PAGE_INVALID, 0);
+
+ return __pte(pte_val);
+
+}
+EXPORT_SYMBOL(ptep_modify_prot_start);
+
+void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *ptep, pte_t old_pte, pte_t pte)
+{
+ struct mm_struct *mm = vma->vm_mm;
+
+ /*
+ * To avoid NMMU hang while relaxing access we need to flush the tlb before
+ * we set the new value.
+ */
+ if (is_pte_upgrade(pte_val(old_pte), pte_val(pte)) &&
+ (atomic_read(&mm->context.copros) > 0))
+ flush_tlb_page(vma, addr);
+
+ set_pte_at(mm, addr, ptep, pte);
+}
+EXPORT_SYMBOL(ptep_modify_prot_commit);
--
2.17.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 4/5] mm/hugetlb: Add prot_modify_start/commit sequence for hugetlb update
2018-10-11 3:52 [PATCH 0/5] NestMMU pte upgrade workaround for mprotect and autonuma Aneesh Kumar K.V
` (2 preceding siblings ...)
2018-10-11 3:52 ` [PATCH 3/5] arch/powerpc/mm: Nest MMU workaround for mprotect/autonuma RW upgrade Aneesh Kumar K.V
@ 2018-10-11 3:52 ` Aneesh Kumar K.V
2018-10-11 3:52 ` [PATCH 5/5] arch/powerpc/mm/hugetlb: NestMMU workaround for hugetlb mprotect RW upgrade Aneesh Kumar K.V
4 siblings, 0 replies; 6+ messages in thread
From: Aneesh Kumar K.V @ 2018-10-11 3:52 UTC (permalink / raw)
To: akpm, mpe, benh; +Cc: linux-mm, linux-kernel, linuxppc-dev, Aneesh Kumar K.V
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
include/linux/hugetlb.h | 18 ++++++++++++++++++
mm/hugetlb.c | 8 +++++---
2 files changed, 23 insertions(+), 3 deletions(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 087fd5f48c91..e2a3b0c854eb 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -543,6 +543,24 @@ static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr
set_huge_pte_at(mm, addr, ptep, pte);
}
#endif
+
+#ifndef huge_ptep_modify_prot_start
+static inline pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
+{
+ return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
+}
+#endif
+
+#ifndef huge_ptep_modify_prot_commit
+static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t old_pte, pte_t pte)
+{
+ set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
+}
+#endif
+
#else /* CONFIG_HUGETLB_PAGE */
struct hstate {};
#define alloc_huge_page(v, a, r) NULL
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 5c390f5a5207..1f3a4df95b2e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4367,10 +4367,12 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
continue;
}
if (!huge_pte_none(pte)) {
- pte = huge_ptep_get_and_clear(mm, address, ptep);
- pte = pte_mkhuge(huge_pte_modify(pte, newprot));
+ pte_t old_pte;
+
+ old_pte = huge_ptep_modify_prot_start(vma, address, ptep);
+ pte = pte_mkhuge(huge_pte_modify(old_pte, newprot));
pte = arch_make_huge_pte(pte, vma, NULL, 0);
- set_huge_pte_at(mm, address, ptep, pte);
+ huge_ptep_modify_prot_commit(vma, address, ptep, old_pte, pte);
pages++;
}
spin_unlock(ptl);
--
2.17.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 5/5] arch/powerpc/mm/hugetlb: NestMMU workaround for hugetlb mprotect RW upgrade
2018-10-11 3:52 [PATCH 0/5] NestMMU pte upgrade workaround for mprotect and autonuma Aneesh Kumar K.V
` (3 preceding siblings ...)
2018-10-11 3:52 ` [PATCH 4/5] mm/hugetlb: Add prot_modify_start/commit sequence for hugetlb update Aneesh Kumar K.V
@ 2018-10-11 3:52 ` Aneesh Kumar K.V
4 siblings, 0 replies; 6+ messages in thread
From: Aneesh Kumar K.V @ 2018-10-11 3:52 UTC (permalink / raw)
To: akpm, mpe, benh; +Cc: linux-mm, linux-kernel, linuxppc-dev, Aneesh Kumar K.V
NestMMU requires us to mark the pte invalid and flush the tlb when we do an
RW upgrade of a pte. We fixed a variant of this in the fault path in commit
Fixes: bd5050e38aec ("powerpc/mm/radix: Change pte relax sequence to handle nest MMU hang")
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
arch/powerpc/include/asm/book3s/64/hugetlb.h | 8 +++++
arch/powerpc/include/asm/hugetlb.h | 2 +-
arch/powerpc/mm/hugetlbpage.c | 35 ++++++++++++++++++++
3 files changed, 44 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h
index 5b0177733994..a12bde29a5f0 100644
--- a/arch/powerpc/include/asm/book3s/64/hugetlb.h
+++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h
@@ -42,4 +42,12 @@ static inline bool gigantic_page_supported(void)
/* hugepd entry valid bit */
#define HUGEPD_VAL_BITS (0x8000000000000000UL)
+#define huge_ptep_modify_prot_start huge_ptep_modify_prot_start
+extern pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep);
+
+#define huge_ptep_modify_prot_commit huge_ptep_modify_prot_commit
+extern void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t old_pte, pte_t new_pte);
#endif
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 2d00cc530083..60c1d37e446a 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -4,7 +4,6 @@
#ifdef CONFIG_HUGETLB_PAGE
#include <asm/page.h>
-#include <asm-generic/hugetlb.h>
extern struct kmem_cache *hugepte_cache;
@@ -176,6 +175,7 @@ static inline void arch_clear_hugepage_flags(struct page *page)
{
}
+#include <asm-generic/hugetlb.h>
#else /* ! CONFIG_HUGETLB_PAGE */
static inline void flush_hugetlb_page(struct vm_area_struct *vma,
unsigned long vmaddr)
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index a7226ed9cae6..8b098bedaff5 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -913,3 +913,38 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
return 1;
}
+
+#ifdef CONFIG_PPC_BOOK3S_64
+pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
+{
+ unsigned long pte_val;
+ /*
+ * Clear the _PAGE_PRESENT so that no hardware parallel update is
+ * possible. Also keep the pte_present true so that we don't take
+ * wrong fault.
+ */
+ pte_val = pte_update(vma->vm_mm, addr, ptep,
+ _PAGE_PRESENT, _PAGE_INVALID, 1);
+
+ return __pte(pte_val);
+}
+EXPORT_SYMBOL(huge_ptep_modify_prot_start);
+
+void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *ptep, pte_t old_pte, pte_t pte)
+{
+ struct mm_struct *mm = vma->vm_mm;
+
+ /*
+ * To avoid NMMU hang while relaxing access we need to flush the tlb before
+ * we set the new value.
+ */
+ if (is_pte_upgrade(pte_val(old_pte), pte_val(pte)) &&
+ (atomic_read(&mm->context.copros) > 0))
+ flush_hugetlb_page(vma, addr);
+
+ set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
+}
+EXPORT_SYMBOL(huge_ptep_modify_prot_commit);
+#endif
--
2.17.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
end of thread, other threads:[~2018-10-11 3:53 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-10-11 3:52 [PATCH 0/5] NestMMU pte upgrade workaround for mprotect and autonuma Aneesh Kumar K.V
2018-10-11 3:52 ` [PATCH 1/5] mm: Update ptep_modify_prot_start/commit to take vm_area_struct as arg Aneesh Kumar K.V
2018-10-11 3:52 ` [PATCH 2/5] mm: update ptep_modify_prot_commit to take old pte value " Aneesh Kumar K.V
2018-10-11 3:52 ` [PATCH 3/5] arch/powerpc/mm: Nest MMU workaround for mprotect/autonuma RW upgrade Aneesh Kumar K.V
2018-10-11 3:52 ` [PATCH 4/5] mm/hugetlb: Add prot_modify_start/commit sequence for hugetlb update Aneesh Kumar K.V
2018-10-11 3:52 ` [PATCH 5/5] arch/powerpc/mm/hugetlb: NestMMU workaround for hugetlb mprotect RW upgrade Aneesh Kumar K.V
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).