* [RFC patch 0/7] thp: transparent hugepages on System z
@ 2012-08-23 17:17 Gerald Schaefer
  2012-08-23 17:17 ` [RFC patch 1/7] thp: remove assumptions on pgtable_t type Gerald Schaefer
                   ` (6 more replies)
  0 siblings, 7 replies; 14+ messages in thread
From: Gerald Schaefer @ 2012-08-23 17:17 UTC (permalink / raw)
  To: akpm, aarcange, linux-mm, ak, hughd
  Cc: linux-kernel, schwidefsky, heiko.carstens, gerald.schaefer

This patch series adds support for transparent hugepages on System z.
Small changes to common code are necessary to accommodate the different
pgtable_t type, TLB flushing, and kvm behaviour on s390; see patches 1 to 3.



* [RFC patch 1/7] thp: remove assumptions on pgtable_t type
  2012-08-23 17:17 [RFC patch 0/7] thp: transparent hugepages on System z Gerald Schaefer
@ 2012-08-23 17:17 ` Gerald Schaefer
  2012-08-23 17:17 ` [RFC patch 2/7] thp: introduce pmdp_invalidate() Gerald Schaefer
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: Gerald Schaefer @ 2012-08-23 17:17 UTC (permalink / raw)
  To: akpm, aarcange, linux-mm, ak, hughd
  Cc: linux-kernel, schwidefsky, heiko.carstens, gerald.schaefer

[-- Attachment #1: linux-3.5-thp-pgtable-prealloc.patch --]
[-- Type: text/plain, Size: 6340 bytes --]

The thp page table pre-allocation code currently assumes that pgtable_t
is of type "struct page *". This may not be true for all architectures,
so this patch removes that assumption by replacing the functions
prepare_pmd_huge_pte() and get_pmd_huge_pte() with two new functions,
pgtable_deposit() and pgtable_withdraw(), that architectures can
override with their own implementations.

It also removes two VM_BUG_ON checks that called page_count() and
page_mapcount() on a pgtable_t. Apart from that removal, this patch
introduces no functional change.
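
As an aside, the override mechanism is purely preprocessor-based. A
minimal sketch with a hypothetical architecture "foo" (patch 5 of this
series does exactly this for s390):

/* include/asm-generic/pgtable.h: generic declarations, guarded */
#ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
extern void pgtable_deposit(struct mm_struct *mm, pgtable_t pgtable);
#endif
#ifndef __HAVE_ARCH_PGTABLE_WITHDRAW
extern pgtable_t pgtable_withdraw(struct mm_struct *mm);
#endif

/* arch/foo/include/asm/pgtable.h: hypothetical override */
#define __HAVE_ARCH_PGTABLE_DEPOSIT
extern void pgtable_deposit(struct mm_struct *mm, pgtable_t pgtable);
#define __HAVE_ARCH_PGTABLE_WITHDRAW
extern pgtable_t pgtable_withdraw(struct mm_struct *mm);

arch/foo/mm/pgtable.c then supplies its own definitions, while the
generic versions in mm/pgtable-generic.c are compiled out by the same
#ifndef guards, so exactly one implementation is ever built.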

Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
---
 include/asm-generic/pgtable.h |    6 +++++
 include/linux/huge_mm.h       |    1 -
 mm/huge_memory.c              |   46 +++++-------------------------------------
 mm/pgtable-generic.c          |   39 +++++++++++++++++++++++++++++++++++
 4 files changed, 51 insertions(+), 41 deletions(-)

--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -443,6 +443,12 @@ static inline int pmd_write(pmd_t pmd)
 	return 0;
 }
 #endif /* __HAVE_ARCH_PMD_WRITE */
+#ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
+extern void pgtable_deposit(struct mm_struct *mm, pgtable_t pgtable);
+#endif
+#ifndef __HAVE_ARCH_PGTABLE_WITHDRAW
+extern pgtable_t pgtable_withdraw(struct mm_struct *mm);
+#endif
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #ifndef pmd_read_atomic
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -11,7 +11,6 @@ extern int copy_huge_pmd(struct mm_struc
 extern int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			       unsigned long address, pmd_t *pmd,
 			       pmd_t orig_pmd);
-extern pgtable_t get_pmd_huge_pte(struct mm_struct *mm);
 extern struct page *follow_trans_huge_pmd(struct mm_struct *mm,
 					  unsigned long addr,
 					  pmd_t *pmd,
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -611,19 +611,6 @@ out:
 }
 __setup("transparent_hugepage=", setup_transparent_hugepage);
 
-static void prepare_pmd_huge_pte(pgtable_t pgtable,
-				 struct mm_struct *mm)
-{
-	assert_spin_locked(&mm->page_table_lock);
-
-	/* FIFO */
-	if (!mm->pmd_huge_pte)
-		INIT_LIST_HEAD(&pgtable->lru);
-	else
-		list_add(&pgtable->lru, &mm->pmd_huge_pte->lru);
-	mm->pmd_huge_pte = pgtable;
-}
-
 static inline pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
 {
 	if (likely(vma->vm_flags & VM_WRITE))
@@ -665,7 +652,7 @@ static int __do_huge_pmd_anonymous_page(
 		 */
 		page_add_new_anon_rmap(page, vma, haddr);
 		set_pmd_at(mm, haddr, pmd, entry);
-		prepare_pmd_huge_pte(pgtable, mm);
+		pgtable_deposit(mm, pgtable);
 		add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
 		mm->nr_ptes++;
 		spin_unlock(&mm->page_table_lock);
@@ -791,7 +778,7 @@ int copy_huge_pmd(struct mm_struct *dst_
 	pmdp_set_wrprotect(src_mm, addr, src_pmd);
 	pmd = pmd_mkold(pmd_wrprotect(pmd));
 	set_pmd_at(dst_mm, addr, dst_pmd, pmd);
-	prepare_pmd_huge_pte(pgtable, dst_mm);
+	pgtable_deposit(dst_mm, pgtable);
 	dst_mm->nr_ptes++;
 
 	ret = 0;
@@ -802,25 +789,6 @@ out:
 	return ret;
 }
 
-/* no "address" argument so destroys page coloring of some arch */
-pgtable_t get_pmd_huge_pte(struct mm_struct *mm)
-{
-	pgtable_t pgtable;
-
-	assert_spin_locked(&mm->page_table_lock);
-
-	/* FIFO */
-	pgtable = mm->pmd_huge_pte;
-	if (list_empty(&pgtable->lru))
-		mm->pmd_huge_pte = NULL;
-	else {
-		mm->pmd_huge_pte = list_entry(pgtable->lru.next,
-					      struct page, lru);
-		list_del(&pgtable->lru);
-	}
-	return pgtable;
-}
-
 static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 					struct vm_area_struct *vma,
 					unsigned long address,
@@ -876,7 +844,7 @@ static int do_huge_pmd_wp_page_fallback(
 	pmdp_clear_flush_notify(vma, haddr, pmd);
 	/* leave pmd empty until pte is filled */
 
-	pgtable = get_pmd_huge_pte(mm);
+	pgtable = pgtable_withdraw(mm);
 	pmd_populate(mm, &_pmd, pgtable);
 
 	for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
@@ -1041,7 +1009,7 @@ int zap_huge_pmd(struct mmu_gather *tlb,
 	if (__pmd_trans_huge_lock(pmd, vma) == 1) {
 		struct page *page;
 		pgtable_t pgtable;
-		pgtable = get_pmd_huge_pte(tlb->mm);
+		pgtable = pgtable_withdraw(tlb->mm);
 		page = pmd_page(*pmd);
 		pmd_clear(pmd);
 		tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
@@ -1358,7 +1326,7 @@ static int __split_huge_page_map(struct
 	pmd = page_check_address_pmd(page, mm, address,
 				     PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG);
 	if (pmd) {
-		pgtable = get_pmd_huge_pte(mm);
+		pgtable = pgtable_withdraw(mm);
 		pmd_populate(mm, &_pmd, pgtable);
 
 		for (i = 0, haddr = address; i < HPAGE_PMD_NR;
@@ -1971,8 +1939,6 @@ static void collapse_huge_page(struct mm
 	pte_unmap(pte);
 	__SetPageUptodate(new_page);
 	pgtable = pmd_pgtable(_pmd);
-	VM_BUG_ON(page_count(pgtable) != 1);
-	VM_BUG_ON(page_mapcount(pgtable) != 0);
 
 	_pmd = mk_pmd(new_page, vma->vm_page_prot);
 	_pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma);
@@ -1990,7 +1956,7 @@ static void collapse_huge_page(struct mm
 	page_add_new_anon_rmap(new_page, vma, address);
 	set_pmd_at(mm, address, pmd, _pmd);
 	update_mmu_cache(vma, address, _pmd);
-	prepare_pmd_huge_pte(pgtable, mm);
+	pgtable_deposit(mm, pgtable);
 	spin_unlock(&mm->page_table_lock);
 
 #ifndef CONFIG_NUMA
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -120,3 +120,42 @@ void pmdp_splitting_flush(struct vm_area
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif
+
+#ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void pgtable_deposit(struct mm_struct *mm, pgtable_t pgtable)
+{
+	assert_spin_locked(&mm->page_table_lock);
+
+	/* FIFO */
+	if (!mm->pmd_huge_pte)
+		INIT_LIST_HEAD(&pgtable->lru);
+	else
+		list_add(&pgtable->lru, &mm->pmd_huge_pte->lru);
+	mm->pmd_huge_pte = pgtable;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#endif
+
+#ifndef __HAVE_ARCH_PGTABLE_WITHDRAW
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/* no "address" argument so destroys page coloring of some arch */
+pgtable_t pgtable_withdraw(struct mm_struct *mm)
+{
+	pgtable_t pgtable;
+
+	assert_spin_locked(&mm->page_table_lock);
+
+	/* FIFO */
+	pgtable = mm->pmd_huge_pte;
+	if (list_empty(&pgtable->lru))
+		mm->pmd_huge_pte = NULL;
+	else {
+		mm->pmd_huge_pte = list_entry(pgtable->lru.next,
+					      struct page, lru);
+		list_del(&pgtable->lru);
+	}
+	return pgtable;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#endif



* [RFC patch 2/7] thp: introduce pmdp_invalidate()
  2012-08-23 17:17 [RFC patch 0/7] thp: transparent hugepages on System z Gerald Schaefer
  2012-08-23 17:17 ` [RFC patch 1/7] thp: remove assumptions on pgtable_t type Gerald Schaefer
@ 2012-08-23 17:17 ` Gerald Schaefer
  2012-08-25 12:36   ` Hillf Danton
  2012-08-23 17:17 ` [RFC patch 3/7] thp: make MADV_HUGEPAGE check for mm->def_flags Gerald Schaefer
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 14+ messages in thread
From: Gerald Schaefer @ 2012-08-23 17:17 UTC (permalink / raw)
  To: akpm, aarcange, linux-mm, ak, hughd
  Cc: linux-kernel, schwidefsky, heiko.carstens, gerald.schaefer

[-- Attachment #1: linux-3.5-thp-pmd-flush.patch --]
[-- Type: text/plain, Size: 1622 bytes --]

On System z, a valid page table entry must not be changed while the
page table is attached to any CPU, so invalidating a pmd requires an
IDTE operation there rather than pmd_mknotpresent() followed by
set_pmd_at(). This patch introduces a pmdp_invalidate() function with
a generic default implementation, which architectures can override.
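
For comparison, the s390 override added in patch 7 of this series
collapses the mark-not-present plus TLB flush sequence into a single
operation, because the IDTE instruction invalidates the segment table
entry and flushes the TLB in one interlocked step:

#define __HAVE_ARCH_PMDP_INVALIDATE
static inline void pmdp_invalidate(struct vm_area_struct *vma,
				   unsigned long address, pmd_t *pmdp)
{
	__pmd_idte(address, pmdp);
}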

Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
---
 include/asm-generic/pgtable.h |   11 +++++++++++
 mm/huge_memory.c              |    3 +--
 2 files changed, 12 insertions(+), 2 deletions(-)

--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -93,6 +93,17 @@ static inline pmd_t pmdp_get_and_clear(s
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif
 
+#ifndef __HAVE_ARCH_PMDP_INVALIDATE
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline void pmdp_invalidate(struct vm_area_struct *vma,
+				   unsigned long address, pmd_t *pmdp)
+{
+	set_pmd_at(vma->vm_mm, address, pmd, pmd_mknotpresent(*pmd));
+	flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#endif
+
 #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
 static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
 					    unsigned long address, pte_t *ptep,
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1374,8 +1374,7 @@ static int __split_huge_page_map(struct
 		 * SMP TLB and finally we write the non-huge version
 		 * of the pmd entry with pmd_populate.
 		 */
-		set_pmd_at(mm, address, pmd, pmd_mknotpresent(*pmd));
-		flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+		pmdp_invalidate(vma, address, pmd);
 		pmd_populate(mm, pmd, pgtable);
 		ret = 1;
 	}



* [RFC patch 3/7] thp: make MADV_HUGEPAGE check for mm->def_flags
  2012-08-23 17:17 [RFC patch 0/7] thp: transparent hugepages on System z Gerald Schaefer
  2012-08-23 17:17 ` [RFC patch 1/7] thp: remove assumptions on pgtable_t type Gerald Schaefer
  2012-08-23 17:17 ` [RFC patch 2/7] thp: introduce pmdp_invalidate() Gerald Schaefer
@ 2012-08-23 17:17 ` Gerald Schaefer
  2012-08-25 12:47   ` Hillf Danton
  2012-08-23 17:17 ` [RFC patch 4/7] thp, s390: thp splitting backend for System z Gerald Schaefer
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 14+ messages in thread
From: Gerald Schaefer @ 2012-08-23 17:17 UTC (permalink / raw)
  To: akpm, aarcange, linux-mm, ak, hughd
  Cc: linux-kernel, schwidefsky, heiko.carstens, gerald.schaefer

[-- Attachment #1: linux-3.5-thp-madvise.patch --]
[-- Type: text/plain, Size: 977 bytes --]

This adds a check to hugepage_madvise() that refuses MADV_HUGEPAGE
if VM_NOHUGEPAGE is set in mm->def_flags. On System z, VM_NOHUGEPAGE
will be set in mm->def_flags for kvm processes to prevent any future
thp mappings; checking mm->def_flags here also prevents MADV_HUGEPAGE
from being used to re-enable thp on such an mm.
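
A minimal userspace sketch of the visible effect (region size and
alignment chosen arbitrarily): on an mm that has VM_NOHUGEPAGE in its
def_flags, the madvise() call below now fails with EINVAL instead of
setting VM_HUGEPAGE on the vma.

#define _GNU_SOURCE
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 2UL << 20;
	void *p;

	if (posix_memalign(&p, len, len))	/* aligned region to advise on */
		return 1;
	if (madvise(p, len, MADV_HUGEPAGE))
		/* EINVAL here if VM_NOHUGEPAGE is in mm->def_flags */
		printf("MADV_HUGEPAGE refused: %s\n", strerror(errno));
	else
		puts("MADV_HUGEPAGE accepted");
	free(p);
	return 0;
}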

Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
---
 mm/huge_memory.c |    4 ++++
 1 file changed, 4 insertions(+)

--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1464,6 +1464,8 @@ out:
 int hugepage_madvise(struct vm_area_struct *vma,
 		     unsigned long *vm_flags, int advice)
 {
+	struct mm_struct *mm = vma->vm_mm;
+
 	switch (advice) {
 	case MADV_HUGEPAGE:
 		/*
@@ -1471,6 +1473,8 @@ int hugepage_madvise(struct vm_area_stru
 		 */
 		if (*vm_flags & (VM_HUGEPAGE | VM_NO_THP))
 			return -EINVAL;
+		if (mm->def_flags & VM_NOHUGEPAGE)
+			return -EINVAL;
 		*vm_flags &= ~VM_NOHUGEPAGE;
 		*vm_flags |= VM_HUGEPAGE;
 		/*



* [RFC patch 4/7] thp, s390: thp splitting backend for System z
  2012-08-23 17:17 [RFC patch 0/7] thp: transparent hugepages on System z Gerald Schaefer
                   ` (2 preceding siblings ...)
  2012-08-23 17:17 ` [RFC patch 3/7] thp: make MADV_HUGEPAGE check for mm->def_flags Gerald Schaefer
@ 2012-08-23 17:17 ` Gerald Schaefer
  2012-08-23 17:17 ` [RFC patch 5/7] thp, s390: thp pagetable pre-allocation " Gerald Schaefer
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: Gerald Schaefer @ 2012-08-23 17:17 UTC (permalink / raw)
  To: akpm, aarcange, linux-mm, ak, hughd
  Cc: linux-kernel, schwidefsky, heiko.carstens, gerald.schaefer

[-- Attachment #1: linux-3.5-thp-s390-split.patch --]
[-- Type: text/plain, Size: 3508 bytes --]

This patch is part of the architecture backend for thp on System z.
It provides the functions related to thp splitting, including
serialization against gup. Unlike on other architectures,
pmdp_splitting_flush() cannot use a TLB flushing operation to
serialize against gup-fast on s390, because TLB flushing there does
not send IPIs and therefore is not held off by gup-fast's disabled
IRQs. Instead, smp_call_function() is called with an empty function:
it returns only after every CPU has taken the IPI, which can happen
only outside an IRQs-disabled gup-fast section, giving the required
serialization.
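
A condensed sketch of the two sides of the protocol (simplified from
the hunks below, with the page table walk elided):

/* gup-fast side: the whole walk runs with IRQs disabled */
static int gup_fast_sketch(pmd_t pmd)
{
	unsigned long flags;
	int ret = 1;

	local_irq_save(flags);	/* no IPI can be delivered in this window */
	if (pmd_none(pmd) || pmd_trans_splitting(pmd))
		ret = 0;	/* fall back to the slow gup path */
	/* ... walk the page table ... */
	local_irq_restore(flags);
	return ret;
}

/* splitting side: set the split bit, then wait out all such windows */
static void splitting_flush_sketch(pmd_t *pmdp)
{
	if (!test_and_set_bit(_SEGMENT_ENTRY_SPLIT_BIT,
			      (unsigned long *) pmdp))
		/*
		 * wait == 1: smp_call_function() returns only after every
		 * other CPU has taken the (empty) IPI, which it can do only
		 * with IRQs enabled, i.e. outside a gup-fast walk.
		 */
		smp_call_function(pmdp_splitting_flush_sync, NULL, 1);
}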

Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
---
 arch/s390/include/asm/pgtable.h |   13 +++++++++++++
 arch/s390/mm/gup.c              |   11 ++++++++++-
 arch/s390/mm/pgtable.c          |   18 ++++++++++++++++++
 3 files changed, 41 insertions(+), 1 deletion(-)

--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -347,6 +347,8 @@ extern struct page *vmemmap;
 
 #define _SEGMENT_ENTRY_LARGE	0x400	/* STE-format control, large page   */
 #define _SEGMENT_ENTRY_CO	0x100	/* change-recording override   */
+#define _SEGMENT_ENTRY_SPLIT_BIT 0	/* THP splitting bit number */
+#define _SEGMENT_ENTRY_SPLIT	(1UL << _SEGMENT_ENTRY_SPLIT_BIT)
 
 /* Page status table bits for virtualization */
 #define RCP_ACC_BITS	0xf000000000000000UL
@@ -506,6 +508,10 @@ static inline int pmd_bad(pmd_t pmd)
 	return (pmd_val(pmd) & mask) != _SEGMENT_ENTRY;
 }
 
+#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
+extern void pmdp_splitting_flush(struct vm_area_struct *vma,
+				 unsigned long addr, pmd_t *pmdp);
+
 static inline int pte_none(pte_t pte)
 {
 	return (pte_val(pte) & _PAGE_INVALID) && !(pte_val(pte) & _PAGE_SWT);
@@ -1159,6 +1165,13 @@ static inline pmd_t *pmd_offset(pud_t *p
 #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
 #define pte_unmap(pte) do { } while (0)
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline int pmd_trans_splitting(pmd_t pmd)
+{
+	return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
 /*
  * 31 bit swap entry format:
  * A page-table entry has some bits we have to treat in a special way.
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -115,7 +115,16 @@ static inline int gup_pmd_range(pud_t *p
 		pmd = *pmdp;
 		barrier();
 		next = pmd_addr_end(addr, end);
-		if (pmd_none(pmd))
+		/*
+		 * The pmd_trans_splitting() check below explains why
+		 * pmdp_splitting_flush() has to serialize with
+		 * smp_call_function() against our disabled IRQs, to stop
+		 * this gup-fast code from running while we set the
+		 * splitting bit in the pmd. Returning zero will take
+		 * the slow path that will call wait_split_huge_page()
+		 * if the pmd is still in splitting state.
+		 */
+		if (pmd_none(pmd) || pmd_trans_splitting(pmd))
 			return 0;
 		if (unlikely(pmd_huge(pmd))) {
 			if (!gup_huge_pmd(pmdp, pmd, addr, next,
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -866,3 +866,21 @@ bool kernel_page_present(struct page *pa
 	return cc == 0;
 }
 #endif /* CONFIG_HIBERNATION && CONFIG_DEBUG_PAGEALLOC */
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static void pmdp_splitting_flush_sync(void *arg)
+{
+	/* Simply deliver the interrupt */
+}
+
+void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
+			  pmd_t *pmdp)
+{
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+	if (!test_and_set_bit(_SEGMENT_ENTRY_SPLIT_BIT,
+			      (unsigned long *) pmdp)) {
+		/* need to serialize against gup-fast (IRQ disabled) */
+		smp_call_function(pmdp_splitting_flush_sync, NULL, 1);
+	}
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */



* [RFC patch 5/7] thp, s390: thp pagetable pre-allocation for System z
  2012-08-23 17:17 [RFC patch 0/7] thp: transparent hugepages on System z Gerald Schaefer
                   ` (3 preceding siblings ...)
  2012-08-23 17:17 ` [RFC patch 4/7] thp, s390: thp splitting backend for System z Gerald Schaefer
@ 2012-08-23 17:17 ` Gerald Schaefer
  2012-08-23 17:17 ` [RFC patch 6/7] thp, s390: disable thp for kvm host on " Gerald Schaefer
  2012-08-23 17:17 ` [RFC patch 7/7] thp, s390: architecture backend for thp " Gerald Schaefer
  6 siblings, 0 replies; 14+ messages in thread
From: Gerald Schaefer @ 2012-08-23 17:17 UTC (permalink / raw)
  To: akpm, aarcange, linux-mm, ak, hughd
  Cc: linux-kernel, schwidefsky, heiko.carstens, gerald.schaefer

[-- Attachment #1: linux-3.5-thp-s390-pgtable.patch --]
[-- Type: text/plain, Size: 2201 bytes --]

This patch is part of the architecture backend for thp on System z.
It provides the pagetable pre-allocation functions pgtable_deposit()
and pgtable_withdraw(). Unlike on other architectures, pgtable_t on
s390 is not a struct page * but a pointer to the page table itself.
The pre-allocation list linkage is therefore stored within the page
table, instead of in the struct page.
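
A standalone userspace illustration of the trick (a sketch with
made-up values; ENTRY_EMPTY stands in for _PAGE_TYPE_EMPTY): while a
2K page table sits on the deposit list, its first 16 bytes (the first
two 8-byte entries) double as the list linkage, and withdraw re-marks
exactly those two entries as empty:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

struct list_head { struct list_head *next, *prev; };

#define PGTABLE_BYTES	2048		/* s390 page table: 256 8-byte entries */
#define ENTRY_EMPTY	0x400ULL	/* stand-in for _PAGE_TYPE_EMPTY */

typedef uint64_t *pgtable_t;		/* pointer to the table, no struct page */

static pgtable_t deposit_list;		/* stand-in for mm->pmd_huge_pte */

static void deposit(pgtable_t pgtable)
{
	struct list_head *lh = (struct list_head *) pgtable;

	if (!deposit_list) {
		lh->next = lh->prev = lh;
	} else {
		struct list_head *head = (struct list_head *) deposit_list;

		lh->next = head->next;
		lh->prev = head;
		head->next->prev = lh;
		head->next = lh;
	}
	deposit_list = pgtable;
}

static pgtable_t withdraw(void)
{
	pgtable_t pgtable = deposit_list;
	struct list_head *lh = (struct list_head *) pgtable;

	if (lh->next == lh) {
		deposit_list = NULL;
	} else {
		deposit_list = (pgtable_t) lh->next;
		lh->prev->next = lh->next;
		lh->next->prev = lh->prev;
	}
	/* the two entries clobbered by the linkage are made empty again */
	pgtable[0] = ENTRY_EMPTY;
	pgtable[1] = ENTRY_EMPTY;
	return pgtable;
}

int main(void)
{
	pgtable_t t = malloc(PGTABLE_BYTES);

	deposit(t);
	t = withdraw();
	printf("entries 0/1 after withdraw: %#llx %#llx\n",
	       (unsigned long long) t[0], (unsigned long long) t[1]);
	free(t);
	return 0;
}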

Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
---
 arch/s390/include/asm/pgtable.h |    6 ++++++
 arch/s390/mm/pgtable.c          |   38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)

--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1166,6 +1166,12 @@ static inline pmd_t *pmd_offset(pud_t *p
 #define pte_unmap(pte) do { } while (0)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define __HAVE_ARCH_PGTABLE_DEPOSIT
+extern void pgtable_deposit(struct mm_struct *mm, pgtable_t pgtable);
+
+#define __HAVE_ARCH_PGTABLE_WITHDRAW
+extern pgtable_t pgtable_withdraw(struct mm_struct *mm);
+
 static inline int pmd_trans_splitting(pmd_t pmd)
 {
 	return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT;
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -883,4 +883,42 @@ void pmdp_splitting_flush(struct vm_area
 		smp_call_function(pmdp_splitting_flush_sync, NULL, 1);
 	}
 }
+
+void pgtable_deposit(struct mm_struct *mm, pgtable_t pgtable)
+{
+	struct list_head *lh = (struct list_head *) pgtable;
+
+	assert_spin_locked(&mm->page_table_lock);
+
+	/* FIFO */
+	if (!mm->pmd_huge_pte)
+		INIT_LIST_HEAD(lh);
+	else
+		list_add(lh, (struct list_head *) mm->pmd_huge_pte);
+	mm->pmd_huge_pte = pgtable;
+}
+
+pgtable_t pgtable_withdraw(struct mm_struct *mm)
+{
+	struct list_head *lh;
+	pgtable_t pgtable;
+	pte_t *ptep;
+
+	assert_spin_locked(&mm->page_table_lock);
+
+	/* FIFO */
+	pgtable = mm->pmd_huge_pte;
+	lh = (struct list_head *) pgtable;
+	if (list_empty(lh))
+		mm->pmd_huge_pte = NULL;
+	else {
+		mm->pmd_huge_pte = (pgtable_t) lh->next;
+		list_del(lh);
+	}
+	ptep = (pte_t *) pgtable;
+	pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+	ptep++;
+	pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+	return pgtable;
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */



* [RFC patch 6/7] thp, s390: disable thp for kvm host on System z
  2012-08-23 17:17 [RFC patch 0/7] thp: transparent hugepages on System z Gerald Schaefer
                   ` (4 preceding siblings ...)
  2012-08-23 17:17 ` [RFC patch 5/7] thp, s390: thp pagetable pre-allocation " Gerald Schaefer
@ 2012-08-23 17:17 ` Gerald Schaefer
  2012-08-23 17:17 ` [RFC patch 7/7] thp, s390: architecture backend for thp " Gerald Schaefer
  6 siblings, 0 replies; 14+ messages in thread
From: Gerald Schaefer @ 2012-08-23 17:17 UTC (permalink / raw)
  To: akpm, aarcange, linux-mm, ak, hughd
  Cc: linux-kernel, schwidefsky, heiko.carstens, gerald.schaefer

[-- Attachment #1: linux-3.5-thp-s390-kvm.patch --]
[-- Type: text/plain, Size: 1583 bytes --]

This patch is part of the architecture backend for thp on System z.
It disables thp for kvm host processes, because there is no kvm host
hugepage support so far. Existing thp mappings are split using
follow_page() with FOLL_SPLIT, and future thp mappings are prevented
by setting VM_NOHUGEPAGE in mm->def_flags.

Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
---
 arch/s390/mm/pgtable.c |   30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -787,6 +787,30 @@ void tlb_remove_table(struct mmu_gather
 		tlb_table_flush(tlb);
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void thp_split_vma(struct vm_area_struct *vma)
+{
+	unsigned long addr;
+	struct page *page;
+
+	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
+		page = follow_page(vma, addr, FOLL_SPLIT);
+	}
+}
+
+void thp_split_mm(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma = mm->mmap;
+
+	while (vma != NULL) {
+		thp_split_vma(vma);
+		vma->vm_flags &= ~VM_HUGEPAGE;
+		vma->vm_flags |= VM_NOHUGEPAGE;
+		vma = vma->vm_next;
+	}
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
 /*
  * switch on pgstes for its userspace process (for kvm)
  */
@@ -824,6 +848,12 @@ int s390_enable_sie(void)
 	if (!mm)
 		return -ENOMEM;
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	/* split thp mappings and disable thp for future mappings */
+	thp_split_mm(mm);
+	mm->def_flags |= VM_NOHUGEPAGE;
+#endif
+
 	/* Now lets check again if something happened */
 	task_lock(tsk);
 	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||



* [RFC patch 7/7] thp, s390: architecture backend for thp on System z
  2012-08-23 17:17 [RFC patch 0/7] thp: transparent hugepages on System z Gerald Schaefer
                   ` (5 preceding siblings ...)
  2012-08-23 17:17 ` [RFC patch 6/7] thp, s390: disable thp for kvm host on " Gerald Schaefer
@ 2012-08-23 17:17 ` Gerald Schaefer
  2012-08-24  8:07   ` Heiko Carstens
  6 siblings, 1 reply; 14+ messages in thread
From: Gerald Schaefer @ 2012-08-23 17:17 UTC (permalink / raw)
  To: akpm, aarcange, linux-mm, ak, hughd
  Cc: linux-kernel, schwidefsky, heiko.carstens, gerald.schaefer

[-- Attachment #1: linux-3.5-thp-s390.patch --]
[-- Type: text/plain, Size: 8523 bytes --]

This implements the architecture backend for transparent hugepages
on System z.

Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
---
 arch/s390/include/asm/hugetlb.h |   18 ----
 arch/s390/include/asm/pgtable.h |  176 ++++++++++++++++++++++++++++++++++++++++
 arch/s390/include/asm/tlb.h     |    1 +
 arch/s390/mm/pgtable.c          |   22 +++++
 mm/Kconfig                      |    2 +-
 5 files changed, 201 insertions(+), 18 deletions(-)

--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -87,23 +87,6 @@ static inline void __pmd_csp(pmd_t *pmdp
 		"	csp %1,%3"
 		: "=m" (*pmdp)
 		: "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc");
-	pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY;
-}
-
-static inline void __pmd_idte(unsigned long address, pmd_t *pmdp)
-{
-	unsigned long sto = (unsigned long) pmdp -
-				pmd_index(address) * sizeof(pmd_t);
-
-	if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INV)) {
-		asm volatile(
-			"	.insn	rrf,0xb98e0000,%2,%3,0,0"
-			: "=m" (*pmdp)
-			: "m" (*pmdp), "a" (sto),
-			  "a" ((address & HPAGE_MASK))
-		);
-	}
-	pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY;
 }
 
 static inline void huge_ptep_invalidate(struct mm_struct *mm,
@@ -115,6 +98,7 @@ static inline void huge_ptep_invalidate(
 		__pmd_idte(address, pmdp);
 	else
 		__pmd_csp(pmdp);
+	pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY;
 }
 
 #define huge_ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -350,6 +350,10 @@ extern struct page *vmemmap;
 #define _SEGMENT_ENTRY_SPLIT_BIT 0	/* THP splitting bit number */
 #define _SEGMENT_ENTRY_SPLIT	(1UL << _SEGMENT_ENTRY_SPLIT_BIT)
 
+/* Set of bits not changed in pmd_modify */
+#define _SEGMENT_CHG_MASK	(_SEGMENT_ENTRY_ORIGIN | _SEGMENT_ENTRY_LARGE \
+				 | _SEGMENT_ENTRY_SPLIT | _SEGMENT_ENTRY_CO)
+
 /* Page status table bits for virtualization */
 #define RCP_ACC_BITS	0xf000000000000000UL
 #define RCP_FP_BIT	0x0800000000000000UL
@@ -512,6 +516,26 @@ static inline int pmd_bad(pmd_t pmd)
 extern void pmdp_splitting_flush(struct vm_area_struct *vma,
 				 unsigned long addr, pmd_t *pmdp);
 
+#define  __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
+extern int pmdp_set_access_flags(struct vm_area_struct *vma,
+				 unsigned long address, pmd_t *pmdp,
+				 pmd_t entry, int dirty);
+
+#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
+extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
+				  unsigned long address, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMD_WRITE
+static inline int pmd_write(pmd_t pmd)
+{
+	return (pmd_val(pmd) & _SEGMENT_ENTRY_RO) == 0;
+}
+
+static inline int pmd_young(pmd_t pmd)
+{
+	return 0;
+}
+
 static inline int pte_none(pte_t pte)
 {
 	return (pte_val(pte) & _PAGE_INVALID) && !(pte_val(pte) & _PAGE_SWT);
@@ -1165,6 +1189,22 @@ static inline pmd_t *pmd_offset(pud_t *p
 #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
 #define pte_unmap(pte) do { } while (0)
 
+static inline void __pmd_idte(unsigned long address, pmd_t *pmdp)
+{
+	unsigned long sto = (unsigned long) pmdp -
+			    pmd_index(address) * sizeof(pmd_t);
+
+	if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INV)) {
+		asm volatile(
+			"	.insn	rrf,0xb98e0000,%2,%3,0,0"
+			: "=m" (*pmdp)
+			: "m" (*pmdp), "a" (sto),
+			  "a" ((address & HPAGE_MASK))
+			: "cc"
+		);
+	}
+}
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #define __HAVE_ARCH_PGTABLE_DEPOSIT
 extern void pgtable_deposit(struct mm_struct *mm, pgtable_t pgtable);
@@ -1176,6 +1216,142 @@ static inline int pmd_trans_splitting(pm
 {
 	return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT;
 }
+
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+			      pmd_t *pmdp, pmd_t entry)
+{
+	*pmdp = entry;
+}
+
+static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
+{
+	unsigned long pgprot_pmd = 0;
+
+	if (pgprot_val(pgprot) & _PAGE_INVALID) {
+		if (pgprot_val(pgprot) & _PAGE_SWT)
+			pgprot_pmd |= _HPAGE_TYPE_NONE;
+		pgprot_pmd |= _SEGMENT_ENTRY_INV;
+	}
+	if (pgprot_val(pgprot) & _PAGE_RO)
+		pgprot_pmd |= _SEGMENT_ENTRY_RO;
+	return pgprot_pmd;
+}
+
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+	pmd_val(pmd) &= _SEGMENT_CHG_MASK;
+	pmd_val(pmd) |= massage_pgprot_pmd(newprot);
+	return pmd;
+}
+
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+	pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE;
+	return pmd;
+}
+
+static inline pmd_t pmd_mkwrite(pmd_t pmd)
+{
+	pmd_val(pmd) &= ~_SEGMENT_ENTRY_RO;
+	return pmd;
+}
+
+static inline pmd_t pmd_wrprotect(pmd_t pmd)
+{
+	pmd_val(pmd) |= _SEGMENT_ENTRY_RO;
+	return pmd;
+}
+
+static inline pmd_t pmd_mkdirty(pmd_t pmd)
+{
+	/* No dirty bit in the segment table entry. */
+	return pmd;
+}
+
+static inline pmd_t pmd_mkold(pmd_t pmd)
+{
+	/* No referenced bit in the segment table entry. */
+	return pmd;
+}
+
+static inline pmd_t pmd_mkyoung(pmd_t pmd)
+{
+	/* No referenced bit in the segment table entry. */
+	return pmd;
+}
+
+#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
+static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+					    unsigned long address,
+					    pmd_t *pmdp)
+{
+	int rc = 0;
+	int counter = PTRS_PER_PTE;
+	unsigned long pmd_addr = pmd_val(*pmdp) & HPAGE_MASK;
+
+	asm volatile(
+		"0:	rrbe	0,%2\n"
+		"	la	%2,0(%3,%2)\n"
+		"	brc	12,1f\n"
+		"	lhi	%0,1\n"
+		"1:	brct	%1,0b\n"
+		: "+d" (rc), "+d" (counter), "+a" (pmd_addr)
+		: "a" (4096UL): "cc" );
+	return rc;
+}
+
+#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
+static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
+				       unsigned long address, pmd_t *pmdp)
+{
+	pmd_t pmd = *pmdp;
+
+	__pmd_idte(address, pmdp);
+	pmd_clear(pmdp);
+	return pmd;
+}
+
+#define __HAVE_ARCH_PMDP_CLEAR_FLUSH
+static inline pmd_t pmdp_clear_flush(struct vm_area_struct *vma,
+				     unsigned long address, pmd_t *pmdp)
+{
+	return pmdp_get_and_clear(vma->vm_mm, address, pmdp);
+}
+
+#define __HAVE_ARCH_PMDP_INVALIDATE
+static inline void pmdp_invalidate(struct vm_area_struct *vma,
+				   unsigned long address, pmd_t *pmdp)
+{
+	__pmd_idte(address, pmdp);
+}
+
+static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot)
+{
+	pmd_t __pmd;
+	pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot);
+	return __pmd;
+}
+
+#define pfn_pmd(pfn, pgprot)	mk_pmd_phys(__pa((pfn) << PAGE_SHIFT),(pgprot))
+#define mk_pmd(page, pgprot)	pfn_pmd(page_to_pfn(page), (pgprot))
+
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+	return pmd_val(pmd) & _SEGMENT_ENTRY_LARGE;
+}
+
+static inline int has_transparent_hugepage(void)
+{
+	return MACHINE_HAS_HPAGE ? 1 : 0;
+}
+
+static inline unsigned long pmd_pfn(pmd_t pmd)
+{
+	if (pmd_trans_huge(pmd))
+		return pmd_val(pmd) >> HPAGE_SHIFT;
+	else
+		return pmd_val(pmd) >> PAGE_SHIFT;
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 /*
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -137,6 +137,7 @@ static inline void pud_free_tlb(struct m
 #define tlb_start_vma(tlb, vma)			do { } while (0)
 #define tlb_end_vma(tlb, vma)			do { } while (0)
 #define tlb_remove_tlb_entry(tlb, ptep, addr)	do { } while (0)
+#define tlb_remove_pmd_tlb_entry(tlb, pmdp, addr)	do { } while (0)
 #define tlb_migrate_finish(mm)			do { } while (0)
 
 #endif /* _S390_TLB_H */
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -898,6 +898,28 @@ bool kernel_page_present(struct page *pa
 #endif /* CONFIG_HIBERNATION && CONFIG_DEBUG_PAGEALLOC */
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address,
+			   pmd_t *pmdp)
+{
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+	/* No need to flush TLB
+	 * On s390 reference bits are in storage key and never in TLB */
+	return pmdp_test_and_clear_young(vma, address, pmdp);
+}
+
+int pmdp_set_access_flags(struct vm_area_struct *vma,
+			  unsigned long address, pmd_t *pmdp,
+			  pmd_t entry, int dirty)
+{
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+
+	if (pmd_same(*pmdp, entry))
+		return 0;
+	pmdp_invalidate(vma, address, pmdp);
+	set_pmd_at(vma->vm_mm, address, pmdp, entry);
+	return 1;
+}
+
 static void pmdp_splitting_flush_sync(void *arg)
 {
 	/* Simply deliver the interrupt */
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -318,7 +318,7 @@ config NOMMU_INITIAL_TRIM_EXCESS
 
 config TRANSPARENT_HUGEPAGE
 	bool "Transparent Hugepage Support"
-	depends on X86 && MMU
+	depends on (X86 || (S390 && 64BIT)) && MMU
 	select COMPACTION
 	help
 	  Transparent Hugepages allows the kernel to use huge pages and



* Re: [RFC patch 7/7] thp, s390: architecture backend for thp on System z
  2012-08-23 17:17 ` [RFC patch 7/7] thp, s390: architecture backend for thp " Gerald Schaefer
@ 2012-08-24  8:07   ` Heiko Carstens
  0 siblings, 0 replies; 14+ messages in thread
From: Heiko Carstens @ 2012-08-24  8:07 UTC (permalink / raw)
  To: Gerald Schaefer
  Cc: akpm, aarcange, linux-mm, ak, hughd, linux-kernel, schwidefsky

On Thu, Aug 23, 2012 at 07:17:40PM +0200, Gerald Schaefer wrote:
> +#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
> +static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
> +					    unsigned long address,
> +					    pmd_t *pmdp)
> +{
> +	int rc = 0;
> +	int counter = PTRS_PER_PTE;
> +	unsigned long pmd_addr = pmd_val(*pmdp) & HPAGE_MASK;
> +
> +	asm volatile(
> +		"0:	rrbe	0,%2\n"
> +		"	la	%2,0(%3,%2)\n"
> +		"	brc	12,1f\n"
> +		"	lhi	%0,1\n"
> +		"1:	brct	%1,0b\n"
> +		: "+d" (rc), "+d" (counter), "+a" (pmd_addr)
> +		: "a" (4096UL): "cc" );
> +	return rc;
> +}

Just a small side note: given that rrbe is very expensive, you should
probably extend this function to make use of the rrbm instruction,
if available.
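
A hedged sketch of what that could look like; machine_has_rrbm() and
__rrbm() are hypothetical stand-ins (the real code would test a machine
facility and wrap the rrbm instruction in inline assembly), assuming
rrbm tests and resets the reference bits of 64 consecutive pages and
returns them as a bit mask:

static inline int pmdp_test_and_clear_young_rrbm(struct vm_area_struct *vma,
						 unsigned long address,
						 pmd_t *pmdp)
{
	unsigned long pmd_addr = pmd_val(*pmdp) & HPAGE_MASK;
	unsigned long mask = 0;
	int i;

	if (!machine_has_rrbm())	/* hypothetical facility check */
		return pmdp_test_and_clear_young(vma, address, pmdp);

	/* 256 pages per segment: 4 rrbm executions instead of 256 rrbe */
	for (i = 0; i < PTRS_PER_PTE / 64; i++)
		mask |= __rrbm(pmd_addr + i * 64 * PAGE_SIZE);	/* hypothetical */
	return mask != 0;
}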



* Re: [RFC patch 2/7] thp: introduce pmdp_invalidate()
  2012-08-23 17:17 ` [RFC patch 2/7] thp: introduce pmdp_invalidate() Gerald Schaefer
@ 2012-08-25 12:36   ` Hillf Danton
  2012-08-27 10:27     ` Gerald Schaefer
  0 siblings, 1 reply; 14+ messages in thread
From: Hillf Danton @ 2012-08-25 12:36 UTC (permalink / raw)
  To: Gerald Schaefer
  Cc: akpm, aarcange, linux-mm, ak, hughd, linux-kernel, schwidefsky,
	heiko.carstens

On Fri, Aug 24, 2012 at 1:17 AM, Gerald Schaefer
<gerald.schaefer@de.ibm.com> wrote:

> +#ifndef __HAVE_ARCH_PMDP_INVALIDATE
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +static inline void pmdp_invalidate(struct vm_area_struct *vma,
> +                                  unsigned long address, pmd_t *pmdp)
> +{
> +       set_pmd_at(vma->vm_mm, address, pmd, pmd_mknotpresent(*pmd));

	set_pmd_at(vma->vm_mm, address, pmdp, pmd_mknotpresent(*pmdp));  yes?

> +       flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
> +}


* Re: [RFC patch 3/7] thp: make MADV_HUGEPAGE check for mm->def_flags
  2012-08-23 17:17 ` [RFC patch 3/7] thp: make MADV_HUGEPAGE check for mm->def_flags Gerald Schaefer
@ 2012-08-25 12:47   ` Hillf Danton
  2012-08-27 10:39     ` Gerald Schaefer
  0 siblings, 1 reply; 14+ messages in thread
From: Hillf Danton @ 2012-08-25 12:47 UTC (permalink / raw)
  To: Gerald Schaefer
  Cc: akpm, aarcange, linux-mm, ak, hughd, linux-kernel, schwidefsky,
	heiko.carstens

On Fri, Aug 24, 2012 at 1:17 AM, Gerald Schaefer
<gerald.schaefer@de.ibm.com> wrote:
> This adds a check to hugepage_madvise(), to refuse MADV_HUGEPAGE
> if VM_NOHUGEPAGE is set in mm->def_flags. On System z, the VM_NOHUGEPAGE
> flag will be set in mm->def_flags for kvm processes, to prevent any
> future thp mappings. In order to also prevent MADV_HUGEPAGE on such an
> mm, hugepage_madvise() should check mm->def_flags.
>
> Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
> ---
>  mm/huge_memory.c |    4 ++++
>  1 file changed, 4 insertions(+)
>
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -1464,6 +1464,8 @@ out:
>  int hugepage_madvise(struct vm_area_struct *vma,
>                      unsigned long *vm_flags, int advice)
>  {
> +       struct mm_struct *mm = vma->vm_mm;
> +
>         switch (advice) {
>         case MADV_HUGEPAGE:
>                 /*
> @@ -1471,6 +1473,8 @@ int hugepage_madvise(struct vm_area_stru
>                  */
>                 if (*vm_flags & (VM_HUGEPAGE | VM_NO_THP))
>                         return -EINVAL;
> +               if (mm->def_flags & VM_NOHUGEPAGE)
> +                       return -EINVAL;

Looks like ifdefinery is needed to wrap the added check for s390,
plus a brief comment?

>                 *vm_flags &= ~VM_NOHUGEPAGE;
>                 *vm_flags |= VM_HUGEPAGE;


* Re: [RFC patch 2/7] thp: introduce pmdp_invalidate()
  2012-08-25 12:36   ` Hillf Danton
@ 2012-08-27 10:27     ` Gerald Schaefer
  0 siblings, 0 replies; 14+ messages in thread
From: Gerald Schaefer @ 2012-08-27 10:27 UTC (permalink / raw)
  To: Hillf Danton
  Cc: akpm, aarcange, linux-mm, ak, hughd, linux-kernel, schwidefsky,
	heiko.carstens

On Sat, 25 Aug 2012 20:36:37 +0800
Hillf Danton <dhillf@gmail.com> wrote:

> On Fri, Aug 24, 2012 at 1:17 AM, Gerald Schaefer
> <gerald.schaefer@de.ibm.com> wrote:
> 
> > +#ifndef __HAVE_ARCH_PMDP_INVALIDATE
> > +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> > +static inline void pmdp_invalidate(struct vm_area_struct *vma,
> > +                                  unsigned long address, pmd_t *pmdp)
> > +{
> > +       set_pmd_at(vma->vm_mm, address, pmd, pmd_mknotpresent(*pmd));
> 
> 	set_pmd_at(vma->vm_mm, address, pmdp, pmd_mknotpresent(*pmdp));  yes?

Ah yes, I mixed that up, thanks.



* Re: [RFC patch 3/7] thp: make MADV_HUGEPAGE check for mm->def_flags
  2012-08-25 12:47   ` Hillf Danton
@ 2012-08-27 10:39     ` Gerald Schaefer
  2012-08-27 12:26       ` Hillf Danton
  0 siblings, 1 reply; 14+ messages in thread
From: Gerald Schaefer @ 2012-08-27 10:39 UTC (permalink / raw)
  To: Hillf Danton
  Cc: akpm, aarcange, linux-mm, ak, hughd, linux-kernel, schwidefsky,
	heiko.carstens

On Sat, 25 Aug 2012 20:47:37 +0800
Hillf Danton <dhillf@gmail.com> wrote:

> On Fri, Aug 24, 2012 at 1:17 AM, Gerald Schaefer
> <gerald.schaefer@de.ibm.com> wrote:
> > This adds a check to hugepage_madvise(), to refuse MADV_HUGEPAGE
> > if VM_NOHUGEPAGE is set in mm->def_flags. On System z, the VM_NOHUGEPAGE
> > flag will be set in mm->def_flags for kvm processes, to prevent any
> > future thp mappings. In order to also prevent MADV_HUGEPAGE on such an
> > mm, hugepage_madvise() should check mm->def_flags.
> >
> > Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
> > ---
> >  mm/huge_memory.c |    4 ++++
> >  1 file changed, 4 insertions(+)
> >
> > --- a/mm/huge_memory.c
> > +++ b/mm/huge_memory.c
> > @@ -1464,6 +1464,8 @@ out:
> >  int hugepage_madvise(struct vm_area_struct *vma,
> >                      unsigned long *vm_flags, int advice)
> >  {
> > +       struct mm_struct *mm = vma->vm_mm;
> > +
> >         switch (advice) {
> >         case MADV_HUGEPAGE:
> >                 /*
> > @@ -1471,6 +1473,8 @@ int hugepage_madvise(struct vm_area_stru
> >                  */
> >                 if (*vm_flags & (VM_HUGEPAGE | VM_NO_THP))
> >                         return -EINVAL;
> > +               if (mm->def_flags & VM_NOHUGEPAGE)
> > +                       return -EINVAL;
> 
> Looks ifdefinery needed for s390 to wrap the added check, and
> a brief comment?

Hmm, architecture #ifdefs in common code are ugly. I'd rather keep
the check even if it is redundant right now for other architectures
than s390. It is not a performance critical path, and there may be
other users of that in the future.



* Re: [RFC patch 3/7] thp: make MADV_HUGEPAGE check for mm->def_flags
  2012-08-27 10:39     ` Gerald Schaefer
@ 2012-08-27 12:26       ` Hillf Danton
  0 siblings, 0 replies; 14+ messages in thread
From: Hillf Danton @ 2012-08-27 12:26 UTC (permalink / raw)
  To: gerald.schaefer
  Cc: akpm, aarcange, linux-mm, ak, hughd, linux-kernel, schwidefsky,
	heiko.carstens

On Mon, Aug 27, 2012 at 6:39 PM, Gerald Schaefer
<gerald.schaefer@de.ibm.com> wrote:
> Hmm, architecture #ifdefs in common code are ugly. I'd rather keep
> the check even if it is redundant right now for other architectures
> than s390. It is not a performance critical path, and there may be
> other users of that in the future.

Fair enough, if there are no changes in semantics.

