All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] parisc: Optimize per-pagetable spinlocks (v11)
       [not found]                 ` <4f76001d-f050-286f-4b6f-790554583eea@bell.net>
@ 2021-01-27 21:18                   ` Helge Deller
  2021-01-28  8:36                     ` Rolf Eike Beer
  2021-02-10 14:52                     ` [PATCH] parisc: Optimize per-pagetable spinlocks (v11) Guenter Roeck
  0 siblings, 2 replies; 15+ messages in thread
From: Helge Deller @ 2021-01-27 21:18 UTC (permalink / raw)
  To: John David Anglin, linux-parisc; +Cc: Matthew Wilcox

On parisc a spinlock is stored in the next page behind the pgd which
protects against parallel accesses to the pgd. That's why one additional
page (PGD_ALLOC_ORDER) is allocated for the pgd.

Matthew Wilcox suggested that we instead should use a pointer in the
struct page table for this spinlock and noted, that the comments for the
PGD_ORDER and PMD_ORDER defines were wrong.

Both suggestions are addressed in this patch. The pgd spinlock
(parisc_pgd_lock) is stored in the struct page table. In
switch_mm_irqs_off() the physical address of this lock is loaded into
cr28 (tr4) and the pgd into cr25, so that the fault handlers can
directly access the lock.

The currently implemented Hybrid L2/L3 page table scheme (where the pmd
is adjacent to the pgd) is dropped now too.

Suggested-by: Matthew Wilcox <willy@infradead.org>
Fixes: b37d1c1898b2 ("parisc: Use per-pagetable spinlock")
Signed-off-by: Helge Deller <deller@gmx.de>
Signed-off-by: John David Anglin <dave.anglin@bell.net>

diff --git a/arch/parisc/include/asm/mmu_context.h b/arch/parisc/include/asm/mmu_context.h
index 46f8c22c5977..3e02b1f75e54 100644
--- a/arch/parisc/include/asm/mmu_context.h
+++ b/arch/parisc/include/asm/mmu_context.h
@@ -5,6 +5,7 @@
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/atomic.h>
+#include <linux/spinlock.h>
 #include <asm-generic/mm_hooks.h>

 /* on PA-RISC, we actually have enough contexts to justify an allocator
@@ -50,6 +51,14 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev,
 		struct mm_struct *next, struct task_struct *tsk)
 {
 	if (prev != next) {
+#ifdef CONFIG_SMP
+		/* phys address of tlb lock in cr28 (tr4) for TLB faults */
+		struct page *page;
+
+		page = virt_to_page((unsigned long) next->pgd);
+		/* BUG_ON(!page->parisc_pgd_lock); */
+		mtctl(__pa(__ldcw_align(&page->parisc_pgd_lock->rlock.raw_lock)), 28);
+#endif
 		mtctl(__pa(next->pgd), 25);
 		load_context(next->context);
 	}
diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h
index 6b3f6740a6a6..d00313d1274e 100644
--- a/arch/parisc/include/asm/page.h
+++ b/arch/parisc/include/asm/page.h
@@ -112,7 +112,7 @@ extern int npmem_ranges;
 #else
 #define BITS_PER_PTE_ENTRY	2
 #define BITS_PER_PMD_ENTRY	2
-#define BITS_PER_PGD_ENTRY	BITS_PER_PMD_ENTRY
+#define BITS_PER_PGD_ENTRY	2
 #endif
 #define PGD_ENTRY_SIZE	(1UL << BITS_PER_PGD_ENTRY)
 #define PMD_ENTRY_SIZE	(1UL << BITS_PER_PMD_ENTRY)
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index a6482b2ce0ea..9c1a54543c87 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -15,47 +15,45 @@
 #define __HAVE_ARCH_PGD_FREE
 #include <asm-generic/pgalloc.h>

-/* Allocate the top level pgd (page directory)
- *
- * Here (for 64 bit kernels) we implement a Hybrid L2/L3 scheme: we
- * allocate the first pmd adjacent to the pgd.  This means that we can
- * subtract a constant offset to get to it.  The pmd and pgd sizes are
- * arranged so that a single pmd covers 4GB (giving a full 64-bit
- * process access to 8TB) so our lookups are effectively L2 for the
- * first 4GB of the kernel (i.e. for all ILP32 processes and all the
- * kernel for machines with under 4GB of memory) */
+/* Allocate the top level pgd (page directory) */
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	pgd_t *pgd = (pgd_t *)__get_free_pages(GFP_KERNEL,
-					       PGD_ALLOC_ORDER);
-	pgd_t *actual_pgd = pgd;
-
-	if (likely(pgd != NULL)) {
-		memset(pgd, 0, PAGE_SIZE<<PGD_ALLOC_ORDER);
-#if CONFIG_PGTABLE_LEVELS == 3
-		actual_pgd += PTRS_PER_PGD;
-		/* Populate first pmd with allocated memory.  We mark it
-		 * with PxD_FLAG_ATTACHED as a signal to the system that this
-		 * pmd entry may not be cleared. */
-		set_pgd(actual_pgd, __pgd((PxD_FLAG_PRESENT |
-				        PxD_FLAG_VALID |
-					PxD_FLAG_ATTACHED)
-			+ (__u32)(__pa((unsigned long)pgd) >> PxD_VALUE_SHIFT)));
-		/* The first pmd entry also is marked with PxD_FLAG_ATTACHED as
-		 * a signal that this pmd may not be freed */
-		set_pgd(pgd, __pgd(PxD_FLAG_ATTACHED));
+	pgd_t *pgd;
+#ifdef CONFIG_SMP
+	spinlock_t *pgd_lock;
+	struct page *page;
 #endif
+
+	pgd = (pgd_t *) __get_free_pages(GFP_KERNEL, PGD_ORDER);
+	if (unlikely(pgd == NULL))
+		return NULL;
+
+	memset(pgd, 0, PAGE_SIZE << PGD_ORDER);
+
+#ifdef CONFIG_SMP
+	/* allocate & initialize pgd_spinlock() for this PGD */
+	pgd_lock = kmalloc(sizeof(*pgd_lock), GFP_KERNEL);
+	if (unlikely(pgd_lock == NULL)) {
+		free_pages((unsigned long)pgd, PGD_ORDER);
+		return NULL;
 	}
-	spin_lock_init(pgd_spinlock(actual_pgd));
-	return actual_pgd;
+
+	page = virt_to_page((unsigned long) pgd);
+	spin_lock_init(pgd_lock);
+	page->parisc_pgd_lock = pgd_lock;
+#endif
+
+	return pgd;
 }

 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
-#if CONFIG_PGTABLE_LEVELS == 3
-	pgd -= PTRS_PER_PGD;
+#ifdef CONFIG_SMP
+	spinlock_t *lock = pgd_spinlock(pgd);
+	kfree(lock);
 #endif
-	free_pages((unsigned long)pgd, PGD_ALLOC_ORDER);
+
+	free_pages((unsigned long)pgd, PGD_ORDER);
 }

 #if CONFIG_PGTABLE_LEVELS == 3
@@ -68,43 +66,27 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 			(__u32)(__pa((unsigned long)pmd) >> PxD_VALUE_SHIFT)));
 }

-static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return (pmd_t *)__get_free_pages(GFP_PGTABLE_KERNEL, PMD_ORDER);
+	pmd_t *pmd;
+
+	pmd = (pmd_t *)__get_free_pages(GFP_PGTABLE_KERNEL, PMD_ORDER);
+	if (pmd)
+		memset ((void *)pmd, 0, PAGE_SIZE << PMD_ORDER);
+	return pmd;
 }

 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 {
-	if (pmd_flag(*pmd) & PxD_FLAG_ATTACHED) {
-		/*
-		 * This is the permanent pmd attached to the pgd;
-		 * cannot free it.
-		 * Increment the counter to compensate for the decrement
-		 * done by generic mm code.
-		 */
-		mm_inc_nr_pmds(mm);
-		return;
-	}
 	free_pages((unsigned long)pmd, PMD_ORDER);
 }
-
 #endif

 static inline void
 pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
 {
-#if CONFIG_PGTABLE_LEVELS == 3
-	/* preserve the gateway marker if this is the beginning of
-	 * the permanent pmd */
-	if(pmd_flag(*pmd) & PxD_FLAG_ATTACHED)
-		set_pmd(pmd, __pmd((PxD_FLAG_PRESENT |
-				PxD_FLAG_VALID |
-				PxD_FLAG_ATTACHED)
-			+ (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT)));
-	else
-#endif
-		set_pmd(pmd, __pmd((PxD_FLAG_PRESENT | PxD_FLAG_VALID)
-			+ (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT)));
+	set_pmd(pmd, __pmd((PxD_FLAG_PRESENT | PxD_FLAG_VALID)
+		+ (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT)));
 }

 #define pmd_populate(mm, pmd, pte_page) \
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 75cf84070fc9..c08c7b37e5f4 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -23,8 +23,6 @@
 #include <asm/processor.h>
 #include <asm/cache.h>

-static inline spinlock_t *pgd_spinlock(pgd_t *);
-
 /*
  * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
  * memory.  For the return value to be meaningful, ADDR must be >=
@@ -40,14 +38,18 @@ static inline spinlock_t *pgd_spinlock(pgd_t *);
  */
 #define kern_addr_valid(addr)	(1)

+/* PTE updates are protected by a lock per PGD in the page table struct. */
+extern spinlock_t pa_swapper_pg_lock;
+#ifdef CONFIG_SMP
+extern spinlock_t *pgd_spinlock(pgd_t *pgd);
+#else
+#define pgd_spinlock(x) &pa_swapper_pg_lock
+#endif
+
 /* This is for the serialization of PxTLB broadcasts. At least on the N class
  * systems, only one PxTLB inter processor broadcast can be active at any one
- * time on the Merced bus.
-
- * PTE updates are protected by locks in the PMD.
- */
+ * time on the Merced bus. */
 extern spinlock_t pa_tlb_flush_lock;
-extern spinlock_t pa_swapper_pg_lock;
 #if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
 extern int pa_serialize_tlb_flushes;
 #else
@@ -94,10 +96,12 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
 #define set_pte_at(mm, addr, ptep, pteval)			\
 	do {							\
 		unsigned long flags;				\
-		spin_lock_irqsave(pgd_spinlock((mm)->pgd), flags);\
+		spinlock_t *pgd_lock;				\
+		pgd_lock = pgd_spinlock((mm)->pgd);		\
+		spin_lock_irqsave(pgd_lock, flags);		\
 		set_pte(ptep, pteval);				\
 		purge_tlb_entries(mm, addr);			\
-		spin_unlock_irqrestore(pgd_spinlock((mm)->pgd), flags);\
+		spin_unlock_irqrestore(pgd_lock, flags);	\
 	} while (0)

 #endif /* !__ASSEMBLY__ */
@@ -120,12 +124,10 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
 #define KERNEL_INITIAL_SIZE	(1 << KERNEL_INITIAL_ORDER)

 #if CONFIG_PGTABLE_LEVELS == 3
-#define PGD_ORDER	1 /* Number of pages per pgd */
-#define PMD_ORDER	1 /* Number of pages per pmd */
-#define PGD_ALLOC_ORDER	(2 + 1) /* first pgd contains pmd */
+#define PMD_ORDER	1
+#define PGD_ORDER	0
 #else
-#define PGD_ORDER	1 /* Number of pages per pgd */
-#define PGD_ALLOC_ORDER	(PGD_ORDER + 1)
+#define PGD_ORDER	1
 #endif

 /* Definitions for 3rd level (we use PLD here for Page Lower directory
@@ -240,11 +242,9 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
  * able to effectively address 40/42/44-bits of physical address space
  * depending on 4k/16k/64k PAGE_SIZE */
 #define _PxD_PRESENT_BIT   31
-#define _PxD_ATTACHED_BIT  30
-#define _PxD_VALID_BIT     29
+#define _PxD_VALID_BIT     30

 #define PxD_FLAG_PRESENT  (1 << xlate_pabit(_PxD_PRESENT_BIT))
-#define PxD_FLAG_ATTACHED (1 << xlate_pabit(_PxD_ATTACHED_BIT))
 #define PxD_FLAG_VALID    (1 << xlate_pabit(_PxD_VALID_BIT))
 #define PxD_FLAG_MASK     (0xf)
 #define PxD_FLAG_SHIFT    (4)
@@ -326,23 +326,10 @@ extern unsigned long *empty_zero_page;
 #define pgd_flag(x)	(pgd_val(x) & PxD_FLAG_MASK)
 #define pgd_address(x)	((unsigned long)(pgd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT)

-#if CONFIG_PGTABLE_LEVELS == 3
-/* The first entry of the permanent pmd is not there if it contains
- * the gateway marker */
-#define pmd_none(x)	(!pmd_val(x) || pmd_flag(x) == PxD_FLAG_ATTACHED)
-#else
 #define pmd_none(x)	(!pmd_val(x))
-#endif
 #define pmd_bad(x)	(!(pmd_flag(x) & PxD_FLAG_VALID))
 #define pmd_present(x)	(pmd_flag(x) & PxD_FLAG_PRESENT)
 static inline void pmd_clear(pmd_t *pmd) {
-#if CONFIG_PGTABLE_LEVELS == 3
-	if (pmd_flag(*pmd) & PxD_FLAG_ATTACHED)
-		/* This is the entry pointing to the permanent pmd
-		 * attached to the pgd; cannot clear it */
-		set_pmd(pmd, __pmd(PxD_FLAG_ATTACHED));
-	else
-#endif
 		set_pmd(pmd,  __pmd(0));
 }

@@ -358,12 +345,6 @@ static inline void pmd_clear(pmd_t *pmd) {
 #define pud_bad(x)      (!(pud_flag(x) & PxD_FLAG_VALID))
 #define pud_present(x)  (pud_flag(x) & PxD_FLAG_PRESENT)
 static inline void pud_clear(pud_t *pud) {
-#if CONFIG_PGTABLE_LEVELS == 3
-	if(pud_flag(*pud) & PxD_FLAG_ATTACHED)
-		/* This is the permanent pmd attached to the pud; cannot
-		 * free it */
-		return;
-#endif
 	set_pud(pud, __pud(0));
 }
 #endif
@@ -456,32 +437,25 @@ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)		((pte_t) { (x).val })

-
-static inline spinlock_t *pgd_spinlock(pgd_t *pgd)
-{
-	if (unlikely(pgd == swapper_pg_dir))
-		return &pa_swapper_pg_lock;
-	return (spinlock_t *)((char *)pgd + (PAGE_SIZE << (PGD_ALLOC_ORDER - 1)));
-}
-
-
 static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
 {
 	pte_t pte;
 	unsigned long flags;
+	spinlock_t *pgd_lock;

 	if (!pte_young(*ptep))
 		return 0;

-	spin_lock_irqsave(pgd_spinlock(vma->vm_mm->pgd), flags);
+	pgd_lock = pgd_spinlock(vma->vm_mm->pgd);
+	spin_lock_irqsave(pgd_lock, flags);
 	pte = *ptep;
 	if (!pte_young(pte)) {
-		spin_unlock_irqrestore(pgd_spinlock(vma->vm_mm->pgd), flags);
+		spin_unlock_irqrestore(pgd_lock, flags);
 		return 0;
 	}
 	set_pte(ptep, pte_mkold(pte));
 	purge_tlb_entries(vma->vm_mm, addr);
-	spin_unlock_irqrestore(pgd_spinlock(vma->vm_mm->pgd), flags);
+	spin_unlock_irqrestore(pgd_lock, flags);
 	return 1;
 }

@@ -490,12 +464,14 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 {
 	pte_t old_pte;
 	unsigned long flags;
+	spinlock_t *pgd_lock;

-	spin_lock_irqsave(pgd_spinlock(mm->pgd), flags);
+	pgd_lock = pgd_spinlock(mm->pgd);
+	spin_lock_irqsave(pgd_lock, flags);
 	old_pte = *ptep;
 	set_pte(ptep, __pte(0));
 	purge_tlb_entries(mm, addr);
-	spin_unlock_irqrestore(pgd_spinlock(mm->pgd), flags);
+	spin_unlock_irqrestore(pgd_lock, flags);

 	return old_pte;
 }
@@ -503,10 +479,13 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 	unsigned long flags;
-	spin_lock_irqsave(pgd_spinlock(mm->pgd), flags);
+	spinlock_t *pgd_lock;
+
+	pgd_lock = pgd_spinlock(mm->pgd);
+	spin_lock_irqsave(pgd_lock, flags);
 	set_pte(ptep, pte_wrprotect(*ptep));
 	purge_tlb_entries(mm, addr);
-	spin_unlock_irqrestore(pgd_spinlock(mm->pgd), flags);
+	spin_unlock_irqrestore(pgd_lock, flags);
 }

 #define pte_same(A,B)	(pte_val(A) == pte_val(B))
diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c
index 305768a40773..cd2cc1b1648c 100644
--- a/arch/parisc/kernel/asm-offsets.c
+++ b/arch/parisc/kernel/asm-offsets.c
@@ -268,7 +268,6 @@ int main(void)
 	DEFINE(ASM_BITS_PER_PGD, BITS_PER_PGD);
 	DEFINE(ASM_BITS_PER_PMD, BITS_PER_PMD);
 	DEFINE(ASM_BITS_PER_PTE, BITS_PER_PTE);
-	DEFINE(ASM_PGD_PMD_OFFSET, -(PAGE_SIZE << PGD_ORDER));
 	DEFINE(ASM_PMD_ENTRY, ((PAGE_OFFSET & PMD_MASK) >> PMD_SHIFT));
 	DEFINE(ASM_PGD_ENTRY, PAGE_OFFSET >> PGDIR_SHIFT);
 	DEFINE(ASM_PGD_ENTRY_SIZE, PGD_ENTRY_SIZE);
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index beba9816cc6c..538114d81760 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -35,10 +35,9 @@
 	.level 2.0
 #endif

-	.import		pa_tlb_lock,data
-	.macro  load_pa_tlb_lock reg
-	mfctl		%cr25,\reg
-	addil		L%(PAGE_SIZE << (PGD_ALLOC_ORDER - 1)),\reg
+	/* Load parisc_pgd_lock address from cr28/tr4 for this pgd (cr25) */
+	.macro  load_pgd_spinlock reg
+	mfctl	%cr28,\reg
 	.endm

 	/* space_to_prot macro creates a prot id from a space id */
@@ -407,7 +406,9 @@
 # endif
 #endif
 	dep             %r0,31,PAGE_SHIFT,\pmd  /* clear offset */
+#if CONFIG_PGTABLE_LEVELS < 3
 	copy		%r0,\pte
+#endif
 	ldw,s		\index(\pmd),\pmd
 	bb,>=,n		\pmd,_PxD_PRESENT_BIT,\fault
 	dep		%r0,31,PxD_FLAG_SHIFT,\pmd /* clear flags */
@@ -417,29 +418,14 @@
 	shladd		\index,BITS_PER_PTE_ENTRY,\pmd,\pmd /* pmd is now pte */
 	.endm

-	/* Look up PTE in a 3-Level scheme.
-	 *
-	 * Here we implement a Hybrid L2/L3 scheme: we allocate the
-	 * first pmd adjacent to the pgd.  This means that we can
-	 * subtract a constant offset to get to it.  The pmd and pgd
-	 * sizes are arranged so that a single pmd covers 4GB (giving
-	 * a full LP64 process access to 8TB) so our lookups are
-	 * effectively L2 for the first 4GB of the kernel (i.e. for
-	 * all ILP32 processes and all the kernel for machines with
-	 * under 4GB of memory) */
+	/* Look up PTE in a 3-Level scheme. */
 	.macro		L3_ptep pgd,pte,index,va,fault
-#if CONFIG_PGTABLE_LEVELS == 3 /* we might have a 2-Level scheme, e.g. with 16kb page size */
+#if CONFIG_PGTABLE_LEVELS == 3
+	copy		%r0,\pte
 	extrd,u		\va,63-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index
-	extrd,u,*=	\va,63-ASM_PGDIR_SHIFT,64-ASM_PGDIR_SHIFT,%r0
 	ldw,s		\index(\pgd),\pgd
-	extrd,u,*=	\va,63-ASM_PGDIR_SHIFT,64-ASM_PGDIR_SHIFT,%r0
 	bb,>=,n		\pgd,_PxD_PRESENT_BIT,\fault
-	extrd,u,*=	\va,63-ASM_PGDIR_SHIFT,64-ASM_PGDIR_SHIFT,%r0
-	shld		\pgd,PxD_VALUE_SHIFT,\index
-	extrd,u,*=	\va,63-ASM_PGDIR_SHIFT,64-ASM_PGDIR_SHIFT,%r0
-	copy		\index,\pgd
-	extrd,u,*<>	\va,63-ASM_PGDIR_SHIFT,64-ASM_PGDIR_SHIFT,%r0
-	ldo		ASM_PGD_PMD_OFFSET(\pgd),\pgd
+	shld		\pgd,PxD_VALUE_SHIFT,\pgd
 #endif
 	L2_ptep		\pgd,\pte,\index,\va,\fault
 	.endm
@@ -448,7 +434,7 @@
 	.macro		tlb_lock	spc,ptp,pte,tmp,tmp1,fault
 #ifdef CONFIG_SMP
 98:	cmpib,COND(=),n	0,\spc,2f
-	load_pa_tlb_lock \tmp
+	load_pgd_spinlock \tmp
 1:	LDCW		0(\tmp),\tmp1
 	cmpib,COND(=)	0,\tmp1,1b
 	nop
@@ -480,9 +466,9 @@
 	/* Release pa_tlb_lock lock. */
 	.macro		tlb_unlock1	spc,tmp
 #ifdef CONFIG_SMP
-98:	load_pa_tlb_lock \tmp
-99:	ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
+98:	load_pgd_spinlock \tmp
 	tlb_unlock0	\spc,\tmp
+99:	ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
 #endif
 	.endm

diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 3ec633b11b54..4f3f180b0b20 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -681,6 +681,24 @@ static void __init parisc_bootmem_free(void)
 	free_area_init(max_zone_pfn);
 }

+static void __init parisc_init_pgd_lock(void)
+{
+	struct page *page;
+
+	page = virt_to_page((unsigned long) &swapper_pg_dir);
+	page->parisc_pgd_lock = &pa_swapper_pg_lock;
+}
+
+#ifdef CONFIG_SMP
+spinlock_t *pgd_spinlock(pgd_t *pgd)
+{
+	struct page *page;
+
+	page = virt_to_page((unsigned long) pgd);
+	return page->parisc_pgd_lock;
+}
+#endif
+
 void __init paging_init(void)
 {
 	setup_bootmem();
@@ -691,6 +709,7 @@ void __init paging_init(void)

 	sparse_init();
 	parisc_bootmem_free();
+	parisc_init_pgd_lock();
 }

 #ifdef CONFIG_PA20
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 07d9acb5b19c..d8520af04b7d 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -150,6 +150,7 @@ struct page {
 			union {
 				struct mm_struct *pt_mm; /* x86 pgds only */
 				atomic_t pt_frag_refcount; /* powerpc */
+				spinlock_t *parisc_pgd_lock; /* parisc pgds only */
 			};
 #if ALLOC_SPLIT_PTLOCKS
 			spinlock_t *ptl;

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH] parisc: Optimize per-pagetable spinlocks (v11)
  2021-01-27 21:18                   ` [PATCH] parisc: Optimize per-pagetable spinlocks (v11) Helge Deller
@ 2021-01-28  8:36                     ` Rolf Eike Beer
  2021-01-28 15:24                       ` [PATCH] parisc: Optimize per-pagetable spinlocks (v12) Helge Deller
  2021-02-10 14:52                     ` [PATCH] parisc: Optimize per-pagetable spinlocks (v11) Guenter Roeck
  1 sibling, 1 reply; 15+ messages in thread
From: Rolf Eike Beer @ 2021-01-28  8:36 UTC (permalink / raw)
  To: John David Anglin, linux-parisc, Helge Deller; +Cc: Matthew Wilcox

[-- Attachment #1: Type: text/plain, Size: 4603 bytes --]

Am Mittwoch, 27. Januar 2021, 22:18:51 CET schrieb Helge Deller:
> On parisc a spinlock is stored in the next page behind the pgd which
> protects against parallel accesses to the pgd. That's why one additional
> page (PGD_ALLOC_ORDER) is allocated for the pgd.
> 
> Matthew Wilcox suggested that we instead should use a pointer in the
> struct page table for this spinlock and noted, that the comments for the
> PGD_ORDER and PMD_ORDER defines were wrong.
> 
> Both suggestions are addressed in this patch. The pgd spinlock
> (parisc_pgd_lock) is stored in the struct page table. In
> switch_mm_irqs_off() the physical address of this lock is loaded into
> cr28 (tr4) and the pgd into cr25, so that the fault handlers can
> directly access the lock.
> 
> The currently implemented Hybrid L2/L3 page table scheme (where the pmd
> is adjacent to the pgd) is dropped now too.
> 
> Suggested-by: Matthew Wilcox <willy@infradead.org>
> Fixes: b37d1c1898b2 ("parisc: Use per-pagetable spinlock")
> Signed-off-by: Helge Deller <deller@gmx.de>
> Signed-off-by: John David Anglin <dave.anglin@bell.net>
> 
> diff --git a/arch/parisc/include/asm/mmu_context.h
> b/arch/parisc/include/asm/mmu_context.h index 46f8c22c5977..3e02b1f75e54
> 100644
> --- a/arch/parisc/include/asm/mmu_context.h
> +++ b/arch/parisc/include/asm/mmu_context.h
> @@ -5,6 +5,7 @@
>  #include <linux/mm.h>
>  #include <linux/sched.h>
>  #include <linux/atomic.h>
> +#include <linux/spinlock.h>
>  #include <asm-generic/mm_hooks.h>
> 
>  /* on PA-RISC, we actually have enough contexts to justify an allocator
> @@ -50,6 +51,14 @@ static inline void switch_mm_irqs_off(struct mm_struct
> *prev, struct mm_struct *next, struct task_struct *tsk)
>  {
>  	if (prev != next) {
> +#ifdef CONFIG_SMP
> +		/* phys address of tlb lock in cr28 (tr4) for TLB faults 
*/
> +		struct page *page;
> +
> +		page = virt_to_page((unsigned long) next->pgd);

This is one of the few cases you have a space after the cast.

Another one is in pgd_alloc():

>+       page = virt_to_page((unsigned long) pgd);

> diff --git a/arch/parisc/include/asm/pgalloc.h
> b/arch/parisc/include/asm/pgalloc.h index a6482b2ce0ea..9c1a54543c87 100644
> --- a/arch/parisc/include/asm/pgalloc.h
> +++ b/arch/parisc/include/asm/pgalloc.h
> @@ -68,43 +66,27 @@ static inline void pud_populate(struct mm_struct *mm,
> pud_t *pud, pmd_t *pmd) (__u32)(__pa((unsigned long)pmd) >>
> PxD_VALUE_SHIFT)));
>  }
> 
> -static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long
> address) 
> +static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned
> long addr)

Does that change add benefit?

> {
> -	return (pmd_t *)__get_free_pages(GFP_PGTABLE_KERNEL, PMD_ORDER);
> +	pmd_t *pmd;
> +
> +	pmd = (pmd_t *)__get_free_pages(GFP_PGTABLE_KERNEL, PMD_ORDER);
> +	if (pmd)

Maybe annotate that as likely() as it was in pgd_alloc() before?

> +		memset ((void *)pmd, 0, PAGE_SIZE << PMD_ORDER);
> +	return pmd;
>  }

> diff --git a/arch/parisc/include/asm/pgtable.h
> b/arch/parisc/include/asm/pgtable.h index 75cf84070fc9..c08c7b37e5f4 100644
> --- a/arch/parisc/include/asm/pgtable.h
> +++ b/arch/parisc/include/asm/pgtable.h
> @@ -94,10 +96,12 @@ static inline void purge_tlb_entries(struct mm_struct
> *mm, unsigned long addr) #define set_pte_at(mm, addr, ptep, pteval)		
	\
>  	do {							
\
>  		unsigned long flags;				
\
> -		spin_lock_irqsave(pgd_spinlock((mm)->pgd), flags);\
> +		spinlock_t *pgd_lock;				
\
> +		pgd_lock = pgd_spinlock((mm)->pgd);		\

These should just fit into a single line.

> +		spin_lock_irqsave(pgd_lock, flags);		\
>  		set_pte(ptep, pteval);				
\
>  		purge_tlb_entries(mm, addr);			
\
> -		spin_unlock_irqrestore(pgd_spinlock((mm)->pgd), flags);\
> +		spin_unlock_irqrestore(pgd_lock, flags);	\
>  	} while (0)
> 
>  #endif /* !__ASSEMBLY__ */

> diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
> index 3ec633b11b54..4f3f180b0b20 100644
> --- a/arch/parisc/mm/init.c
> +++ b/arch/parisc/mm/init.c
> @@ -681,6 +681,24 @@ static void __init parisc_bootmem_free(void)
>  	free_area_init(max_zone_pfn);
>  }
> 
> +static void __init parisc_init_pgd_lock(void)
> +{
> +	struct page *page;
> +
> +	page = virt_to_page((unsigned long) &swapper_pg_dir);

Another space.

> +	page->parisc_pgd_lock = &pa_swapper_pg_lock;
> +}
> +
> +#ifdef CONFIG_SMP
> +spinlock_t *pgd_spinlock(pgd_t *pgd)
> +{
> +	struct page *page;
> +
> +	page = virt_to_page((unsigned long) pgd);
> +	return page->parisc_pgd_lock;
> +}
> +#endif

This is very simple, and I suspect it is called rather often. Wouldn't it
make sense to make it inline?

Eike

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] parisc: Optimize per-pagetable spinlocks (v12)
  2021-01-28  8:36                     ` Rolf Eike Beer
@ 2021-01-28 15:24                       ` Helge Deller
  0 siblings, 0 replies; 15+ messages in thread
From: Helge Deller @ 2021-01-28 15:24 UTC (permalink / raw)
  To: Rolf Eike Beer, John David Anglin, linux-parisc; +Cc: Matthew Wilcox


[-- Attachment #1.1: Type: text/plain, Size: 2097 bytes --]

On 1/28/21 9:36 AM, Rolf Eike Beer wrote:
> Am Mittwoch, 27. Januar 2021, 22:18:51 CET schrieb Helge Deller:
>> On parisc a spinlock is stored in the next page behind the pgd which
>> protects against parallel accesses to the pgd. That's why one additional
>> page (PGD_ALLOC_ORDER) is allocated for the pgd.
>>
>> Matthew Wilcox suggested that we instead should use a pointer in the
>> struct page table for this spinlock and noted, that the comments for the
>> PGD_ORDER and PMD_ORDER defines were wrong.
>>
>> Both suggestions are addressed in this patch. The pgd spinlock
>> (parisc_pgd_lock) is stored in the struct page table. In
>> switch_mm_irqs_off() the physical address of this lock is loaded into
>> cr28 (tr4) and the pgd into cr25, so that the fault handlers can
>> directly access the lock.
>>
>> The currently implemented Hybrid L2/L3 page table scheme (where the pmd
>> is adjacent to the pgd) is dropped now too.
>>
>> Suggested-by: Matthew Wilcox <willy@infradead.org>
>> Fixes: b37d1c1898b2 ("parisc: Use per-pagetable spinlock")
>> Signed-off-by: Helge Deller <deller@gmx.de>
>> Signed-off-by: John David Anglin <dave.anglin@bell.net>
>>

[...lots of suggestions by Rolf...]

Rolf, thanks a lot for your review.
I've addressed most of your suggestions and published a v12 patch here:
https://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux.git/log/?h=pgtable_spinlock-v12

>> diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
>> index 3ec633b11b54..4f3f180b0b20 100644
>> --- a/arch/parisc/mm/init.c
>> +++ b/arch/parisc/mm/init.c
>> +#ifdef CONFIG_SMP
>> +spinlock_t *pgd_spinlock(pgd_t *pgd)
>> +{
>> +	struct page *page;
>> +
>> +	page = virt_to_page((unsigned long) pgd);
>> +	return page->parisc_pgd_lock;
>> +}
>> +#endif
> 
> This is very simple, and I suspect it is called rather often. Wouldn't it
> make sense to make it inline?

No, it's not simple, that's why I haven't inlined it.
The virt_to_page() expands to many asm instructions based on the selected memory model.

Thanks!
Helge


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] parisc: Optimize per-pagetable spinlocks (v11)
  2021-01-27 21:18                   ` [PATCH] parisc: Optimize per-pagetable spinlocks (v11) Helge Deller
  2021-01-28  8:36                     ` Rolf Eike Beer
@ 2021-02-10 14:52                     ` Guenter Roeck
  2021-02-10 17:23                       ` Helge Deller
  1 sibling, 1 reply; 15+ messages in thread
From: Guenter Roeck @ 2021-02-10 14:52 UTC (permalink / raw)
  To: Helge Deller; +Cc: John David Anglin, linux-parisc, Matthew Wilcox

On Wed, Jan 27, 2021 at 10:18:51PM +0100, Helge Deller wrote:
> On parisc a spinlock is stored in the next page behind the pgd which
> protects against parallel accesses to the pgd. That's why one additional
> page (PGD_ALLOC_ORDER) is allocated for the pgd.
> 
> Matthew Wilcox suggested that we instead should use a pointer in the
> struct page table for this spinlock and noted, that the comments for the
> PGD_ORDER and PMD_ORDER defines were wrong.
> 
> Both suggestions are addressed in this patch. The pgd spinlock
> (parisc_pgd_lock) is stored in the struct page table. In
> switch_mm_irqs_off() the physical address of this lock is loaded into
> cr28 (tr4) and the pgd into cr25, so that the fault handlers can
> directly access the lock.
> 
> The currently implemented Hybrid L2/L3 page table scheme (where the pmd
> is adjacent to the pgd) is dropped now too.
> 
> Suggested-by: Matthew Wilcox <willy@infradead.org>
> Fixes: b37d1c1898b2 ("parisc: Use per-pagetable spinlock")
> Signed-off-by: Helge Deller <deller@gmx.de>
> Signed-off-by: John David Anglin <dave.anglin@bell.net>

This patch results in:

BUG: spinlock recursion on CPU#0, swapper/0/1
 lock: 0x12226d14, .magic: dead4ead, .owner: swapper/0/1, .owner_cpu: 0
CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.11.0-rc7-next-20210209-32bit #1
Hardware name: 9000/778/B160L
Backtrace:
 [<1019f9bc>] show_stack+0x34/0x48
 [<10a65278>] dump_stack+0x94/0x114
 [<10219f4c>] spin_dump+0x8c/0xb8
 [<1021a0b4>] do_raw_spin_lock+0xdc/0x108
 [<10a7367c>] _raw_spin_lock_irqsave+0x30/0x48
 [<102bf41c>] handle_mm_fault+0x5e8/0xdb0
 [<102b813c>] __get_user_pages.part.0+0x1b0/0x3d4
 [<102b8900>] __get_user_pages_remote+0x134/0x34c
 [<102b8b80>] get_user_pages_remote+0x68/0x90
 [<102fccb0>] get_arg_page+0x94/0xd8
 [<102fdd84>] copy_string_kernel+0xc4/0x234
 [<102fe70c>] kernel_execve+0xcc/0x1a4
 [<10a58d94>] run_init_process+0xbc/0xe0
 [<10a70d50>] kernel_init+0x98/0x13c
 [<1019a01c>] ret_from_kernel_thread+0x1c/0x24

when trying to boot parisc/hppa images in qemu. Reverting this patch fixes
the problem.

Bisect log attached.

Guenter

---
# bad: [a4bfd8d46ac357c12529e4eebb6c89502b03ecc9] Add linux-next specific files for 20210209
# good: [92bf22614b21a2706f4993b278017e437f7785b3] Linux 5.11-rc7
git bisect start 'HEAD' 'v5.11-rc7'
# bad: [a8eb921ba7e8e77d994a1c6c69c8ef08456ecf53] Merge remote-tracking branch 'crypto/master'
git bisect bad a8eb921ba7e8e77d994a1c6c69c8ef08456ecf53
# bad: [b68df186dae8ae890df08059bb068b78252b053a] Merge remote-tracking branch 'hid/for-next'
git bisect bad b68df186dae8ae890df08059bb068b78252b053a
# good: [323c9f6fb99b033883b404ecbc811e7b283a60b3] Merge remote-tracking branch 'sunxi/sunxi/for-next'
git bisect good 323c9f6fb99b033883b404ecbc811e7b283a60b3
# bad: [9d40a7a579a5c51fad0d734a4ed39e39b858fca2] Merge remote-tracking branch 'btrfs/for-next'
git bisect bad 9d40a7a579a5c51fad0d734a4ed39e39b858fca2
# bad: [afe0c3efe88f6c295542fd336d5f604115e9184f] Merge remote-tracking branch 'powerpc/next'
git bisect bad afe0c3efe88f6c295542fd336d5f604115e9184f
# good: [c276186556ed2ce6d30da69ce275234a7df85b09] Merge remote-tracking branch 'mips/mips-next'
git bisect good c276186556ed2ce6d30da69ce275234a7df85b09
# good: [755d664174463791489dddf34c33308b61de68c3] powerpc: DebugException remove args
git bisect good 755d664174463791489dddf34c33308b61de68c3
# good: [26418b36a11f2eaf2556aa8cefe86132907e311f] powerpc/64s/radix: refactor TLB flush type selection
git bisect good 26418b36a11f2eaf2556aa8cefe86132907e311f
# bad: [d7bbb31642d2bd4aa5aad3595061a5e96c32d91d] Merge remote-tracking branch 'parisc-hd/for-next'
git bisect bad d7bbb31642d2bd4aa5aad3595061a5e96c32d91d
# good: [2261352157a932717ec08b9dd18d1bfbb7c37c52] Merge remote-tracking branch 'openrisc/or1k-5.11-fixes' into or1k-5.12-updates
git bisect good 2261352157a932717ec08b9dd18d1bfbb7c37c52
# good: [3c92b9eed3ae088ade3688fb356a90926c1c8ee4] Merge remote-tracking branch 'openrisc/for-next'
git bisect good 3c92b9eed3ae088ade3688fb356a90926c1c8ee4
# good: [accb4993d2ee6b644a3d01851cf2fb2c1e0813a6] parisc: Fix IVT checksum calculation wrt HPMC
git bisect good accb4993d2ee6b644a3d01851cf2fb2c1e0813a6
# bad: [4add5f175b1e4e71c06493f9a2c52490d2ea4365] parisc: Optimize per-pagetable spinlocks
git bisect bad 4add5f175b1e4e71c06493f9a2c52490d2ea4365
# good: [0d2d3836dd0a597e514e6231fbf2ae3944f5d38c] parisc: Bump 64-bit IRQ stack size to 64 KB
git bisect good 0d2d3836dd0a597e514e6231fbf2ae3944f5d38c
# first bad commit: [4add5f175b1e4e71c06493f9a2c52490d2ea4365] parisc: Optimize per-pagetable spinlocks

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] parisc: Optimize per-pagetable spinlocks (v11)
  2021-02-10 14:52                     ` [PATCH] parisc: Optimize per-pagetable spinlocks (v11) Guenter Roeck
@ 2021-02-10 17:23                       ` Helge Deller
  2021-02-10 18:57                         ` John David Anglin
  0 siblings, 1 reply; 15+ messages in thread
From: Helge Deller @ 2021-02-10 17:23 UTC (permalink / raw)
  To: Guenter Roeck; +Cc: John David Anglin, linux-parisc, Matthew Wilcox

On 2/10/21 3:52 PM, Guenter Roeck wrote:
> On Wed, Jan 27, 2021 at 10:18:51PM +0100, Helge Deller wrote:
>> On parisc a spinlock is stored in the next page behind the pgd which
>> protects against parallel accesses to the pgd. That's why one additional
>> page (PGD_ALLOC_ORDER) is allocated for the pgd.
>>
>> Matthew Wilcox suggested that we instead should use a pointer in the
>> struct page table for this spinlock and noted, that the comments for the
>> PGD_ORDER and PMD_ORDER defines were wrong.
>>
>> Both suggestions are addressed in this patch. The pgd spinlock
>> (parisc_pgd_lock) is stored in the struct page table. In
>> switch_mm_irqs_off() the physical address of this lock is loaded into
>> cr28 (tr4) and the pgd into cr25, so that the fault handlers can
>> directly access the lock.
>>
>> The currently implemented Hybrid L2/L3 page table scheme (where the pmd
>> is adjacent to the pgd) is dropped now too.
>>
>> Suggested-by: Matthew Wilcox <willy@infradead.org>
>> Fixes: b37d1c1898b2 ("parisc: Use per-pagetable spinlock")
>> Signed-off-by: Helge Deller <deller@gmx.de>
>> Signed-off-by: John David Anglin <dave.anglin@bell.net>
>
> This patch results in:
>
> BUG: spinlock recursion on CPU#0, swapper/0/1
>   lock: 0x12226d14, .magic: dead4ead, .owner: swapper/0/1, .owner_cpu: 0
> CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.11.0-rc7-next-20210209-32bit #1
> Hardware name: 9000/778/B160L
> Backtrace:
>   [<1019f9bc>] show_stack+0x34/0x48
>   [<10a65278>] dump_stack+0x94/0x114
>   [<10219f4c>] spin_dump+0x8c/0xb8
>   [<1021a0b4>] do_raw_spin_lock+0xdc/0x108
>   [<10a7367c>] _raw_spin_lock_irqsave+0x30/0x48
>   [<102bf41c>] handle_mm_fault+0x5e8/0xdb0
>   [<102b813c>] __get_user_pages.part.0+0x1b0/0x3d4
>   [<102b8900>] __get_user_pages_remote+0x134/0x34c
>   [<102b8b80>] get_user_pages_remote+0x68/0x90
>   [<102fccb0>] get_arg_page+0x94/0xd8
>   [<102fdd84>] copy_string_kernel+0xc4/0x234
>   [<102fe70c>] kernel_execve+0xcc/0x1a4
>   [<10a58d94>] run_init_process+0xbc/0xe0
>   [<10a70d50>] kernel_init+0x98/0x13c
>   [<1019a01c>] ret_from_kernel_thread+0x1c/0x24
>
> when trying to boot parisc/hppa images in qemu. Reverting this patch fixes
> the problem.

True, I can reproduce the problem.
With CONFIG_DEBUG_SPINLOCK=y you get the backtrace above.
With  CONFIG_DEBUG_SPINLOCK=n it just hangs.
Happens with a 32-bit SMP kernel, even if only one virtual CPU is started.

By the way, the latest version of this patch is in my for-next tree, here:
https://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux.git/commit/?h=for-next&id=4add5f175b1e4e71c06493f9a2c52490d2ea4365
Problem happens with it too.

Helge

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] parisc: Optimize per-pagetable spinlocks (v11)
  2021-02-10 17:23                       ` Helge Deller
@ 2021-02-10 18:57                         ` John David Anglin
  2021-02-11  1:20                           ` Guenter Roeck
  0 siblings, 1 reply; 15+ messages in thread
From: John David Anglin @ 2021-02-10 18:57 UTC (permalink / raw)
  To: Helge Deller, Guenter Roeck; +Cc: linux-parisc, Matthew Wilcox

On 2021-02-10 12:23 p.m., Helge Deller wrote:
> On 2/10/21 3:52 PM, Guenter Roeck wrote:
>> On Wed, Jan 27, 2021 at 10:18:51PM +0100, Helge Deller wrote:
>>> On parisc a spinlock is stored in the next page behind the pgd which
>>> protects against parallel accesses to the pgd. That's why one additional
>>> page (PGD_ALLOC_ORDER) is allocated for the pgd.
>>>
>>> Matthew Wilcox suggested that we instead should use a pointer in the
>>> struct page table for this spinlock and noted, that the comments for the
>>> PGD_ORDER and PMD_ORDER defines were wrong.
>>>
>>> Both suggestions are addressed in this patch. The pgd spinlock
>>> (parisc_pgd_lock) is stored in the struct page table. In
>>> switch_mm_irqs_off() the physical address of this lock is loaded into
>>> cr28 (tr4) and the pgd into cr25, so that the fault handlers can
>>> directly access the lock.
>>>
>>> The currently implemened Hybrid L2/L3 page table scheme (where the pmd
>>> is adjacent to the pgd) is dropped now too.
>>>
>>> Suggested-by: Matthew Wilcox <willy@infradead.org>
>>> Fixes: b37d1c1898b2 ("parisc: Use per-pagetable spinlock")
>>> Signed-off-by: Helge Deller <deller@gmx.de>
>>> Signed-off-by: John David Anglin <dave.anglin@bell.net>
>>
>> This patch results in:
>>
>> BUG: spinlock recursion on CPU#0, swapper/0/1
>>   lock: 0x12226d14, .magic: dead4ead, .owner: swapper/0/1, .owner_cpu: 0
>> CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.11.0-rc7-next-20210209-32bit #1
>> Hardware name: 9000/778/B160L
>> Backtrace:
>>   [<1019f9bc>] show_stack+0x34/0x48
>>   [<10a65278>] dump_stack+0x94/0x114
>>   [<10219f4c>] spin_dump+0x8c/0xb8
>>   [<1021a0b4>] do_raw_spin_lock+0xdc/0x108
>>   [<10a7367c>] _raw_spin_lock_irqsave+0x30/0x48
>>   [<102bf41c>] handle_mm_fault+0x5e8/0xdb0
>>   [<102b813c>] __get_user_pages.part.0+0x1b0/0x3d4
>>   [<102b8900>] __get_user_pages_remote+0x134/0x34c
>>   [<102b8b80>] get_user_pages_remote+0x68/0x90
>>   [<102fccb0>] get_arg_page+0x94/0xd8
>>   [<102fdd84>] copy_string_kernel+0xc4/0x234
>>   [<102fe70c>] kernel_execve+0xcc/0x1a4
>>   [<10a58d94>] run_init_process+0xbc/0xe0
>>   [<10a70d50>] kernel_init+0x98/0x13c
>>   [<1019a01c>] ret_from_kernel_thread+0x1c/0x24
>>
>> when trying to boot parisc/hppa images in qemu. Reverting this patch fixes
>> the problem.
>
> True, I can reproduce the problem.
> With CONFIG_DEBUG_SPINLOCK=y you get the backtrace above.
> With  CONFIG_DEBUG_SPINLOCK=n it just hangs.
> Happenes with 32-bit kernel with SMP kernel, even if only one virtual CPU is started.
Which is quite puzzling since most spin locks are optimized in this case.  Strikes me we
have a lock that's not initialized.

It's not obvious how this relates to the patch.  get_arg_page() calls get_user_pages_remote() with
locked NULL:

       /*
         * We are doing an exec().  'current' is the process
         * doing the exec and bprm->mm is the new process's mm.
         */
        ret = get_user_pages_remote(bprm->mm, pos, 1, gup_flags,
                        &page, NULL, NULL);

Dave

-- 
John David Anglin  dave.anglin@bell.net



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] parisc: Optimize per-pagetable spinlocks (v11)
  2021-02-10 18:57                         ` John David Anglin
@ 2021-02-11  1:20                           ` Guenter Roeck
  2021-02-11 14:38                             ` John David Anglin
  0 siblings, 1 reply; 15+ messages in thread
From: Guenter Roeck @ 2021-02-11  1:20 UTC (permalink / raw)
  To: John David Anglin; +Cc: Helge Deller, linux-parisc, Matthew Wilcox

On Wed, Feb 10, 2021 at 01:57:42PM -0500, John David Anglin wrote:
> On 2021-02-10 12:23 p.m., Helge Deller wrote:
> > On 2/10/21 3:52 PM, Guenter Roeck wrote:
> >> On Wed, Jan 27, 2021 at 10:18:51PM +0100, Helge Deller wrote:
> >>> On parisc a spinlock is stored in the next page behind the pgd which
> >>> protects against parallel accesses to the pgd. That's why one additional
> >>> page (PGD_ALLOC_ORDER) is allocated for the pgd.
> >>>
> >>> Matthew Wilcox suggested that we instead should use a pointer in the
> >>> struct page table for this spinlock and noted, that the comments for the
> >>> PGD_ORDER and PMD_ORDER defines were wrong.
> >>>
> >>> Both suggestions are addressed in this patch. The pgd spinlock
> >>> (parisc_pgd_lock) is stored in the struct page table. In
> >>> switch_mm_irqs_off() the physical address of this lock is loaded into
> >>> cr28 (tr4) and the pgd into cr25, so that the fault handlers can
> >>> directly access the lock.
> >>>
> >>> The currently implemened Hybrid L2/L3 page table scheme (where the pmd
> >>> is adjacent to the pgd) is dropped now too.
> >>>
> >>> Suggested-by: Matthew Wilcox <willy@infradead.org>
> >>> Fixes: b37d1c1898b2 ("parisc: Use per-pagetable spinlock")
> >>> Signed-off-by: Helge Deller <deller@gmx.de>
> >>> Signed-off-by: John David Anglin <dave.anglin@bell.net>
> >>
> >> This patch results in:
> >>
> >> BUG: spinlock recursion on CPU#0, swapper/0/1
> >>   lock: 0x12226d14, .magic: dead4ead, .owner: swapper/0/1, .owner_cpu: 0
> >> CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.11.0-rc7-next-20210209-32bit #1
> >> Hardware name: 9000/778/B160L
> >> Backtrace:
> >>   [<1019f9bc>] show_stack+0x34/0x48
> >>   [<10a65278>] dump_stack+0x94/0x114
> >>   [<10219f4c>] spin_dump+0x8c/0xb8
> >>   [<1021a0b4>] do_raw_spin_lock+0xdc/0x108
> >>   [<10a7367c>] _raw_spin_lock_irqsave+0x30/0x48
> >>   [<102bf41c>] handle_mm_fault+0x5e8/0xdb0
> >>   [<102b813c>] __get_user_pages.part.0+0x1b0/0x3d4
> >>   [<102b8900>] __get_user_pages_remote+0x134/0x34c
> >>   [<102b8b80>] get_user_pages_remote+0x68/0x90
> >>   [<102fccb0>] get_arg_page+0x94/0xd8
> >>   [<102fdd84>] copy_string_kernel+0xc4/0x234
> >>   [<102fe70c>] kernel_execve+0xcc/0x1a4
> >>   [<10a58d94>] run_init_process+0xbc/0xe0
> >>   [<10a70d50>] kernel_init+0x98/0x13c
> >>   [<1019a01c>] ret_from_kernel_thread+0x1c/0x24
> >>
> >> when trying to boot parisc/hppa images in qemu. Reverting this patch fixes
> >> the problem.
> >
> > True, I can reproduce the problem.
> > With CONFIG_DEBUG_SPINLOCK=y you get the backtrace above.
> > With  CONFIG_DEBUG_SPINLOCK=n it just hangs.
> > Happenes with 32-bit kernel with SMP kernel, even if only one virtual CPU is started.
> Which is quite puzzling since most spin locks are optimized in this case case.  Strikes me we
> have a lock that's not initialized.
> 
> It's not obvious how this relates to the patch.  get_arg_page() calls get_user_pages_remote() with

The fact that reverting it fixes the problem is a good indication
that the problem does relate to this patch.

As for how - no idea. That is not my area of expertise.

Thanks,
Guenter

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] parisc: Optimize per-pagetable spinlocks (v11)
  2021-02-11  1:20                           ` Guenter Roeck
@ 2021-02-11 14:38                             ` John David Anglin
  2021-02-11 21:51                               ` Guenter Roeck
  0 siblings, 1 reply; 15+ messages in thread
From: John David Anglin @ 2021-02-11 14:38 UTC (permalink / raw)
  To: Guenter Roeck; +Cc: Helge Deller, linux-parisc, Matthew Wilcox

On 2021-02-10 8:20 p.m., Guenter Roeck wrote:
> On Wed, Feb 10, 2021 at 01:57:42PM -0500, John David Anglin wrote:
>> On 2021-02-10 12:23 p.m., Helge Deller wrote:
>>> On 2/10/21 3:52 PM, Guenter Roeck wrote:
>>>> On Wed, Jan 27, 2021 at 10:18:51PM +0100, Helge Deller wrote:
>>>>> On parisc a spinlock is stored in the next page behind the pgd which
>>>>> protects against parallel accesses to the pgd. That's why one additional
>>>>> page (PGD_ALLOC_ORDER) is allocated for the pgd.
>>>>>
>>>>> Matthew Wilcox suggested that we instead should use a pointer in the
>>>>> struct page table for this spinlock and noted, that the comments for the
>>>>> PGD_ORDER and PMD_ORDER defines were wrong.
>>>>>
>>>>> Both suggestions are addressed in this patch. The pgd spinlock
>>>>> (parisc_pgd_lock) is stored in the struct page table. In
>>>>> switch_mm_irqs_off() the physical address of this lock is loaded into
>>>>> cr28 (tr4) and the pgd into cr25, so that the fault handlers can
>>>>> directly access the lock.
>>>>>
>>>>> The currently implemened Hybrid L2/L3 page table scheme (where the pmd
>>>>> is adjacent to the pgd) is dropped now too.
>>>>>
>>>>> Suggested-by: Matthew Wilcox <willy@infradead.org>
>>>>> Fixes: b37d1c1898b2 ("parisc: Use per-pagetable spinlock")
>>>>> Signed-off-by: Helge Deller <deller@gmx.de>
>>>>> Signed-off-by: John David Anglin <dave.anglin@bell.net>
>>>> This patch results in:
>>>>
>>>> BUG: spinlock recursion on CPU#0, swapper/0/1
>>>>   lock: 0x12226d14, .magic: dead4ead, .owner: swapper/0/1, .owner_cpu: 0
>>>> CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.11.0-rc7-next-20210209-32bit #1
>>>> Hardware name: 9000/778/B160L
>>>> Backtrace:
>>>>   [<1019f9bc>] show_stack+0x34/0x48
>>>>   [<10a65278>] dump_stack+0x94/0x114
>>>>   [<10219f4c>] spin_dump+0x8c/0xb8
>>>>   [<1021a0b4>] do_raw_spin_lock+0xdc/0x108
>>>>   [<10a7367c>] _raw_spin_lock_irqsave+0x30/0x48
>>>>   [<102bf41c>] handle_mm_fault+0x5e8/0xdb0
>>>>   [<102b813c>] __get_user_pages.part.0+0x1b0/0x3d4
>>>>   [<102b8900>] __get_user_pages_remote+0x134/0x34c
>>>>   [<102b8b80>] get_user_pages_remote+0x68/0x90
>>>>   [<102fccb0>] get_arg_page+0x94/0xd8
>>>>   [<102fdd84>] copy_string_kernel+0xc4/0x234
>>>>   [<102fe70c>] kernel_execve+0xcc/0x1a4
>>>>   [<10a58d94>] run_init_process+0xbc/0xe0
>>>>   [<10a70d50>] kernel_init+0x98/0x13c
>>>>   [<1019a01c>] ret_from_kernel_thread+0x1c/0x24
>>>>
>>>> when trying to boot parisc/hppa images in qemu. Reverting this patch fixes
>>>> the problem.
>>> True, I can reproduce the problem.
>>> With CONFIG_DEBUG_SPINLOCK=y you get the backtrace above.
>>> With  CONFIG_DEBUG_SPINLOCK=n it just hangs.
>>> Happenes with 32-bit kernel with SMP kernel, even if only one virtual CPU is started.
>> Which is quite puzzling since most spin locks are optimized in this case case.  Strikes me we
>> have a lock that's not initialized.
>>
>> It's not obvious how this relates to the patch.  get_arg_page() calls get_user_pages_remote() with
> The fact that reverting it fixes the problem is a good indication
> that the problem does relate to this patch.
>
> As for how - no idea. That is not my area of expertise.
I built Helge's for-next tree both with CONFIG_DEBUG_SPINLOCK=y and CONFIG_DEBUG_SPINLOCK=n.  Both
builds work fine on c8000.

The only thing that might have changed in the patch is the alignment of the lock used for page table updates.
Qemu only supports PA 1.x instructions.  The ldcw instruction needs 16-byte alignment on real hardware and
there is code to dynamically align lock pointers to 16-byte alignment.  The c8000 supports PA 2.0 instructions
and the ldcw,co instruction only needs 4-byte alignment.  Perhaps there is an issue with the dynamic alignment
of the lock pointer or the lock initialization in the PA 1.x build for qemu.

-- 
John David Anglin  dave.anglin@bell.net



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] parisc: Optimize per-pagetable spinlocks (v11)
  2021-02-11 14:38                             ` John David Anglin
@ 2021-02-11 21:51                               ` Guenter Roeck
  2021-02-11 22:16                                 ` John David Anglin
  0 siblings, 1 reply; 15+ messages in thread
From: Guenter Roeck @ 2021-02-11 21:51 UTC (permalink / raw)
  To: John David Anglin; +Cc: Helge Deller, linux-parisc, Matthew Wilcox

On Thu, Feb 11, 2021 at 09:38:25AM -0500, John David Anglin wrote:
> On 2021-02-10 8:20 p.m., Guenter Roeck wrote:
> > On Wed, Feb 10, 2021 at 01:57:42PM -0500, John David Anglin wrote:
> >> On 2021-02-10 12:23 p.m., Helge Deller wrote:
> >>> On 2/10/21 3:52 PM, Guenter Roeck wrote:
> >>>> On Wed, Jan 27, 2021 at 10:18:51PM +0100, Helge Deller wrote:
> >>>>> On parisc a spinlock is stored in the next page behind the pgd which
> >>>>> protects against parallel accesses to the pgd. That's why one additional
> >>>>> page (PGD_ALLOC_ORDER) is allocated for the pgd.
> >>>>>
> >>>>> Matthew Wilcox suggested that we instead should use a pointer in the
> >>>>> struct page table for this spinlock and noted, that the comments for the
> >>>>> PGD_ORDER and PMD_ORDER defines were wrong.
> >>>>>
> >>>>> Both suggestions are addressed in this patch. The pgd spinlock
> >>>>> (parisc_pgd_lock) is stored in the struct page table. In
> >>>>> switch_mm_irqs_off() the physical address of this lock is loaded into
> >>>>> cr28 (tr4) and the pgd into cr25, so that the fault handlers can
> >>>>> directly access the lock.
> >>>>>
> >>>>> The currently implemened Hybrid L2/L3 page table scheme (where the pmd
> >>>>> is adjacent to the pgd) is dropped now too.
> >>>>>
> >>>>> Suggested-by: Matthew Wilcox <willy@infradead.org>
> >>>>> Fixes: b37d1c1898b2 ("parisc: Use per-pagetable spinlock")
> >>>>> Signed-off-by: Helge Deller <deller@gmx.de>
> >>>>> Signed-off-by: John David Anglin <dave.anglin@bell.net>
> >>>> This patch results in:
> >>>>
> >>>> BUG: spinlock recursion on CPU#0, swapper/0/1
> >>>>   lock: 0x12226d14, .magic: dead4ead, .owner: swapper/0/1, .owner_cpu: 0
> >>>> CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.11.0-rc7-next-20210209-32bit #1
> >>>> Hardware name: 9000/778/B160L
> >>>> Backtrace:
> >>>>   [<1019f9bc>] show_stack+0x34/0x48
> >>>>   [<10a65278>] dump_stack+0x94/0x114
> >>>>   [<10219f4c>] spin_dump+0x8c/0xb8
> >>>>   [<1021a0b4>] do_raw_spin_lock+0xdc/0x108
> >>>>   [<10a7367c>] _raw_spin_lock_irqsave+0x30/0x48
> >>>>   [<102bf41c>] handle_mm_fault+0x5e8/0xdb0
> >>>>   [<102b813c>] __get_user_pages.part.0+0x1b0/0x3d4
> >>>>   [<102b8900>] __get_user_pages_remote+0x134/0x34c
> >>>>   [<102b8b80>] get_user_pages_remote+0x68/0x90
> >>>>   [<102fccb0>] get_arg_page+0x94/0xd8
> >>>>   [<102fdd84>] copy_string_kernel+0xc4/0x234
> >>>>   [<102fe70c>] kernel_execve+0xcc/0x1a4
> >>>>   [<10a58d94>] run_init_process+0xbc/0xe0
> >>>>   [<10a70d50>] kernel_init+0x98/0x13c
> >>>>   [<1019a01c>] ret_from_kernel_thread+0x1c/0x24
> >>>>
> >>>> when trying to boot parisc/hppa images in qemu. Reverting this patch fixes
> >>>> the problem.
> >>> True, I can reproduce the problem.
> >>> With CONFIG_DEBUG_SPINLOCK=y you get the backtrace above.
> >>> With  CONFIG_DEBUG_SPINLOCK=n it just hangs.
> >>> Happenes with 32-bit kernel with SMP kernel, even if only one virtual CPU is started.
> >> Which is quite puzzling since most spin locks are optimized in this case case.  Strikes me we
> >> have a lock that's not initialized.
> >>
> >> It's not obvious how this relates to the patch.  get_arg_page() calls get_user_pages_remote() with
> > The fact that reverting it fixes the problem is a good indication
> > that the problem does relate to this patch.
> >
> > As for how - no idea. That is not my area of expertise.
> I built Helge's for-next tree both with CONFIG_DEBUG_SPINLOCK=y and CONFIG_DEBUG_SPINLOCK=n.  Both
> builds work fine on c8000.
> 
> The only thing that might have changed in the patch is the alignment of the lock used for page table updates.
> Qemu only support PA 1.x instructions.  The ldcw instruction needs 16-byte alignment on real hardware and
> there is code to dynamically align lock pointers to 16-byte alignment.  The c8000 supports PA 2.0 instructions
> and the ldcw,co instruction only needs 4-byte alignment.  Perhaps there is an issue with the dynamic alignment
> of the lock pointer or the lock initialization in the PA 1.x build for qemu.
> 

The first lock is acquired in mm/memory.c from line 3565:

        vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
                        &vmf->ptl);

The second (recursive) lock is acquired from line 3587 in the same
function:

        set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry);

Source code lines are from next-20210211. I confirmed with debug code
that the lock address passed to do_raw_spin_lock() is the same in both
calls.

Guenter

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] parisc: Optimize per-pagetable spinlocks (v11)
  2021-02-11 21:51                               ` Guenter Roeck
@ 2021-02-11 22:16                                 ` John David Anglin
  2021-02-11 23:12                                   ` Guenter Roeck
  2021-02-11 23:14                                   ` Helge Deller
  0 siblings, 2 replies; 15+ messages in thread
From: John David Anglin @ 2021-02-11 22:16 UTC (permalink / raw)
  To: Guenter Roeck; +Cc: Helge Deller, linux-parisc, Matthew Wilcox

On 2021-02-11 4:51 p.m., Guenter Roeck wrote:
> On Thu, Feb 11, 2021 at 09:38:25AM -0500, John David Anglin wrote:
>> On 2021-02-10 8:20 p.m., Guenter Roeck wrote:
>>> On Wed, Feb 10, 2021 at 01:57:42PM -0500, John David Anglin wrote:
>>>> On 2021-02-10 12:23 p.m., Helge Deller wrote:
>>>>> On 2/10/21 3:52 PM, Guenter Roeck wrote:
>>>>>> On Wed, Jan 27, 2021 at 10:18:51PM +0100, Helge Deller wrote:
>>>>>>> On parisc a spinlock is stored in the next page behind the pgd which
>>>>>>> protects against parallel accesses to the pgd. That's why one additional
>>>>>>> page (PGD_ALLOC_ORDER) is allocated for the pgd.
>>>>>>>
>>>>>>> Matthew Wilcox suggested that we instead should use a pointer in the
>>>>>>> struct page table for this spinlock and noted, that the comments for the
>>>>>>> PGD_ORDER and PMD_ORDER defines were wrong.
>>>>>>>
>>>>>>> Both suggestions are addressed in this patch. The pgd spinlock
>>>>>>> (parisc_pgd_lock) is stored in the struct page table. In
>>>>>>> switch_mm_irqs_off() the physical address of this lock is loaded into
>>>>>>> cr28 (tr4) and the pgd into cr25, so that the fault handlers can
>>>>>>> directly access the lock.
>>>>>>>
>>>>>>> The currently implemened Hybrid L2/L3 page table scheme (where the pmd
>>>>>>> is adjacent to the pgd) is dropped now too.
>>>>>>>
>>>>>>> Suggested-by: Matthew Wilcox <willy@infradead.org>
>>>>>>> Fixes: b37d1c1898b2 ("parisc: Use per-pagetable spinlock")
>>>>>>> Signed-off-by: Helge Deller <deller@gmx.de>
>>>>>>> Signed-off-by: John David Anglin <dave.anglin@bell.net>
>>>>>> This patch results in:
>>>>>>
>>>>>> BUG: spinlock recursion on CPU#0, swapper/0/1
>>>>>>   lock: 0x12226d14, .magic: dead4ead, .owner: swapper/0/1, .owner_cpu: 0
>>>>>> CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.11.0-rc7-next-20210209-32bit #1
>>>>>> Hardware name: 9000/778/B160L
>>>>>> Backtrace:
>>>>>>   [<1019f9bc>] show_stack+0x34/0x48
>>>>>>   [<10a65278>] dump_stack+0x94/0x114
>>>>>>   [<10219f4c>] spin_dump+0x8c/0xb8
>>>>>>   [<1021a0b4>] do_raw_spin_lock+0xdc/0x108
>>>>>>   [<10a7367c>] _raw_spin_lock_irqsave+0x30/0x48
>>>>>>   [<102bf41c>] handle_mm_fault+0x5e8/0xdb0
>>>>>>   [<102b813c>] __get_user_pages.part.0+0x1b0/0x3d4
>>>>>>   [<102b8900>] __get_user_pages_remote+0x134/0x34c
>>>>>>   [<102b8b80>] get_user_pages_remote+0x68/0x90
>>>>>>   [<102fccb0>] get_arg_page+0x94/0xd8
>>>>>>   [<102fdd84>] copy_string_kernel+0xc4/0x234
>>>>>>   [<102fe70c>] kernel_execve+0xcc/0x1a4
>>>>>>   [<10a58d94>] run_init_process+0xbc/0xe0
>>>>>>   [<10a70d50>] kernel_init+0x98/0x13c
>>>>>>   [<1019a01c>] ret_from_kernel_thread+0x1c/0x24
>>>>>>
>>>>>> when trying to boot parisc/hppa images in qemu. Reverting this patch fixes
>>>>>> the problem.
>>>>> True, I can reproduce the problem.
>>>>> With CONFIG_DEBUG_SPINLOCK=y you get the backtrace above.
>>>>> With  CONFIG_DEBUG_SPINLOCK=n it just hangs.
>>>>> Happenes with 32-bit kernel with SMP kernel, even if only one virtual CPU is started.
>>>> Which is quite puzzling since most spin locks are optimized in this case case.  Strikes me we
>>>> have a lock that's not initialized.
>>>>
>>>> It's not obvious how this relates to the patch.  get_arg_page() calls get_user_pages_remote() with
>>> The fact that reverting it fixes the problem is a good indication
>>> that the problem does relate to this patch.
>>>
>>> As for how - no idea. That is not my area of expertise.
>> I built Helge's for-next tree both with CONFIG_DEBUG_SPINLOCK=y and CONFIG_DEBUG_SPINLOCK=n.  Both
>> builds work fine on c8000.
>>
>> The only thing that might have changed in the patch is the alignment of the lock used for page table updates.
>> Qemu only support PA 1.x instructions.  The ldcw instruction needs 16-byte alignment on real hardware and
>> there is code to dynamically align lock pointers to 16-byte alignment.  The c8000 supports PA 2.0 instructions
>> and the ldcw,co instruction only needs 4-byte alignment.  Perhaps there is an issue with the dynamic alignment
>> of the lock pointer or the lock initialization in the PA 1.x build for qemu.
>>
> The first lock is acquired in mm/memory.c from line 3565:
>
>         vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
>                         &vmf->ptl);
>
> The second (recursive) lock is acquired from line 3587 in the same
> function:
>
>         set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry);
>
> Source code lines are from next-20210211. I confirmed with debug code
> that the lock address passed to do_raw_spin_lock() is the same in both
> calls.
Thanks Guenter.  I assume this is with v15 of the patch?

It looks as if we might not need the ptl lock in set_pte_at() and probably elsewhere.

But I'm still puzzled as to why this doesn't happen when different locks are used as in your
report with the earlier patch. 

-- 
John David Anglin  dave.anglin@bell.net



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] parisc: Optimize per-pagetable spinlocks (v11)
  2021-02-11 22:16                                 ` John David Anglin
@ 2021-02-11 23:12                                   ` Guenter Roeck
  2021-02-11 23:14                                   ` Helge Deller
  1 sibling, 0 replies; 15+ messages in thread
From: Guenter Roeck @ 2021-02-11 23:12 UTC (permalink / raw)
  To: John David Anglin; +Cc: Helge Deller, linux-parisc, Matthew Wilcox

On 2/11/21 2:16 PM, John David Anglin wrote:
> On 2021-02-11 4:51 p.m., Guenter Roeck wrote:
>> On Thu, Feb 11, 2021 at 09:38:25AM -0500, John David Anglin wrote:
>>> On 2021-02-10 8:20 p.m., Guenter Roeck wrote:
>>>> On Wed, Feb 10, 2021 at 01:57:42PM -0500, John David Anglin wrote:
>>>>> On 2021-02-10 12:23 p.m., Helge Deller wrote:
>>>>>> On 2/10/21 3:52 PM, Guenter Roeck wrote:
>>>>>>> On Wed, Jan 27, 2021 at 10:18:51PM +0100, Helge Deller wrote:
>>>>>>>> On parisc a spinlock is stored in the next page behind the pgd which
>>>>>>>> protects against parallel accesses to the pgd. That's why one additional
>>>>>>>> page (PGD_ALLOC_ORDER) is allocated for the pgd.
>>>>>>>>
>>>>>>>> Matthew Wilcox suggested that we instead should use a pointer in the
>>>>>>>> struct page table for this spinlock and noted, that the comments for the
>>>>>>>> PGD_ORDER and PMD_ORDER defines were wrong.
>>>>>>>>
>>>>>>>> Both suggestions are addressed in this patch. The pgd spinlock
>>>>>>>> (parisc_pgd_lock) is stored in the struct page table. In
>>>>>>>> switch_mm_irqs_off() the physical address of this lock is loaded into
>>>>>>>> cr28 (tr4) and the pgd into cr25, so that the fault handlers can
>>>>>>>> directly access the lock.
>>>>>>>>
>>>>>>>> The currently implemened Hybrid L2/L3 page table scheme (where the pmd
>>>>>>>> is adjacent to the pgd) is dropped now too.
>>>>>>>>
>>>>>>>> Suggested-by: Matthew Wilcox <willy@infradead.org>
>>>>>>>> Fixes: b37d1c1898b2 ("parisc: Use per-pagetable spinlock")
>>>>>>>> Signed-off-by: Helge Deller <deller@gmx.de>
>>>>>>>> Signed-off-by: John David Anglin <dave.anglin@bell.net>
>>>>>>> This patch results in:
>>>>>>>
>>>>>>> BUG: spinlock recursion on CPU#0, swapper/0/1
>>>>>>>   lock: 0x12226d14, .magic: dead4ead, .owner: swapper/0/1, .owner_cpu: 0
>>>>>>> CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.11.0-rc7-next-20210209-32bit #1
>>>>>>> Hardware name: 9000/778/B160L
>>>>>>> Backtrace:
>>>>>>>   [<1019f9bc>] show_stack+0x34/0x48
>>>>>>>   [<10a65278>] dump_stack+0x94/0x114
>>>>>>>   [<10219f4c>] spin_dump+0x8c/0xb8
>>>>>>>   [<1021a0b4>] do_raw_spin_lock+0xdc/0x108
>>>>>>>   [<10a7367c>] _raw_spin_lock_irqsave+0x30/0x48
>>>>>>>   [<102bf41c>] handle_mm_fault+0x5e8/0xdb0
>>>>>>>   [<102b813c>] __get_user_pages.part.0+0x1b0/0x3d4
>>>>>>>   [<102b8900>] __get_user_pages_remote+0x134/0x34c
>>>>>>>   [<102b8b80>] get_user_pages_remote+0x68/0x90
>>>>>>>   [<102fccb0>] get_arg_page+0x94/0xd8
>>>>>>>   [<102fdd84>] copy_string_kernel+0xc4/0x234
>>>>>>>   [<102fe70c>] kernel_execve+0xcc/0x1a4
>>>>>>>   [<10a58d94>] run_init_process+0xbc/0xe0
>>>>>>>   [<10a70d50>] kernel_init+0x98/0x13c
>>>>>>>   [<1019a01c>] ret_from_kernel_thread+0x1c/0x24
>>>>>>>
>>>>>>> when trying to boot parisc/hppa images in qemu. Reverting this patch fixes
>>>>>>> the problem.
>>>>>> True, I can reproduce the problem.
>>>>>> With CONFIG_DEBUG_SPINLOCK=y you get the backtrace above.
>>>>>> With  CONFIG_DEBUG_SPINLOCK=n it just hangs.
>>>>>> Happenes with 32-bit kernel with SMP kernel, even if only one virtual CPU is started.
>>>>> Which is quite puzzling since most spin locks are optimized in this case case.  Strikes me we
>>>>> have a lock that's not initialized.
>>>>>
>>>>> It's not obvious how this relates to the patch.  get_arg_page() calls get_user_pages_remote() with
>>>> The fact that reverting it fixes the problem is a good indication
>>>> that the problem does relate to this patch.
>>>>
>>>> As for how - no idea. That is not my area of expertise.
>>> I built Helge's for-next tree both with CONFIG_DEBUG_SPINLOCK=y and CONFIG_DEBUG_SPINLOCK=n.  Both
>>> builds work fine on c8000.
>>>
>>> The only thing that might have changed in the patch is the alignment of the lock used for page table updates.
>>> Qemu only support PA 1.x instructions.  The ldcw instruction needs 16-byte alignment on real hardware and
>>> there is code to dynamically align lock pointers to 16-byte alignment.  The c8000 supports PA 2.0 instructions
>>> and the ldcw,co instruction only needs 4-byte alignment.  Perhaps there is an issue with the dynamic alignment
>>> of the lock pointer or the lock initialization in the PA 1.x build for qemu.
>>>
>> The first lock is acquired in mm/memory.c from line 3565:
>>
>>         vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
>>                         &vmf->ptl);
>>
>> The second (recursive) lock is acquired from line 3587 in the same
>> function:
>>
>>         set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry);
>>
>> Source code lines are from next-20210211. I confirmed with debug code
>> that the lock address passed to do_raw_spin_lock() is the same in both
>> calls.
> Thanks Guenter.  I assume this is with v15 of the patch?
> 
I have no idea what version it is, sorry. It is with the version
that is in next-20210211. The problem was first seen with next-20210201.

> It looks as if we might not need the ptl lock in set_pte_at() and probably elsewhere.
> 
> But I'm still puzzled as to why this doesn't happen when different locks are used as in your
> report with the earlier patch. 
> 
Maybe I reported it against the wrong version ? If so, sorry, my bad.

Guenter

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] parisc: Optimize per-pagetable spinlocks (v11)
  2021-02-11 22:16                                 ` John David Anglin
  2021-02-11 23:12                                   ` Guenter Roeck
@ 2021-02-11 23:14                                   ` Helge Deller
  2021-02-11 23:22                                     ` Helge Deller
                                                       ` (2 more replies)
  1 sibling, 3 replies; 15+ messages in thread
From: Helge Deller @ 2021-02-11 23:14 UTC (permalink / raw)
  To: John David Anglin
  Cc: Guenter Roeck, Helge Deller, linux-parisc, Matthew Wilcox

* John David Anglin <dave.anglin@bell.net>:
> On 2021-02-11 4:51 p.m., Guenter Roeck wrote:
> > On Thu, Feb 11, 2021 at 09:38:25AM -0500, John David Anglin wrote:
> >> On 2021-02-10 8:20 p.m., Guenter Roeck wrote:
> >>> On Wed, Feb 10, 2021 at 01:57:42PM -0500, John David Anglin wrote:
> >>>> On 2021-02-10 12:23 p.m., Helge Deller wrote:
> >>>>> On 2/10/21 3:52 PM, Guenter Roeck wrote:
> >>>>>> On Wed, Jan 27, 2021 at 10:18:51PM +0100, Helge Deller wrote:
> >>>>>>> On parisc a spinlock is stored in the next page behind the pgd which
> >>>>>>> protects against parallel accesses to the pgd. That's why one additional
> >>>>>>> page (PGD_ALLOC_ORDER) is allocated for the pgd.
> >>>>>>>
> >>>>>>> Matthew Wilcox suggested that we instead should use a pointer in the
> >>>>>>> struct page table for this spinlock and noted, that the comments for the
> >>>>>>> PGD_ORDER and PMD_ORDER defines were wrong.
> >>>>>>>
> >>>>>>> Both suggestions are addressed in this patch. The pgd spinlock
> >>>>>>> (parisc_pgd_lock) is stored in the struct page table. In
> >>>>>>> switch_mm_irqs_off() the physical address of this lock is loaded into
> >>>>>>> cr28 (tr4) and the pgd into cr25, so that the fault handlers can
> >>>>>>> directly access the lock.
> >>>>>>>
> >>>>>>> The currently implemented Hybrid L2/L3 page table scheme (where the pmd
> >>>>>>> is adjacent to the pgd) is dropped now too.
> >>>>>>>
> >>>>>>> Suggested-by: Matthew Wilcox <willy@infradead.org>
> >>>>>>> Fixes: b37d1c1898b2 ("parisc: Use per-pagetable spinlock")
> >>>>>>> Signed-off-by: Helge Deller <deller@gmx.de>
> >>>>>>> Signed-off-by: John David Anglin <dave.anglin@bell.net>
> >>>>>> This patch results in:
> >>>>>>
> >>>>>> BUG: spinlock recursion on CPU#0, swapper/0/1
> >>>>>>   lock: 0x12226d14, .magic: dead4ead, .owner: swapper/0/1, .owner_cpu: 0
> >>>>>> CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.11.0-rc7-next-20210209-32bit #1
> >>>>>> Hardware name: 9000/778/B160L
> >>>>>> Backtrace:
> >>>>>>   [<1019f9bc>] show_stack+0x34/0x48
> >>>>>>   [<10a65278>] dump_stack+0x94/0x114
> >>>>>>   [<10219f4c>] spin_dump+0x8c/0xb8
> >>>>>>   [<1021a0b4>] do_raw_spin_lock+0xdc/0x108
> >>>>>>   [<10a7367c>] _raw_spin_lock_irqsave+0x30/0x48
> >>>>>>   [<102bf41c>] handle_mm_fault+0x5e8/0xdb0
> >>>>>>   [<102b813c>] __get_user_pages.part.0+0x1b0/0x3d4
> >>>>>>   [<102b8900>] __get_user_pages_remote+0x134/0x34c
> >>>>>>   [<102b8b80>] get_user_pages_remote+0x68/0x90
> >>>>>>   [<102fccb0>] get_arg_page+0x94/0xd8
> >>>>>>   [<102fdd84>] copy_string_kernel+0xc4/0x234
> >>>>>>   [<102fe70c>] kernel_execve+0xcc/0x1a4
> >>>>>>   [<10a58d94>] run_init_process+0xbc/0xe0
> >>>>>>   [<10a70d50>] kernel_init+0x98/0x13c
> >>>>>>   [<1019a01c>] ret_from_kernel_thread+0x1c/0x24
> >>>>>>
> >>>>>> when trying to boot parisc/hppa images in qemu. Reverting this patch fixes
> >>>>>> the problem.
> >>>>> True, I can reproduce the problem.
> >>>>> With CONFIG_DEBUG_SPINLOCK=y you get the backtrace above.
> >>>>> With  CONFIG_DEBUG_SPINLOCK=n it just hangs.
> >>>>> Happens with a 32-bit SMP kernel, even if only one virtual CPU is started.
> >>>> Which is quite puzzling since most spin locks are optimized in this case.  Strikes me we
> >>>> have a lock that's not initialized.
> >>>>
> >>>> It's not obvious how this relates to the patch.  get_arg_page() calls get_user_pages_remote() with
> >>> The fact that reverting it fixes the problem is a good indication
> >>> that the problem does relate to this patch.
> >>>
> >>> As for how - no idea. That is not my area of expertise.
> >> I built Helge's for-next tree both with CONFIG_DEBUG_SPINLOCK=y and CONFIG_DEBUG_SPINLOCK=n.  Both
> >> builds work fine on c8000.
> >>
> >> The only thing that might have changed in the patch is the alignment of the lock used for page table updates.
> >>>> Qemu only supports PA 1.x instructions.  The ldcw instruction needs 16-byte alignment on real hardware and
> >> there is code to dynamically align lock pointers to 16-byte alignment.  The c8000 supports PA 2.0 instructions
> >> and the ldcw,co instruction only needs 4-byte alignment.  Perhaps there is an issue with the dynamic alignment
> >> of the lock pointer or the lock initialization in the PA 1.x build for qemu.
> >>
> > The first lock is acquired in mm/memory.c from line 3565:
> >
> >         vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
> >                         &vmf->ptl);
> >
> > The second (recursive) lock is acquired from line 3587 in the same
> > function:
> >
> >         set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry);
> >
> > Source code lines are from next-20210211. I confirmed with debug code
> > that the lock address passed to do_raw_spin_lock() is the same in both
> > calls.
> Thanks Guenter.  I assume this is with v15 of the patch?

Yes, happens with latest version too.

> It looks as if we might not need the ptl lock in set_pte_at() and probably elsewhere.

Just removing the locks from set_pte_at() didn't solve it.
After removing the others too, it works.
Patch is attached below.
I added a memory-barrier to set_pte() too.

> But I'm still puzzled as to why this doesn't happen when different locks are used as in your
> report with the earlier patch. 

I think it happened earlier too. Would need to test though.

Helge

diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 2e1873cd4877..2c68010d896a 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -82,17 +82,14 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
  */
 #define set_pte(pteptr, pteval)                                 \
         do{                                                     \
-                *(pteptr) = (pteval);                           \
+		*(pteptr) = (pteval);				\
+		mb();						\
         } while(0)

 #define set_pte_at(mm, addr, ptep, pteval)			\
 	do {							\
-		unsigned long flags;				\
-		spinlock_t *pgd_lock = &(mm)->page_table_lock;	\
-		spin_lock_irqsave(pgd_lock, flags);		\
 		set_pte(ptep, pteval);				\
 		purge_tlb_entries(mm, addr);			\
-		spin_unlock_irqrestore(pgd_lock, flags);	\
 	} while (0)

 #endif /* !__ASSEMBLY__ */
@@ -431,22 +428,15 @@ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
 static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
 {
 	pte_t pte;
-	unsigned long flags;
-	spinlock_t *pgd_lock;

 	if (!pte_young(*ptep))
 		return 0;

-	pgd_lock = &vma->vm_mm->page_table_lock;
-	spin_lock_irqsave(pgd_lock, flags);
 	pte = *ptep;
 	if (!pte_young(pte)) {
-		spin_unlock_irqrestore(pgd_lock, flags);
 		return 0;
 	}
-	set_pte(ptep, pte_mkold(pte));
-	purge_tlb_entries(vma->vm_mm, addr);
-	spin_unlock_irqrestore(pgd_lock, flags);
+	set_pte_at(vma->vm_mm, addr, ptep, pte_mkold(pte));
 	return 1;
 }

@@ -454,29 +444,16 @@ struct mm_struct;
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 	pte_t old_pte;
-	unsigned long flags;
-	spinlock_t *pgd_lock;

-	pgd_lock = &mm->page_table_lock;
-	spin_lock_irqsave(pgd_lock, flags);
 	old_pte = *ptep;
-	set_pte(ptep, __pte(0));
-	purge_tlb_entries(mm, addr);
-	spin_unlock_irqrestore(pgd_lock, flags);
+	set_pte_at(mm, addr, ptep, __pte(0));

 	return old_pte;
 }

 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
-	unsigned long flags;
-	spinlock_t *pgd_lock;
-
-	pgd_lock = &mm->page_table_lock;
-	spin_lock_irqsave(pgd_lock, flags);
-	set_pte(ptep, pte_wrprotect(*ptep));
-	purge_tlb_entries(mm, addr);
-	spin_unlock_irqrestore(pgd_lock, flags);
+	set_pte_at(mm, addr, ptep, pte_wrprotect(*ptep));
 }

 #define pte_same(A,B)	(pte_val(A) == pte_val(B))

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH] parisc: Optimize per-pagetable spinlocks (v11)
  2021-02-11 23:14                                   ` Helge Deller
@ 2021-02-11 23:22                                     ` Helge Deller
  2021-02-11 23:23                                     ` John David Anglin
  2021-02-11 23:34                                     ` John David Anglin
  2 siblings, 0 replies; 15+ messages in thread
From: Helge Deller @ 2021-02-11 23:22 UTC (permalink / raw)
  To: John David Anglin; +Cc: Guenter Roeck, linux-parisc, Matthew Wilcox

On 2/12/21 12:14 AM, Helge Deller wrote:
> * John David Anglin <dave.anglin@bell.net>:
>> On 2021-02-11 4:51 p.m., Guenter Roeck wrote:
>>> On Thu, Feb 11, 2021 at 09:38:25AM -0500, John David Anglin wrote:
>>>> On 2021-02-10 8:20 p.m., Guenter Roeck wrote:
>>>>> On Wed, Feb 10, 2021 at 01:57:42PM -0500, John David Anglin wrote:
>>>>>> On 2021-02-10 12:23 p.m., Helge Deller wrote:
>>>>>>> On 2/10/21 3:52 PM, Guenter Roeck wrote:
>>>>>>>> On Wed, Jan 27, 2021 at 10:18:51PM +0100, Helge Deller wrote:
>>>>>>>>> On parisc a spinlock is stored in the next page behind the pgd which
>>>>>>>>> protects against parallel accesses to the pgd. That's why one additional
>>>>>>>>> page (PGD_ALLOC_ORDER) is allocated for the pgd.
>>>>>>>>>
>>>>>>>>> Matthew Wilcox suggested that we instead should use a pointer in the
>>>>>>>>> struct page table for this spinlock and noted, that the comments for the
>>>>>>>>> PGD_ORDER and PMD_ORDER defines were wrong.
>>>>>>>>>
>>>>>>>>> Both suggestions are addressed in this patch. The pgd spinlock
>>>>>>>>> (parisc_pgd_lock) is stored in the struct page table. In
>>>>>>>>> switch_mm_irqs_off() the physical address of this lock is loaded into
>>>>>>>>> cr28 (tr4) and the pgd into cr25, so that the fault handlers can
>>>>>>>>> directly access the lock.
>>>>>>>>>
> >>>>>>>>> The currently implemented Hybrid L2/L3 page table scheme (where the pmd
>>>>>>>>> is adjacent to the pgd) is dropped now too.
>>>>>>>>>
>>>>>>>>> Suggested-by: Matthew Wilcox <willy@infradead.org>
>>>>>>>>> Fixes: b37d1c1898b2 ("parisc: Use per-pagetable spinlock")
>>>>>>>>> Signed-off-by: Helge Deller <deller@gmx.de>
>>>>>>>>> Signed-off-by: John David Anglin <dave.anglin@bell.net>
>>>>>>>> This patch results in:
>>>>>>>>
>>>>>>>> BUG: spinlock recursion on CPU#0, swapper/0/1
>>>>>>>>    lock: 0x12226d14, .magic: dead4ead, .owner: swapper/0/1, .owner_cpu: 0
>>>>>>>> CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.11.0-rc7-next-20210209-32bit #1
>>>>>>>> Hardware name: 9000/778/B160L
>>>>>>>> Backtrace:
>>>>>>>>    [<1019f9bc>] show_stack+0x34/0x48
>>>>>>>>    [<10a65278>] dump_stack+0x94/0x114
>>>>>>>>    [<10219f4c>] spin_dump+0x8c/0xb8
>>>>>>>>    [<1021a0b4>] do_raw_spin_lock+0xdc/0x108
>>>>>>>>    [<10a7367c>] _raw_spin_lock_irqsave+0x30/0x48
>>>>>>>>    [<102bf41c>] handle_mm_fault+0x5e8/0xdb0
>>>>>>>>    [<102b813c>] __get_user_pages.part.0+0x1b0/0x3d4
>>>>>>>>    [<102b8900>] __get_user_pages_remote+0x134/0x34c
>>>>>>>>    [<102b8b80>] get_user_pages_remote+0x68/0x90
>>>>>>>>    [<102fccb0>] get_arg_page+0x94/0xd8
>>>>>>>>    [<102fdd84>] copy_string_kernel+0xc4/0x234
>>>>>>>>    [<102fe70c>] kernel_execve+0xcc/0x1a4
>>>>>>>>    [<10a58d94>] run_init_process+0xbc/0xe0
>>>>>>>>    [<10a70d50>] kernel_init+0x98/0x13c
>>>>>>>>    [<1019a01c>] ret_from_kernel_thread+0x1c/0x24
>>>>>>>>
>>>>>>>> when trying to boot parisc/hppa images in qemu. Reverting this patch fixes
>>>>>>>> the problem.
>>>>>>> True, I can reproduce the problem.
>>>>>>> With CONFIG_DEBUG_SPINLOCK=y you get the backtrace above.
>>>>>>> With  CONFIG_DEBUG_SPINLOCK=n it just hangs.
> >>>>>> Happens with a 32-bit SMP kernel, even if only one virtual CPU is started.
> >>>>> Which is quite puzzling since most spin locks are optimized in this case.  Strikes me we
>>>>>> have a lock that's not initialized.
>>>>>>
>>>>>> It's not obvious how this relates to the patch.  get_arg_page() calls get_user_pages_remote() with
>>>>> The fact that reverting it fixes the problem is a good indication
>>>>> that the problem does relate to this patch.
>>>>>
>>>>> As for how - no idea. That is not my area of expertise.
>>>> I built Helge's for-next tree both with CONFIG_DEBUG_SPINLOCK=y and CONFIG_DEBUG_SPINLOCK=n.  Both
>>>> builds work fine on c8000.
>>>>
>>>> The only thing that might have changed in the patch is the alignment of the lock used for page table updates.
> >>>> Qemu only supports PA 1.x instructions.  The ldcw instruction needs 16-byte alignment on real hardware and
>>>> there is code to dynamically align lock pointers to 16-byte alignment.  The c8000 supports PA 2.0 instructions
>>>> and the ldcw,co instruction only needs 4-byte alignment.  Perhaps there is an issue with the dynamic alignment
>>>> of the lock pointer or the lock initialization in the PA 1.x build for qemu.
>>>>
>>> The first lock is acquired in mm/memory.c from line 3565:
>>>
>>>          vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
>>>                          &vmf->ptl);
>>>
>>> The second (recursive) lock is acquired from line 3587 in the same
>>> function:
>>>
>>>          set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry);
>>>
>>> Source code lines are from next-20210211. I confirmed with debug code
>>> that the lock address passed to do_raw_spin_lock() is the same in both
>>> calls.
>> Thanks Guenter.  I assume this is with v15 of the patch?
>
> Yes, happens with latest version too.
>
>> It looks as if we might not need the ptl lock in set_pte_at() and probably elsewhere.
>
> Just removing the locks from set_pte_at() didn't solve it.
> After removing the others too, it works.
> Patch is attached below.
> I added a memory-barrier to set_pte() too.
>
>> But I'm still puzzled as to why this doesn't happen when different locks are used as in your
>> report with the earlier patch.
>
> I think it happened earlier too. Would need to test though.
>
> Helge
>
> diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
> index 2e1873cd4877..2c68010d896a 100644
> --- a/arch/parisc/include/asm/pgtable.h
> +++ b/arch/parisc/include/asm/pgtable.h
> @@ -82,17 +82,14 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
>    */
>   #define set_pte(pteptr, pteval)                                 \
>           do{                                                     \
> -                *(pteptr) = (pteval);                           \
> +		*(pteptr) = (pteval);				\
> +		mb();						\

Actually, this should be barrier() instead of mb().
mb() is overkill on SMP.

Helge



>           } while(0)
>
>   #define set_pte_at(mm, addr, ptep, pteval)			\
>   	do {							\
> -		unsigned long flags;				\
> -		spinlock_t *pgd_lock = &(mm)->page_table_lock;	\
> -		spin_lock_irqsave(pgd_lock, flags);		\
>   		set_pte(ptep, pteval);				\
>   		purge_tlb_entries(mm, addr);			\
> -		spin_unlock_irqrestore(pgd_lock, flags);	\
>   	} while (0)
>
>   #endif /* !__ASSEMBLY__ */
> @@ -431,22 +428,15 @@ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
>   static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
>   {
>   	pte_t pte;
> -	unsigned long flags;
> -	spinlock_t *pgd_lock;
>
>   	if (!pte_young(*ptep))
>   		return 0;
>
> -	pgd_lock = &vma->vm_mm->page_table_lock;
> -	spin_lock_irqsave(pgd_lock, flags);
>   	pte = *ptep;
>   	if (!pte_young(pte)) {
> -		spin_unlock_irqrestore(pgd_lock, flags);
>   		return 0;
>   	}
> -	set_pte(ptep, pte_mkold(pte));
> -	purge_tlb_entries(vma->vm_mm, addr);
> -	spin_unlock_irqrestore(pgd_lock, flags);
> +	set_pte_at(vma->vm_mm, addr, ptep, pte_mkold(pte));
>   	return 1;
>   }
>
> @@ -454,29 +444,16 @@ struct mm_struct;
>   static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
>   {
>   	pte_t old_pte;
> -	unsigned long flags;
> -	spinlock_t *pgd_lock;
>
> -	pgd_lock = &mm->page_table_lock;
> -	spin_lock_irqsave(pgd_lock, flags);
>   	old_pte = *ptep;
> -	set_pte(ptep, __pte(0));
> -	purge_tlb_entries(mm, addr);
> -	spin_unlock_irqrestore(pgd_lock, flags);
> +	set_pte_at(mm, addr, ptep, __pte(0));
>
>   	return old_pte;
>   }
>
>   static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
>   {
> -	unsigned long flags;
> -	spinlock_t *pgd_lock;
> -
> -	pgd_lock = &mm->page_table_lock;
> -	spin_lock_irqsave(pgd_lock, flags);
> -	set_pte(ptep, pte_wrprotect(*ptep));
> -	purge_tlb_entries(mm, addr);
> -	spin_unlock_irqrestore(pgd_lock, flags);
> +	set_pte_at(mm, addr, ptep, pte_wrprotect(*ptep));
>   }
>
>   #define pte_same(A,B)	(pte_val(A) == pte_val(B))
>


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] parisc: Optimize per-pagetable spinlocks (v11)
  2021-02-11 23:14                                   ` Helge Deller
  2021-02-11 23:22                                     ` Helge Deller
@ 2021-02-11 23:23                                     ` John David Anglin
  2021-02-11 23:34                                     ` John David Anglin
  2 siblings, 0 replies; 15+ messages in thread
From: John David Anglin @ 2021-02-11 23:23 UTC (permalink / raw)
  To: Helge Deller; +Cc: Guenter Roeck, linux-parisc, Matthew Wilcox

On 2021-02-11 6:14 p.m., Helge Deller wrote:
> Yes, happens with latest version too.
>
>> It looks as if we might not need the ptl lock in set_pte_at() and probably elsewhere.
> Just removing the locks from set_pte_at() didn't solve it.
> After removing the others too, it works.
> Patch is attached below.
> I added a memory-barrier to set_pte() too.
You are ahead of me.  Should the memory barrier be a smp_mb()?

Dave

-- 
John David Anglin  dave.anglin@bell.net


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] parisc: Optimize per-pagetable spinlocks (v11)
  2021-02-11 23:14                                   ` Helge Deller
  2021-02-11 23:22                                     ` Helge Deller
  2021-02-11 23:23                                     ` John David Anglin
@ 2021-02-11 23:34                                     ` John David Anglin
  2 siblings, 0 replies; 15+ messages in thread
From: John David Anglin @ 2021-02-11 23:34 UTC (permalink / raw)
  To: Helge Deller; +Cc: Guenter Roeck, linux-parisc, Matthew Wilcox

On 2021-02-11 6:14 p.m., Helge Deller wrote:
> Patch is attached below.
> I added a memory-barrier to set_pte() too.
I think the huge page support needs looking at.

-- 
John David Anglin  dave.anglin@bell.net


^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2021-02-11 23:35 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <c696f95d-a5ba-1f73-fbe9-a5d3f25e79c0@bell.net>
     [not found] ` <2786b971-254a-ae07-ea24-38e57bd29892@bell.net>
     [not found]   ` <d4751664-6627-920e-9c44-7f9b7e287431@bell.net>
     [not found]     ` <6fb36e0e-62f5-68c7-92ec-c6dd16841813@bell.net>
     [not found]       ` <44ee7e09-90e7-0766-f0e4-bde2d3cdc2ec@bell.net>
     [not found]         ` <5100eb80-975f-d77d-846a-5aabc25d0f95@bell.net>
     [not found]           ` <e023991b-ba2e-f6da-94fb-0988ad70e717@bell.net>
     [not found]             ` <9b9c6446-365f-9ca6-b89f-c330fca11952@bell.net>
     [not found]               ` <94210da5-5642-82ef-85ae-688e1c07473d@gmx.de>
     [not found]                 ` <4f76001d-f050-286f-4b6f-790554583eea@bell.net>
2021-01-27 21:18                   ` [PATCH] parisc: Optimize per-pagetable spinlocks (v11) Helge Deller
2021-01-28  8:36                     ` Rolf Eike Beer
2021-01-28 15:24                       ` [PATCH] parisc: Optimize per-pagetable spinlocks (v12) Helge Deller
2021-02-10 14:52                     ` [PATCH] parisc: Optimize per-pagetable spinlocks (v11) Guenter Roeck
2021-02-10 17:23                       ` Helge Deller
2021-02-10 18:57                         ` John David Anglin
2021-02-11  1:20                           ` Guenter Roeck
2021-02-11 14:38                             ` John David Anglin
2021-02-11 21:51                               ` Guenter Roeck
2021-02-11 22:16                                 ` John David Anglin
2021-02-11 23:12                                   ` Guenter Roeck
2021-02-11 23:14                                   ` Helge Deller
2021-02-11 23:22                                     ` Helge Deller
2021-02-11 23:23                                     ` John David Anglin
2021-02-11 23:34                                     ` John David Anglin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.