From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from e28smtp08.in.ibm.com (e28smtp08.in.ibm.com [122.248.162.8]) (using TLSv1 with cipher CAMELLIA256-SHA (256/256 bits)) (No client certificate requested) by lists.ozlabs.org (Postfix) with ESMTPS id 3CC181A1AB2 for ; Tue, 1 Dec 2015 14:37:20 +1100 (AEDT) Received: from localhost by e28smtp08.in.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use Only! Violators will be prosecuted for from ; Tue, 1 Dec 2015 09:07:18 +0530 Received: from d28relay03.in.ibm.com (d28relay03.in.ibm.com [9.184.220.60]) by d28dlp01.in.ibm.com (Postfix) with ESMTP id 36720E0065 for ; Tue, 1 Dec 2015 09:07:56 +0530 (IST) Received: from d28av03.in.ibm.com (d28av03.in.ibm.com [9.184.220.65]) by d28relay03.in.ibm.com (8.14.9/8.14.9/NCO v10.0) with ESMTP id tB13bFf26619468 for ; Tue, 1 Dec 2015 09:07:15 +0530 Received: from d28av03.in.ibm.com (localhost [127.0.0.1]) by d28av03.in.ibm.com (8.14.4/8.14.4/NCO v10.0 AVout) with ESMTP id tB13bE4X022249 for ; Tue, 1 Dec 2015 09:07:14 +0530 From: "Aneesh Kumar K.V" To: benh@kernel.crashing.org, paulus@samba.org, mpe@ellerman.id.au, Scott Wood , Denis Kirjanov Cc: linuxppc-dev@lists.ozlabs.org, "Aneesh Kumar K.V" Subject: [PATCH V6 23/35] powerpc/mm: Convert __hash_page_64K to C Date: Tue, 1 Dec 2015 09:06:48 +0530 Message-Id: <1448941020-15168-24-git-send-email-aneesh.kumar@linux.vnet.ibm.com> In-Reply-To: <1448941020-15168-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com> References: <1448941020-15168-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com> List-Id: Linux on PowerPC Developers Mail List List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Convert from asm to C Acked-by: Scott Wood Signed-off-by: Aneesh Kumar K.V --- arch/powerpc/include/asm/book3s/64/hash-64k.h | 3 +- arch/powerpc/mm/hash64_64k.c | 130 ++++++++++++ arch/powerpc/mm/hash_low_64.S | 290 +------------------------- arch/powerpc/mm/hash_utils_64.c | 19 +- 4 files changed, 134 insertions(+), 308 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index 9fca7fae434b..46b5d0ab11de 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -40,7 +40,8 @@ #define _PAGE_COMBO_VALID (_PAGE_F_GIX | _PAGE_F_SECOND) /* PTE flags to conserve for HPTE identification */ -#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | _PAGE_COMBO) +#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_F_SECOND | \ + _PAGE_F_GIX | _PAGE_HASHPTE | _PAGE_COMBO) /* Shift to put page number into pte. * diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c index 8f7328075f04..fc0898eb309d 100644 --- a/arch/powerpc/mm/hash64_64k.c +++ b/arch/powerpc/mm/hash64_64k.c @@ -229,3 +229,133 @@ repeat: *ptep = __pte(new_pte & ~_PAGE_BUSY); return 0; } + +int __hash_page_64K(unsigned long ea, unsigned long access, + unsigned long vsid, pte_t *ptep, unsigned long trap, + unsigned long flags, int ssize) +{ + + unsigned long hpte_group; + unsigned long rflags, pa; + unsigned long old_pte, new_pte; + unsigned long vpn, hash, slot; + unsigned long shift = mmu_psize_defs[MMU_PAGE_64K].shift; + + /* + * atomically mark the linux large page PTE busy and dirty + */ + do { + pte_t pte = READ_ONCE(*ptep); + + old_pte = pte_val(pte); + /* If PTE busy, retry the access */ + if (unlikely(old_pte & _PAGE_BUSY)) + return 0; + /* If PTE permissions don't match, take page fault */ + if (unlikely(access & ~old_pte)) + return 1; + /* + * Check if PTE has the cache-inhibit bit set + * If so, bail out and refault as a 4k page + */ + if (!mmu_has_feature(MMU_FTR_CI_LARGE_PAGE) && + unlikely(old_pte & _PAGE_NO_CACHE)) + return 0; + /* + * Try to lock the PTE, add ACCESSED and DIRTY if it was + * a write access. Since this is 4K insert of 64K page size + * also add _PAGE_COMBO + */ + new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED; + if (access & _PAGE_RW) + new_pte |= _PAGE_DIRTY; + } while (old_pte != __cmpxchg_u64((unsigned long *)ptep, + old_pte, new_pte)); + /* + * PP bits. _PAGE_USER is already PP bit 0x2, so we only + * need to add in 0x1 if it's a read-only user page + */ + rflags = new_pte & _PAGE_USER; + if ((new_pte & _PAGE_USER) && !((new_pte & _PAGE_RW) && + (new_pte & _PAGE_DIRTY))) + rflags |= 0x1; + /* + * _PAGE_EXEC -> HW_NO_EXEC since it's inverted + */ + rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N); + /* + * Always add C and Memory coherence bit + */ + rflags |= HPTE_R_C | HPTE_R_M; + /* + * Add in WIMG bits + */ + rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | + _PAGE_COHERENT | _PAGE_GUARDED)); + + if (!cpu_has_feature(CPU_FTR_NOEXECUTE) && + !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) + rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap); + + vpn = hpt_vpn(ea, vsid, ssize); + if (unlikely(old_pte & _PAGE_HASHPTE)) { + /* + * There MIGHT be an HPTE for this pte + */ + hash = hpt_hash(vpn, shift, ssize); + if (old_pte & _PAGE_F_SECOND) + hash = ~hash; + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += (old_pte & _PAGE_F_GIX) >> _PAGE_F_GIX_SHIFT; + + if (ppc_md.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_64K, + MMU_PAGE_64K, ssize, flags) == -1) + old_pte &= ~_PAGE_HPTEFLAGS; + } + + if (likely(!(old_pte & _PAGE_HASHPTE))) { + + pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT; + hash = hpt_hash(vpn, shift, ssize); + +repeat: + hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + + /* Insert into the hash table, primary slot */ + slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, + MMU_PAGE_64K, MMU_PAGE_64K, ssize); + /* + * Primary is full, try the secondary + */ + if (unlikely(slot == -1)) { + hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + slot = ppc_md.hpte_insert(hpte_group, vpn, pa, + rflags, HPTE_V_SECONDARY, + MMU_PAGE_64K, MMU_PAGE_64K, ssize); + if (slot == -1) { + if (mftb() & 0x1) + hpte_group = ((hash & htab_hash_mask) * + HPTES_PER_GROUP) & ~0x7UL; + ppc_md.hpte_remove(hpte_group); + /* + * FIXME!! Should be try the group from which we removed ? + */ + goto repeat; + } + } + /* + * Hypervisor failure. Restore old pmd and return -1 + * similar to __hash_page_* + */ + if (unlikely(slot == -2)) { + *ptep = __pte(old_pte); + hash_failure_debug(ea, access, vsid, trap, ssize, + MMU_PAGE_64K, MMU_PAGE_64K, old_pte); + return -1; + } + new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; + new_pte |= (slot << _PAGE_F_GIX_SHIFT) & (_PAGE_F_SECOND | _PAGE_F_GIX); + } + *ptep = __pte(new_pte & ~_PAGE_BUSY); + return 0; +} diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index 359839a57f26..f7d49cf0ccb7 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -328,292 +328,4 @@ htab_pte_insert_failure: li r3,-1 b htab_bail -#else /* CONFIG_PPC_64K_PAGES */ - -/***************************************************************************** - * * - * 64K SW & 64K HW in a 64K segment pages implementation * - * * - *****************************************************************************/ - -_GLOBAL(__hash_page_64K) - mflr r0 - std r0,16(r1) - stdu r1,-STACKFRAMESIZE(r1) - /* Save all params that we need after a function call */ - std r6,STK_PARAM(R6)(r1) - std r8,STK_PARAM(R8)(r1) - std r9,STK_PARAM(R9)(r1) - - /* Save non-volatile registers. - * r31 will hold "old PTE" - * r30 is "new PTE" - * r29 is vpn - * r28 is a hash value - * r27 is hashtab mask (maybe dynamic patched instead ?) - */ - std r27,STK_REG(R27)(r1) - std r28,STK_REG(R28)(r1) - std r29,STK_REG(R29)(r1) - std r30,STK_REG(R30)(r1) - std r31,STK_REG(R31)(r1) - - /* Step 1: - * - * Check permissions, atomically mark the linux PTE busy - * and hashed. - */ -1: - ldarx r31,0,r6 - /* Check access rights (access & ~(pte_val(*ptep))) */ - andc. r0,r4,r31 - bne- ht64_wrong_access - /* Check if PTE is busy */ - andi. r0,r31,_PAGE_BUSY - /* If so, just bail out and refault if needed. Someone else - * is changing this PTE anyway and might hash it. - */ - bne- ht64_bail_ok -BEGIN_FTR_SECTION - /* Check if PTE has the cache-inhibit bit set */ - andi. r0,r31,_PAGE_NO_CACHE - /* If so, bail out and refault as a 4k page */ - bne- ht64_bail_ok -END_MMU_FTR_SECTION_IFCLR(MMU_FTR_CI_LARGE_PAGE) - /* Prepare new PTE value (turn access RW into DIRTY, then - * add BUSY and ACCESSED) - */ - rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */ - or r30,r30,r31 - ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED - /* Write the linux PTE atomically (setting busy) */ - stdcx. r30,0,r6 - bne- 1b - isync - - /* Step 2: - * - * Insert/Update the HPTE in the hash table. At this point, - * r4 (access) is re-useable, we use it for the new HPTE flags - */ - -BEGIN_FTR_SECTION - cmpdi r9,0 /* check segment size */ - bne 3f -END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) - /* Calc vpn and put it in r29 */ - sldi r29,r5,SID_SHIFT - VPN_SHIFT - rldicl r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT - VPN_SHIFT) - or r29,r28,r29 - - /* Calculate hash value for primary slot and store it in r28 - * r3 = va, r5 = vsid - * r0 = (va >> 16) & ((1ul << (28 - 16)) -1) - */ - rldicl r0,r3,64-16,52 - xor r28,r5,r0 /* hash */ - b 4f - -3: /* Calc vpn and put it in r29 */ - sldi r29,r5,SID_SHIFT_1T - VPN_SHIFT - rldicl r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT_1T - VPN_SHIFT) - or r29,r28,r29 - /* - * calculate hash value for primary slot and - * store it in r28 for 1T segment - * r3 = va, r5 = vsid - */ - sldi r28,r5,25 /* vsid << 25 */ - /* r0 = (va >> 16) & ((1ul << (40 - 16)) -1) */ - rldicl r0,r3,64-16,40 - xor r28,r28,r5 /* vsid ^ ( vsid << 25) */ - xor r28,r28,r0 /* hash */ - - /* Convert linux PTE bits into HW equivalents */ -4: andi. r3,r30,0x1fe /* Get basic set of flags */ - xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */ - rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */ - rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */ - and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/ - andc r0,r30,r0 /* r0 = pte & ~r0 */ - rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */ - /* - * Always add "C" bit for perf. Memory coherence is always enabled - */ - ori r3,r3,HPTE_R_C | HPTE_R_M - - /* We eventually do the icache sync here (maybe inline that - * code rather than call a C function...) - */ -BEGIN_FTR_SECTION - mr r4,r30 - mr r5,r7 - bl hash_page_do_lazy_icache -END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE) - - /* At this point, r3 contains new PP bits, save them in - * place of "access" in the param area (sic) - */ - std r3,STK_PARAM(R4)(r1) - - /* Get htab_hash_mask */ - ld r4,htab_hash_mask@got(2) - ld r27,0(r4) /* htab_hash_mask -> r27 */ - - /* Check if we may already be in the hashtable, in this case, we - * go to out-of-line code to try to modify the HPTE - */ - rldicl. r0,r31,64-12,48 - bne ht64_modify_pte - -ht64_insert_pte: - /* Clear hpte bits in new pte (we also clear BUSY btw) and - * add _PAGE_HPTE_SUB0 - */ - lis r0,_PAGE_HPTEFLAGS@h - ori r0,r0,_PAGE_HPTEFLAGS@l - andc r30,r30,r0 - ori r30,r30,_PAGE_HASHPTE - /* Phyical address in r5 */ - rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT - sldi r5,r5,PAGE_SHIFT - - /* Calculate primary group hash */ - and r0,r28,r27 - rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */ - - /* Call ppc_md.hpte_insert */ - ld r6,STK_PARAM(R4)(r1) /* Retrieve new pp bits */ - mr r4,r29 /* Retrieve vpn */ - li r7,0 /* !bolted, !secondary */ - li r8,MMU_PAGE_64K - li r9,MMU_PAGE_64K /* actual page size */ - ld r10,STK_PARAM(R9)(r1) /* segment size */ -.globl ht64_call_hpte_insert1 -ht64_call_hpte_insert1: - bl . /* patched by htab_finish_init() */ - cmpdi 0,r3,0 - bge ht64_pte_insert_ok /* Insertion successful */ - cmpdi 0,r3,-2 /* Critical failure */ - beq- ht64_pte_insert_failure - - /* Now try secondary slot */ - - /* Phyical address in r5 */ - rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT - sldi r5,r5,PAGE_SHIFT - - /* Calculate secondary group hash */ - andc r0,r27,r28 - rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */ - - /* Call ppc_md.hpte_insert */ - ld r6,STK_PARAM(R4)(r1) /* Retrieve new pp bits */ - mr r4,r29 /* Retrieve vpn */ - li r7,HPTE_V_SECONDARY /* !bolted, secondary */ - li r8,MMU_PAGE_64K - li r9,MMU_PAGE_64K /* actual page size */ - ld r10,STK_PARAM(R9)(r1) /* segment size */ -.globl ht64_call_hpte_insert2 -ht64_call_hpte_insert2: - bl . /* patched by htab_finish_init() */ - cmpdi 0,r3,0 - bge+ ht64_pte_insert_ok /* Insertion successful */ - cmpdi 0,r3,-2 /* Critical failure */ - beq- ht64_pte_insert_failure - - /* Both are full, we need to evict something */ - mftb r0 - /* Pick a random group based on TB */ - andi. r0,r0,1 - mr r5,r28 - bne 2f - not r5,r5 -2: and r0,r5,r27 - rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */ - /* Call ppc_md.hpte_remove */ -.globl ht64_call_hpte_remove -ht64_call_hpte_remove: - bl . /* patched by htab_finish_init() */ - - /* Try all again */ - b ht64_insert_pte - -ht64_bail_ok: - li r3,0 - b ht64_bail - -ht64_pte_insert_ok: - /* Insert slot number & secondary bit in PTE */ - rldimi r30,r3,12,63-15 - - /* Write out the PTE with a normal write - * (maybe add eieio may be good still ?) - */ -ht64_write_out_pte: - ld r6,STK_PARAM(R6)(r1) - std r30,0(r6) - li r3, 0 -ht64_bail: - ld r27,STK_REG(R27)(r1) - ld r28,STK_REG(R28)(r1) - ld r29,STK_REG(R29)(r1) - ld r30,STK_REG(R30)(r1) - ld r31,STK_REG(R31)(r1) - addi r1,r1,STACKFRAMESIZE - ld r0,16(r1) - mtlr r0 - blr - -ht64_modify_pte: - /* Keep PP bits in r4 and slot idx from the PTE around in r3 */ - mr r4,r3 - rlwinm r3,r31,32-12,29,31 - - /* Secondary group ? if yes, get a inverted hash value */ - mr r5,r28 - andi. r0,r31,_PAGE_F_SECOND - beq 1f - not r5,r5 -1: - /* Calculate proper slot value for ppc_md.hpte_updatepp */ - and r0,r5,r27 - rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */ - add r3,r0,r3 /* add slot idx */ - - /* Call ppc_md.hpte_updatepp */ - mr r5,r29 /* vpn */ - li r6,MMU_PAGE_64K /* base page size */ - li r7,MMU_PAGE_64K /* actual page size */ - ld r8,STK_PARAM(R9)(r1) /* segment size */ - ld r9,STK_PARAM(R8)(r1) /* get "flags" param */ -.globl ht64_call_hpte_updatepp -ht64_call_hpte_updatepp: - bl . /* patched by htab_finish_init() */ - - /* if we failed because typically the HPTE wasn't really here - * we try an insertion. - */ - cmpdi 0,r3,-1 - beq- ht64_insert_pte - - /* Clear the BUSY bit and Write out the PTE */ - li r0,_PAGE_BUSY - andc r30,r30,r0 - b ht64_write_out_pte - -ht64_wrong_access: - /* Bail out clearing reservation */ - stdcx. r31,0,r6 - li r3,1 - b ht64_bail - -ht64_pte_insert_failure: - /* Bail out restoring old PTE */ - ld r6,STK_PARAM(R6)(r1) - std r31,0(r6) - li r3,-1 - b ht64_bail - - -#endif /* CONFIG_PPC_64K_PAGES */ +#endif diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 12eff232840b..60ad15d76052 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -633,28 +633,11 @@ extern u32 htab_call_hpte_insert1[]; extern u32 htab_call_hpte_insert2[]; extern u32 htab_call_hpte_remove[]; extern u32 htab_call_hpte_updatepp[]; -extern u32 ht64_call_hpte_insert1[]; -extern u32 ht64_call_hpte_insert2[]; -extern u32 ht64_call_hpte_remove[]; -extern u32 ht64_call_hpte_updatepp[]; static void __init htab_finish_init(void) { -#ifdef CONFIG_PPC_64K_PAGES - patch_branch(ht64_call_hpte_insert1, - ppc_function_entry(ppc_md.hpte_insert), - BRANCH_SET_LINK); - patch_branch(ht64_call_hpte_insert2, - ppc_function_entry(ppc_md.hpte_insert), - BRANCH_SET_LINK); - patch_branch(ht64_call_hpte_remove, - ppc_function_entry(ppc_md.hpte_remove), - BRANCH_SET_LINK); - patch_branch(ht64_call_hpte_updatepp, - ppc_function_entry(ppc_md.hpte_updatepp), - BRANCH_SET_LINK); -#else /* !CONFIG_PPC_64K_PAGES */ +#ifdef CONFIG_PPC_4K_PAGES patch_branch(htab_call_hpte_insert1, ppc_function_entry(ppc_md.hpte_insert), BRANCH_SET_LINK); -- 2.5.0