From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-15.3 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,NICE_REPLY_A,SPF_HELO_NONE,SPF_PASS,URIBL_BLOCKED, USER_AGENT_SANE_1 autolearn=unavailable autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 4E634C11F66 for ; Wed, 14 Jul 2021 15:38:21 +0000 (UTC) Received: from kanga.kvack.org (kanga.kvack.org [205.233.56.17]) by mail.kernel.org (Postfix) with ESMTP id DF46F613DA for ; Wed, 14 Jul 2021 15:38:20 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org DF46F613DA Authentication-Results: mail.kernel.org; dmarc=fail (p=none dis=none) header.from=arm.com Authentication-Results: mail.kernel.org; spf=pass smtp.mailfrom=owner-linux-mm@kvack.org Received: by kanga.kvack.org (Postfix) id C58356B0083; Wed, 14 Jul 2021 11:38:20 -0400 (EDT) Received: by kanga.kvack.org (Postfix, from userid 40) id C091C6B0085; Wed, 14 Jul 2021 11:38:20 -0400 (EDT) X-Delivered-To: int-list-linux-mm@kvack.org Received: by kanga.kvack.org (Postfix, from userid 63042) id AA96A6B0088; Wed, 14 Jul 2021 11:38:20 -0400 (EDT) X-Delivered-To: linux-mm@kvack.org Received: from forelay.hostedemail.com (smtprelay0025.hostedemail.com [216.40.44.25]) by kanga.kvack.org (Postfix) with ESMTP id 8070D6B0083 for ; Wed, 14 Jul 2021 11:38:20 -0400 (EDT) Received: from smtpin19.hostedemail.com (10.5.19.251.rfc1918.com [10.5.19.251]) by forelay02.hostedemail.com (Postfix) with ESMTP id 6928D31EB4 for ; Wed, 14 Jul 2021 15:38:19 +0000 (UTC) X-FDA: 78361599918.19.E5BFEEC Received: from foss.arm.com (foss.arm.com [217.140.110.172]) by imf18.hostedemail.com (Postfix) with ESMTP id C986C400208E for ; Wed, 14 Jul 2021 15:38:18 +0000 (UTC) Received: from 
usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id D11C631B; Wed, 14 Jul 2021 08:38:17 -0700 (PDT) Received: from [192.168.1.179] (unknown [172.31.20.19]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPSA id 49CB13F7D8; Wed, 14 Jul 2021 08:38:16 -0700 (PDT) Subject: Re: [RFC 06/10] arm64/mm: Add FEAT_LPA2 specific encoding To: Anshuman Khandual , linux-arm-kernel@lists.infradead.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org Cc: akpm@linux-foundation.org, suzuki.poulose@arm.com, mark.rutland@arm.com, will@kernel.org, catalin.marinas@arm.com, maz@kernel.org, james.morse@arm.com References: <1626229291-6569-1-git-send-email-anshuman.khandual@arm.com> <1626229291-6569-7-git-send-email-anshuman.khandual@arm.com> From: Steven Price Message-ID: <9f0d9925-3694-3fae-0d09-00adbecd1878@arm.com> Date: Wed, 14 Jul 2021 16:38:15 +0100 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Thunderbird/78.11.0 MIME-Version: 1.0 In-Reply-To: <1626229291-6569-7-git-send-email-anshuman.khandual@arm.com> Content-Type: text/plain; charset=utf-8 Content-Language: en-GB Content-Transfer-Encoding: 7bit X-Rspamd-Server: rspam06 X-Rspamd-Queue-Id: C986C400208E X-Stat-Signature: 863z8fj11ay3hfw57pip9mcjs7cf5fw1 Authentication-Results: imf18.hostedemail.com; dkim=none; spf=pass (imf18.hostedemail.com: domain of steven.price@arm.com designates 217.140.110.172 as permitted sender) smtp.mailfrom=steven.price@arm.com; dmarc=pass (policy=none) header.from=arm.com X-HE-Tag: 1626277098-168407 X-Bogosity: Ham, tests=bogofilter, spamicity=0.000000, version=1.2.4 Sender: owner-linux-mm@kvack.org Precedence: bulk X-Loop: owner-majordomo@kvack.org List-ID: On 14/07/2021 03:21, Anshuman Khandual wrote: > FEAT_LPA2 requires different PTE representation formats for both 4K and 16K > page size config. This adds FEAT_LPA2 specific new PTE encodings as per ARM > ARM (0487G.A) which updates [pte|phys]_to_[phys|pte](). 
The updated helpers > would be used when FEAT_LPA2 gets enabled via CONFIG_ARM64_PA_BITS_52 on 4K > and 16K page sizes. However, the TTBR encoding and the phys_to_ttbr() helper remain > the same for FEAT_LPA2 as for FEAT_LPA. This also updates the 'phys_to_pte' helper > to accept a temporary register and changes the impacted call sites. > > Signed-off-by: Anshuman Khandual > --- > arch/arm64/include/asm/assembler.h | 23 +++++++++++++++++++---- > arch/arm64/include/asm/pgtable-hwdef.h | 4 ++++ > arch/arm64/include/asm/pgtable.h | 4 ++++ > arch/arm64/kernel/head.S | 25 +++++++++++++------------ > 4 files changed, 40 insertions(+), 16 deletions(-) > > diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h > index fedc202..0492543 100644 > --- a/arch/arm64/include/asm/assembler.h > +++ b/arch/arm64/include/asm/assembler.h > @@ -606,7 +606,7 @@ alternative_endif > #endif > .endm > > - .macro phys_to_pte, pte, phys > + .macro phys_to_pte, pte, phys, tmp > #ifdef CONFIG_ARM64_PA_BITS_52_LPA > /* > * We assume \phys is 64K aligned and this is guaranteed by only > @@ -614,6 +614,17 @@ alternative_endif > */ > orr \pte, \phys, \phys, lsr #36 > and \pte, \pte, #PTE_ADDR_MASK > +#elif defined(CONFIG_ARM64_PA_BITS_52_LPA2) > + orr \pte, \phys, \phys, lsr #42 > + > + /* > + * The 'tmp' is being used here to just prepare > + * and hold PTE_ADDR_MASK which cannot be passed > + * to the subsequent 'and' instruction. 
> + */ > + mov \tmp, #PTE_ADDR_LOW > + orr \tmp, \tmp, #PTE_ADDR_HIGH > + and \pte, \pte, \tmp Rather than adding an extra temporary register (and the fallout of various other macros needing an extra register), this can be done with two AND instructions: /* PTE_ADDR_MASK cannot be encoded as an immediate, so * mask off all but two bits, followed by masking the * extra two bits */ and \pte, \pte, #PTE_ADDR_MASK | (3 << 10) and \pte, \pte, #~(3 << 10) Steve > #else /* !CONFIG_ARM64_PA_BITS_52_LPA */ > mov \pte, \phys > #endif /* CONFIG_ARM64_PA_BITS_52_LPA */ > @@ -621,9 +632,13 @@ alternative_endif > > .macro pte_to_phys, phys, pte > #ifdef CONFIG_ARM64_PA_BITS_52_LPA > - ubfiz \phys, \pte, #(48 - 16 - 12), #16 > - bfxil \phys, \pte, #16, #32 > - lsl \phys, \phys, #16 > + ubfiz \phys, \pte, #(48 - PAGE_SHIFT - 12), #16 > + bfxil \phys, \pte, #PAGE_SHIFT, #(48 - PAGE_SHIFT) > + lsl \phys, \phys, #PAGE_SHIFT > +#elif defined(CONFIG_ARM64_PA_BITS_52_LPA2) > + ubfiz \phys, \pte, #(52 - PAGE_SHIFT - 10), #10 > + bfxil \phys, \pte, #PAGE_SHIFT, #(50 - PAGE_SHIFT) > + lsl \phys, \phys, #PAGE_SHIFT > #else /* !CONFIG_ARM64_PA_BITS_52_LPA */ > and \phys, \pte, #PTE_ADDR_MASK > #endif /* CONFIG_ARM64_PA_BITS_52_LPA */ > diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h > index f375bcf..c815a85 100644 > --- a/arch/arm64/include/asm/pgtable-hwdef.h > +++ b/arch/arm64/include/asm/pgtable-hwdef.h > @@ -159,6 +159,10 @@ > #define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT) > #define PTE_ADDR_HIGH (_AT(pteval_t, 0xf) << 12) > #define PTE_ADDR_MASK (PTE_ADDR_LOW | PTE_ADDR_HIGH) > +#elif defined(CONFIG_ARM64_PA_BITS_52_LPA2) > +#define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (50 - PAGE_SHIFT)) - 1) << PAGE_SHIFT) > +#define PTE_ADDR_HIGH (_AT(pteval_t, 0x3) << 8) > +#define PTE_ADDR_MASK (PTE_ADDR_LOW | PTE_ADDR_HIGH) > #else /* !CONFIG_ARM64_PA_BITS_52_LPA */ > #define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (48 - 
PAGE_SHIFT)) - 1) << PAGE_SHIFT) > #define PTE_ADDR_MASK PTE_ADDR_LOW > diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h > index 3c57fb2..5e7e402 100644 > --- a/arch/arm64/include/asm/pgtable.h > +++ b/arch/arm64/include/asm/pgtable.h > @@ -70,6 +70,10 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; > #define __pte_to_phys(pte) \ > ((pte_val(pte) & PTE_ADDR_LOW) | ((pte_val(pte) & PTE_ADDR_HIGH) << 36)) > #define __phys_to_pte_val(phys) (((phys) | ((phys) >> 36)) & PTE_ADDR_MASK) > +#elif defined(CONFIG_ARM64_PA_BITS_52_LPA2) > +#define __pte_to_phys(pte) \ > + ((pte_val(pte) & PTE_ADDR_LOW) | ((pte_val(pte) & PTE_ADDR_HIGH) << 42)) > +#define __phys_to_pte_val(phys) (((phys) | ((phys) >> 42)) & PTE_ADDR_MASK) > #else /* !CONFIG_ARM64_PA_BITS_52_LPA */ > #define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_MASK) > #define __phys_to_pte_val(phys) (phys) > diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S > index c5c994a..6444147 100644 > --- a/arch/arm64/kernel/head.S > +++ b/arch/arm64/kernel/head.S > @@ -134,9 +134,9 @@ SYM_CODE_END(preserve_boot_args) > * Corrupts: ptrs, tmp1, tmp2 > * Returns: tbl -> next level table page address > */ > - .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2 > + .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2, tmp3 > add \tmp1, \tbl, #PAGE_SIZE > - phys_to_pte \tmp2, \tmp1 > + phys_to_pte \tmp2, \tmp1, \tmp3 > orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type > lsr \tmp1, \virt, #\shift > sub \ptrs, \ptrs, #1 > @@ -161,8 +161,8 @@ SYM_CODE_END(preserve_boot_args) > * Corrupts: index, tmp1 > * Returns: rtbl > */ > - .macro populate_entries, tbl, rtbl, index, eindex, flags, inc, tmp1 > -.Lpe\@: phys_to_pte \tmp1, \rtbl > + .macro populate_entries, tbl, rtbl, index, eindex, flags, inc, tmp1, tmp2 > +.Lpe\@: phys_to_pte \tmp1, \rtbl, \tmp2 > orr \tmp1, \tmp1, \flags // tmp1 = table entry > str \tmp1, [\tbl, \index, 
lsl #3] > add \rtbl, \rtbl, \inc // rtbl = pa next level > @@ -224,31 +224,32 @@ SYM_CODE_END(preserve_boot_args) > * Preserves: vstart, vend, flags > * Corrupts: tbl, rtbl, istart, iend, tmp, count, sv > */ > - .macro map_memory, tbl, rtbl, vstart, vend, flags, phys, pgds, istart, iend, tmp, count, sv > + .macro map_memory, tbl, rtbl, vstart, vend, flags, phys, pgds, istart, iend, \ > + tmp, tmp1, count, sv > add \rtbl, \tbl, #PAGE_SIZE > mov \sv, \rtbl > mov \count, #0 > compute_indices \vstart, \vend, #PGDIR_SHIFT, \pgds, \istart, \iend, \count > - populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp > + populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp, \tmp1 > mov \tbl, \sv > mov \sv, \rtbl > > #if SWAPPER_PGTABLE_LEVELS > 3 > compute_indices \vstart, \vend, #PUD_SHIFT, #PTRS_PER_PUD, \istart, \iend, \count > - populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp > + populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp, \tmp1 > mov \tbl, \sv > mov \sv, \rtbl > #endif > > #if SWAPPER_PGTABLE_LEVELS > 2 > compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #PTRS_PER_PMD, \istart, \iend, \count > - populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp > + populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp, \tmp1 > mov \tbl, \sv > #endif > > compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #PTRS_PER_PTE, \istart, \iend, \count > bic \count, \phys, #SWAPPER_BLOCK_SIZE - 1 > - populate_entries \tbl, \count, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp > + populate_entries \tbl, \count, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp, \tmp1 > .endm > > /* > @@ -343,7 +344,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables) > #endif > > mov x4, EXTRA_PTRS > - create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6 > + create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6, x20 > #else > /* > * If VA_BITS 
== 48, we don't have to configure an additional > @@ -356,7 +357,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables) > ldr_l x4, idmap_ptrs_per_pgd > adr_l x6, __idmap_text_end // __pa(__idmap_text_end) > > - map_memory x0, x1, x3, x6, x7, x3, x4, x10, x11, x12, x13, x14 > + map_memory x0, x1, x3, x6, x7, x3, x4, x10, x11, x12, x20, x13, x14 > > /* > * Map the kernel image (starting with PHYS_OFFSET). > @@ -370,7 +371,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables) > sub x6, x6, x3 // _end - _text > add x6, x6, x5 // runtime __va(_end) > > - map_memory x0, x1, x5, x6, x7, x3, x4, x10, x11, x12, x13, x14 > + map_memory x0, x1, x5, x6, x7, x3, x4, x10, x11, x12, x20, x13, x14 > > /* > * Since the page tables have been populated with non-cacheable >