From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from foss.arm.com (foss.arm.com [217.140.110.172]) by smtp.subspace.kernel.org (Postfix) with ESMTP id D74CC883B; Fri, 10 Mar 2023 15:47:21 +0000 (UTC) Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 8CCBFAD7; Fri, 10 Mar 2023 07:48:04 -0800 (PST) Received: from [10.1.25.11] (e122027.cambridge.arm.com [10.1.25.11]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPSA id 700F03FA32; Fri, 10 Mar 2023 07:47:18 -0800 (PST) Message-ID: <6c6ff608-2314-a49a-84ee-fac883c4e227@arm.com> Date: Fri, 10 Mar 2023 15:47:16 +0000 Precedence: bulk X-Mailing-List: linux-coco@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Thunderbird/102.7.1 Subject: Re: [RFC PATCH 16/28] arm64: RME: Allow populating initial contents Content-Language: en-GB To: Zhi Wang Cc: kvm@vger.kernel.org, kvmarm@lists.linux.dev, Catalin Marinas , Marc Zyngier , Will Deacon , James Morse , Oliver Upton , Suzuki K Poulose , Zenghui Yu , linux-arm-kernel@lists.infradead.org, linux-kernel@vger.kernel.org, Joey Gouly , Alexandru Elisei , Christoffer Dall , Fuad Tabba , linux-coco@lists.linux.dev References: <20230127112248.136810-1-suzuki.poulose@arm.com> <20230127112932.38045-1-steven.price@arm.com> <20230127112932.38045-17-steven.price@arm.com> <20230306193439.000048f2@gmail.com> From: Steven Price In-Reply-To: <20230306193439.000048f2@gmail.com> Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 7bit On 06/03/2023 17:34, Zhi Wang wrote: > On Fri, 27 Jan 2023 11:29:20 +0000 > Steven Price wrote: > >> The VMM needs to populate the realm with some data before starting (e.g. >> a kernel and initrd). This is measured by the RMM and used as part of >> the attestation later on. 
>> >> Signed-off-by: Steven Price >> --- >> arch/arm64/kvm/rme.c | 366 +++++++++++++++++++++++++++++++++++++++++++ >> 1 file changed, 366 insertions(+) >> >> diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c >> index 16e0bfea98b1..3405b43e1421 100644 >> --- a/arch/arm64/kvm/rme.c >> +++ b/arch/arm64/kvm/rme.c >> @@ -4,6 +4,7 @@ >> */ >> >> #include >> +#include >> >> #include >> #include >> @@ -426,6 +427,359 @@ void kvm_realm_unmap_range(struct kvm *kvm, unsigned long ipa, u64 size) >> } >> } >> >> +static int realm_create_protected_data_page(struct realm *realm, >> + unsigned long ipa, >> + struct page *dst_page, >> + struct page *tmp_page) >> +{ >> + phys_addr_t dst_phys, tmp_phys; >> + int ret; >> + >> + copy_page(page_address(tmp_page), page_address(dst_page)); >> + >> + dst_phys = page_to_phys(dst_page); >> + tmp_phys = page_to_phys(tmp_page); >> + >> + if (rmi_granule_delegate(dst_phys)) >> + return -ENXIO; >> + >> + ret = rmi_data_create(dst_phys, virt_to_phys(realm->rd), ipa, tmp_phys, >> + RMI_MEASURE_CONTENT); >> + >> + if (RMI_RETURN_STATUS(ret) == RMI_ERROR_RTT) { >> + /* Create missing RTTs and retry */ >> + int level = RMI_RETURN_INDEX(ret); >> + >> + ret = realm_create_rtt_levels(realm, ipa, level, >> + RME_RTT_MAX_LEVEL, NULL); >> + if (ret) >> + goto err; >> + >> + ret = rmi_data_create(dst_phys, virt_to_phys(realm->rd), ipa, >> + tmp_phys, RMI_MEASURE_CONTENT); >> + } >> + >> + if (ret) >> + goto err; >> + >> + return 0; >> + >> +err: >> + if (WARN_ON(rmi_granule_undelegate(dst_phys))) { >> + /* Page can't be returned to NS world so is lost */ >> + get_page(dst_page); >> + } >> + return -ENXIO; >> +} >> + >> +static int fold_rtt(phys_addr_t rd, unsigned long addr, int level, >> + struct realm *realm) >> +{ >> + struct rtt_entry rtt; >> + phys_addr_t rtt_addr; >> + >> + if (rmi_rtt_read_entry(rd, addr, level, &rtt)) >> + return -ENXIO; >> + >> + if (rtt.state != RMI_TABLE) >> + return -EINVAL; >> + >> + rtt_addr = rmi_rtt_get_phys(&rtt); >> + if (rmi_rtt_fold(rtt_addr, rd, addr, level + 1)) >> + return -ENXIO; >> + >> + free_delegated_page(realm, rtt_addr); >> + >> + return 0; >> +} >> + >> +int realm_map_protected(struct realm *realm, >> + unsigned long hva, >> + unsigned long base_ipa, >> + struct page *dst_page, >> + unsigned long map_size, >> + struct kvm_mmu_memory_cache *memcache) >> +{ >> + phys_addr_t dst_phys = page_to_phys(dst_page); >> + phys_addr_t rd = virt_to_phys(realm->rd); >> + unsigned long phys = dst_phys; >> + unsigned long ipa = base_ipa; >> + unsigned long size; >> + int map_level; >> + int ret = 0; >> + >> + if (WARN_ON(!IS_ALIGNED(ipa, map_size))) >> + return -EINVAL; >> + >> + switch (map_size) { >> + case PAGE_SIZE: >> + map_level = 3; >> + break; >> + case RME_L2_BLOCK_SIZE: >> + map_level = 2; >> + break; >> + default: >> + return -EINVAL; >> + } >> + >> + if (map_level < RME_RTT_MAX_LEVEL) { >> + /* >> + * A temporary RTT is needed during the map, precreate it, >> + * however if there is an error (e.g. missing parent tables) >> + * this will be handled below. >> + */ >> + realm_create_rtt_levels(realm, ipa, map_level, >> + RME_RTT_MAX_LEVEL, memcache); >> + } >> + >> + for (size = 0; size < map_size; size += PAGE_SIZE) { >> + if (rmi_granule_delegate(phys)) { >> + struct rtt_entry rtt; >> + >> + /* >> + * It's possible we raced with another VCPU on the same >> + * fault. If the entry exists and matches then exit >> + * early and assume the other VCPU will handle the >> + * mapping. 
>> + */ >> + if (rmi_rtt_read_entry(rd, ipa, RME_RTT_MAX_LEVEL, &rtt)) >> + goto err; >> + >> + // FIXME: For a block mapping this could race at level >> + // 2 or 3... >> + if (WARN_ON((rtt.walk_level != RME_RTT_MAX_LEVEL || >> + rtt.state != RMI_ASSIGNED || >> + rtt.desc != phys))) { >> + goto err; >> + } >> + >> + return 0; >> + } >> + >> + ret = rmi_data_create_unknown(phys, rd, ipa); >> + >> + if (RMI_RETURN_STATUS(ret) == RMI_ERROR_RTT) { >> + /* Create missing RTTs and retry */ >> + int level = RMI_RETURN_INDEX(ret); >> + >> + ret = realm_create_rtt_levels(realm, ipa, level, >> + RME_RTT_MAX_LEVEL, >> + memcache); >> + WARN_ON(ret); >> + if (ret) >> + goto err_undelegate; >> + >> + ret = rmi_data_create_unknown(phys, rd, ipa); >> + } >> + WARN_ON(ret); >> + >> + if (ret) >> + goto err_undelegate; >> + >> + phys += PAGE_SIZE; >> + ipa += PAGE_SIZE; >> + } >> + >> + if (map_size == RME_L2_BLOCK_SIZE) >> + ret = fold_rtt(rd, base_ipa, map_level, realm); >> + if (WARN_ON(ret)) >> + goto err; >> + >> + return 0; >> + >> +err_undelegate: >> + if (WARN_ON(rmi_granule_undelegate(phys))) { >> + /* Page can't be returned to NS world so is lost */ >> + get_page(phys_to_page(phys)); >> + } >> +err: >> + while (size > 0) { >> + phys -= PAGE_SIZE; >> + size -= PAGE_SIZE; >> + ipa -= PAGE_SIZE; >> + >> + rmi_data_destroy(rd, ipa); >> + >> + if (WARN_ON(rmi_granule_undelegate(phys))) { >> + /* Page can't be returned to NS world so is lost */ >> + get_page(phys_to_page(phys)); >> + } >> + } >> + return -ENXIO; >> +} >> + > > There seems no caller to the function above. Better move it to the related > patch. Indeed this should really be in the next patch - will move as it's very confusing having it in this patch (sorry about that). >> +static int populate_par_region(struct kvm *kvm, >> + phys_addr_t ipa_base, >> + phys_addr_t ipa_end) >> +{ >> + struct realm *realm = &kvm->arch.realm; >> + struct kvm_memory_slot *memslot; >> + gfn_t base_gfn, end_gfn; >> + int idx; >> + phys_addr_t ipa; >> + int ret = 0; >> + struct page *tmp_page; >> + phys_addr_t rd = virt_to_phys(realm->rd); >> + >> + base_gfn = gpa_to_gfn(ipa_base); >> + end_gfn = gpa_to_gfn(ipa_end); >> + >> + idx = srcu_read_lock(&kvm->srcu); >> + memslot = gfn_to_memslot(kvm, base_gfn); >> + if (!memslot) { >> + ret = -EFAULT; >> + goto out; >> + } >> + >> + /* We require the region to be contained within a single memslot */ >> + if (memslot->base_gfn + memslot->npages < end_gfn) { >> + ret = -EINVAL; >> + goto out; >> + } >> + >> + tmp_page = alloc_page(GFP_KERNEL); >> + if (!tmp_page) { >> + ret = -ENOMEM; >> + goto out; >> + } >> + >> + mmap_read_lock(current->mm); >> + >> + ipa = ipa_base; >> + >> + while (ipa < ipa_end) { >> + struct vm_area_struct *vma; >> + unsigned long map_size; >> + unsigned int vma_shift; >> + unsigned long offset; >> + unsigned long hva; >> + struct page *page; >> + kvm_pfn_t pfn; >> + int level; >> + >> + hva = gfn_to_hva_memslot(memslot, gpa_to_gfn(ipa)); >> + vma = vma_lookup(current->mm, hva); >> + if (!vma) { >> + ret = -EFAULT; >> + break; >> + } >> + >> + if (is_vm_hugetlb_page(vma)) >> + vma_shift = huge_page_shift(hstate_vma(vma)); >> + else >> + vma_shift = PAGE_SHIFT; >> + >> + map_size = 1 << vma_shift; >> + >> + /* >> + * FIXME: This causes over mapping, but there's no good >> + * solution here with the ABI as it stands >> + */ >> + ipa = ALIGN_DOWN(ipa, map_size); >> + >> + switch (map_size) { >> + case RME_L2_BLOCK_SIZE: >> + level = 2; >> + break; >> + case PAGE_SIZE: >> + level = 3; >> + break; >> 
+ default: >> + WARN_ONCE(1, "Unsupport vma_shift %d", vma_shift); >> + ret = -EFAULT; >> + break; >> + } >> + >> + pfn = gfn_to_pfn_memslot(memslot, gpa_to_gfn(ipa)); >> + >> + if (is_error_pfn(pfn)) { >> + ret = -EFAULT; >> + break; >> + } >> + >> + ret = rmi_rtt_init_ripas(rd, ipa, level); >> + if (RMI_RETURN_STATUS(ret) == RMI_ERROR_RTT) { >> + ret = realm_create_rtt_levels(realm, ipa, >> + RMI_RETURN_INDEX(ret), >> + level, NULL); >> + if (ret) >> + break; >> + ret = rmi_rtt_init_ripas(rd, ipa, level); >> + if (ret) { >> + ret = -ENXIO; >> + break; >> + } >> + } >> + >> + if (level < RME_RTT_MAX_LEVEL) { >> + /* >> + * A temporary RTT is needed during the map, precreate >> + * it, however if there is an error (e.g. missing >> + * parent tables) this will be handled in the >> + * realm_create_protected_data_page() call. >> + */ >> + realm_create_rtt_levels(realm, ipa, level, >> + RME_RTT_MAX_LEVEL, NULL); >> + } >> + >> + page = pfn_to_page(pfn); >> + >> + for (offset = 0; offset < map_size && !ret; >> + offset += PAGE_SIZE, page++) { >> + phys_addr_t page_ipa = ipa + offset; >> + >> + ret = realm_create_protected_data_page(realm, page_ipa, >> + page, tmp_page); >> + } >> + if (ret) >> + goto err_release_pfn; >> + >> + if (level == 2) { >> + ret = fold_rtt(rd, ipa, level, realm); >> + if (ret) >> + goto err_release_pfn; >> + } >> + >> + ipa += map_size; > >> + kvm_set_pfn_accessed(pfn); >> + kvm_set_pfn_dirty(pfn); > > kvm_release_pfn_dirty() has already called kvm_set_pfn_{accessed, dirty}(). Will remove those calls. >> + kvm_release_pfn_dirty(pfn); >> +err_release_pfn: >> + if (ret) { >> + kvm_release_pfn_clean(pfn); >> + break; >> + } >> + } >> + >> + mmap_read_unlock(current->mm); >> + __free_page(tmp_page); >> + >> +out: >> + srcu_read_unlock(&kvm->srcu, idx); >> + return ret; >> +} >> + >> +static int kvm_populate_realm(struct kvm *kvm, >> + struct kvm_cap_arm_rme_populate_realm_args *args) >> +{ >> + phys_addr_t ipa_base, ipa_end; >> + > > Check kvm_is_realm(kvm) here or in the kvm_realm_enable_cap(). I'm going to update kvm_vm_ioctl_enable_cap() to check kvm_is_realm() so we won't get here. >> + if (kvm_realm_state(kvm) != REALM_STATE_NEW) >> + return -EBUSY; > > Maybe -EINVAL? The realm hasn't been created (RMI_REALM_CREATE is not called > yet). The userspace shouldn't reach this path. Well user space can attempt to populate in the ACTIVE state - which is where the idea of 'busy' comes from. Admittedly it's a little confusing when RMI_REALM_CREATE hasn't been called. I'm not particularly bothered about the return code, but it's useful to have a different code to -EINVAL as it's not an invalid argument, but calling at the wrong time. I can't immediately see a better error code though. 
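To be concrete about the kvm_is_realm() check mentioned above: the rough shape I have in mind for kvm_vm_ioctl_enable_cap() is just a guard before dispatching to kvm_realm_enable_cap(). Untested sketch only - the exact context in arch/arm64/kvm/arm.c (and whether the cap is routed via a single KVM_CAP_ARM_RME case) may well end up looking different:

	case KVM_CAP_ARM_RME:
		/* Reject the RME capability ioctls early for non-realm VMs */
		if (!kvm_is_realm(kvm)) {
			r = -EINVAL;
			break;
		}
		r = kvm_realm_enable_cap(kvm, cap);
		break;

That way kvm_populate_realm() (and the other RME cap handlers) can rely on the VM actually being a realm.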
Steve >> + >> + if (!IS_ALIGNED(args->populate_ipa_base, PAGE_SIZE) || >> + !IS_ALIGNED(args->populate_ipa_size, PAGE_SIZE)) >> + return -EINVAL; >> + >> + ipa_base = args->populate_ipa_base; >> + ipa_end = ipa_base + args->populate_ipa_size; >> + >> + if (ipa_end < ipa_base) >> + return -EINVAL; >> + >> + return populate_par_region(kvm, ipa_base, ipa_end); >> +} >> + >> static int set_ipa_state(struct kvm_vcpu *vcpu, >> unsigned long ipa, >> unsigned long end, >> @@ -748,6 +1102,18 @@ int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) >> r = kvm_init_ipa_range_realm(kvm, &args); >> break; >> } >> + case KVM_CAP_ARM_RME_POPULATE_REALM: { >> + struct kvm_cap_arm_rme_populate_realm_args args; >> + void __user *argp = u64_to_user_ptr(cap->args[1]); >> + >> + if (copy_from_user(&args, argp, sizeof(args))) { >> + r = -EFAULT; >> + break; >> + } >> + >> + r = kvm_populate_realm(kvm, &args); >> + break; >> + } >> default: >> r = -EINVAL; >> break; >