From: Mike Rapoport <rppt@kernel.org>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>,
Andy Lutomirski <luto@kernel.org>, Arnd Bergmann <arnd@arndb.de>,
Borislav Petkov <bp@alien8.de>,
Catalin Marinas <catalin.marinas@arm.com>,
Christopher Lameter <cl@linux.com>,
Dan Williams <dan.j.williams@intel.com>,
Dave Hansen <dave.hansen@linux.intel.com>,
David Hildenbrand <david@redhat.com>,
Elena Reshetova <elena.reshetova@intel.com>,
"H. Peter Anvin" <hpa@zytor.com>, Idan Yaniv <idan.yaniv@ibm.com>,
Ingo Molnar <mingo@redhat.com>,
James Bottomley <jejb@linux.ibm.com>,
"Kirill A. Shutemov" <kirill@shutemov.name>,
Matthew Wilcox <willy@infradead.org>,
Mark Rutland <mark.rutland@arm.com>,
Mike Rapoport <rppt@linux.ibm.com>,
Mike Rapoport <rppt@kernel.org>,
Michael Kerrisk <mtk.manpages@gmail.com>,
Palmer Dabbelt <palmer@dabbelt.com>,
Paul Walmsley <paul.walmsley@sifive.com>,
Peter Zijlstra <peterz@infradead.org>,
Thomas Gleixner <tglx@linutronix.de>,
Shuah Khan <shuah@kernel.org>, Tycho Andersen <tycho@tycho.ws>,
Will Deacon <will@kernel.org>,
linux-api@vger.kernel.org, linux-arch@vger.kernel.org,
linux-arm-kernel@lists.infradead.org,
linux-fsdevel@vger.kernel.org, linux-mm@kvack.org,
linux-kernel@vger.kernel.org, linux-kselftest@vger.kernel.org,
linux-nvdimm@lists.01.org, linux-riscv@lists.infradead.org,
x86@kernel.org
Subject: [PATCH v6 5/6] mm: secretmem: use PMD-size pages to amortize direct map fragmentation
Date: Thu, 24 Sep 2020 16:29:03 +0300 [thread overview]
Message-ID: <20200924132904.1391-6-rppt@kernel.org> (raw)
In-Reply-To: <20200924132904.1391-1-rppt@kernel.org>
From: Mike Rapoport <rppt@linux.ibm.com>
Removing a PAGE_SIZE page from the direct map every time such a page is
allocated for a secret memory mapping will cause severe fragmentation of
the direct map. This fragmentation can be reduced by using PMD-size pages
as a pool for small pages for secret memory mappings.
Add a gen_pool per secretmem inode and lazily populate this pool with
PMD-size pages.
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
---
mm/secretmem.c | 107 ++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 88 insertions(+), 19 deletions(-)
diff --git a/mm/secretmem.c b/mm/secretmem.c
index 3293f761076e..333eb18fb483 100644
--- a/mm/secretmem.c
+++ b/mm/secretmem.c
@@ -12,6 +12,7 @@
#include <linux/bitops.h>
#include <linux/printk.h>
#include <linux/pagemap.h>
+#include <linux/genalloc.h>
#include <linux/syscalls.h>
#include <linux/pseudo_fs.h>
#include <linux/set_memory.h>
@@ -40,24 +41,66 @@
#define SECRETMEM_FLAGS_MASK SECRETMEM_MODE_MASK
struct secretmem_ctx {
+ /* gen_pool that secretmem_alloc_page() hands PAGE_SIZE pages out of */
+ struct gen_pool *pool;
unsigned int mode;
};
-static struct page *secretmem_alloc_page(gfp_t gfp)
+/*
+ * Grow ctx->pool by one PMD-size block of pages.
+ *
+ * Allocates an order-PMD_PAGE_ORDER block with @gfp, splits it into
+ * independent order-0 pages, adds the whole PMD_SIZE range to the pool and
+ * then calls __kernel_map_pages(..., 0) on it.
+ *
+ * Returns 0 on success, -ENOMEM if the block cannot be allocated, or the
+ * error reported by gen_pool_add().
+ */
+static int secretmem_pool_increase(struct secretmem_ctx *ctx, gfp_t gfp)
{
- /*
- * FIXME: use a cache of large pages to reduce the direct map
- * fragmentation
- */
- return alloc_page(gfp);
+ unsigned long nr_pages = (1 << PMD_PAGE_ORDER);
+ struct gen_pool *pool = ctx->pool;
+ unsigned long addr, i;
+ struct page *page;
+ int err;
+
+ page = alloc_pages(gfp, PMD_PAGE_ORDER);
+ if (!page)
+ return -ENOMEM;
+
+ addr = (unsigned long)page_address(page);
+ split_page(page, PMD_PAGE_ORDER);
+
+ err = gen_pool_add(pool, addr, PMD_SIZE, NUMA_NO_NODE);
+ if (err) {
+ /*
+ * split_page() has already turned the block into nr_pages
+ * independent order-0 pages, so it can no longer be released
+ * as a single order-PMD_PAGE_ORDER unit: free them one by one.
+ */
+ for (i = 0; i < nr_pages; i++)
+ __free_page(page + i);
+ return err;
+ }
+
+ /*
+ * NOTE(review): the per-page code this replaces in secretmem_fault()
+ * flushed explicitly with flush_tlb_kernel_range(); confirm that
+ * __kernel_map_pages() alone is sufficient here.
+ */
+ __kernel_map_pages(page, nr_pages, 0);
+
+ return 0;
+}
+
+/*
+ * Hand out one PAGE_SIZE page from the context's gen_pool.
+ *
+ * When the pool reports less than PAGE_SIZE available it is first grown
+ * with secretmem_pool_increase(). Returns the page with an additional
+ * reference taken via get_page(), or NULL if the pool could not be grown
+ * or gen_pool_alloc() returned no address.
+ */
+static struct page *secretmem_alloc_page(struct secretmem_ctx *ctx,
+ gfp_t gfp)
+{
+ struct gen_pool *pool = ctx->pool;
+ unsigned long addr;
+ struct page *page;
+ int err;
+
+ /* Lazily populate the pool: only grow it once it has run dry */
+ if (gen_pool_avail(pool) < PAGE_SIZE) {
+ err = secretmem_pool_increase(ctx, gfp);
+ if (err)
+ return NULL;
+ }
+
+ addr = gen_pool_alloc(pool, PAGE_SIZE);
+ if (!addr)
+ return NULL;
+
+ /* The pool tracks kernel virtual addresses; convert back to the page */
+ page = virt_to_page(addr);
+ get_page(page);
+
+ return page;
}
static vm_fault_t secretmem_fault(struct vm_fault *vmf)
{
+ struct secretmem_ctx *ctx = vmf->vma->vm_file->private_data;
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
struct inode *inode = file_inode(vmf->vma->vm_file);
pgoff_t offset = vmf->pgoff;
- unsigned long addr;
struct page *page;
int ret = 0;
@@ -66,7 +109,7 @@ static vm_fault_t secretmem_fault(struct vm_fault *vmf)
page = find_get_entry(mapping, offset);
if (!page) {
- page = secretmem_alloc_page(vmf->gfp_mask);
+ page = secretmem_alloc_page(ctx, vmf->gfp_mask);
if (!page)
return vmf_error(-ENOMEM);
@@ -74,14 +117,8 @@ static vm_fault_t secretmem_fault(struct vm_fault *vmf)
if (unlikely(ret))
goto err_put_page;
- ret = set_direct_map_invalid_noflush(page);
- if (ret)
- goto err_del_page_cache;
-
- addr = (unsigned long)page_address(page);
- flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
-
__SetPageUptodate(page);
+ set_page_private(page, (unsigned long)ctx);
ret = VM_FAULT_LOCKED;
}
@@ -89,8 +126,6 @@ static vm_fault_t secretmem_fault(struct vm_fault *vmf)
vmf->page = page;
return ret;
-err_del_page_cache:
- delete_from_page_cache(page);
err_put_page:
put_page(page);
return vmf_error(ret);
@@ -138,7 +173,11 @@ static int secretmem_migratepage(struct address_space *mapping,
+/*
+ * Give the page back to the gen_pool it was allocated from.
+ *
+ * The owning context is read from page_private(), where secretmem_fault()
+ * stored it with set_page_private(). The page's direct map entry is not
+ * touched here; __kernel_map_pages(..., 1) is only called per chunk in
+ * secretmem_cleanup_chunk().
+ */
static void secretmem_freepage(struct page *page)
{
- set_direct_map_default_noflush(page);
+ unsigned long addr = (unsigned long)page_address(page);
+ struct secretmem_ctx *ctx = (struct secretmem_ctx *)page_private(page);
+ struct gen_pool *pool = ctx->pool;
+
+ gen_pool_free(pool, addr, PAGE_SIZE);
}
static const struct address_space_operations secretmem_aops = {
@@ -163,13 +202,18 @@ static struct file *secretmem_file_create(unsigned long flags)
if (!ctx)
goto err_free_inode;
+ ctx->pool = gen_pool_create(PAGE_SHIFT, NUMA_NO_NODE);
+ if (!ctx->pool)
+ goto err_free_ctx;
+
file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem",
O_RDWR, &secretmem_fops);
if (IS_ERR(file))
- goto err_free_ctx;
+ goto err_free_pool;
mapping_set_unevictable(inode->i_mapping);
+ inode->i_private = ctx;
inode->i_mapping->private_data = ctx;
inode->i_mapping->a_ops = &secretmem_aops;
@@ -183,6 +227,8 @@ static struct file *secretmem_file_create(unsigned long flags)
return file;
+err_free_pool:
+ gen_pool_destroy(ctx->pool);
err_free_ctx:
kfree(ctx);
err_free_inode:
@@ -221,11 +267,34 @@ SYSCALL_DEFINE1(memfd_secret, unsigned long, flags)
return err;
}
+/*
+ * gen_pool_for_each_chunk() callback used by secretmem_cleanup_pool().
+ *
+ * Calls __kernel_map_pages(..., 1) on the whole chunk, the counterpart of
+ * the __kernel_map_pages(..., 0) call in secretmem_pool_increase(), and
+ * then drops one reference on every page of the chunk. @pool and @data are
+ * not used.
+ */
+static void secretmem_cleanup_chunk(struct gen_pool *pool,
+ struct gen_pool_chunk *chunk, void *data)
+{
+ unsigned long start = chunk->start_addr;
+ unsigned long end = chunk->end_addr;
+ unsigned long nr_pages, addr;
+
+ /*
+ * The "+ 1" treats end_addr as the address of the chunk's last byte
+ * (presumably inclusive -- confirm against genalloc).
+ */
+ nr_pages = (end - start + 1) / PAGE_SIZE;
+ __kernel_map_pages(virt_to_page(start), nr_pages, 1);
+
+ for (addr = start; addr < end; addr += PAGE_SIZE)
+ put_page(virt_to_page(addr));
+}
+
+/*
+ * Tear down the context's gen_pool: run secretmem_cleanup_chunk() on every
+ * chunk, then destroy the pool itself. Called from secretmem_evict_inode().
+ */
+static void secretmem_cleanup_pool(struct secretmem_ctx *ctx)
+{
+ struct gen_pool *pool = ctx->pool;
+
+ gen_pool_for_each_chunk(pool, secretmem_cleanup_chunk, ctx);
+ gen_pool_destroy(pool);
+}
+
static void secretmem_evict_inode(struct inode *inode)
{
struct secretmem_ctx *ctx = inode->i_private;
truncate_inode_pages_final(&inode->i_data);
+ /*
+ * The pool is torn down after the page cache has been truncated and
+ * before ctx is freed. NOTE(review): presumably so that every page
+ * has gone back to the pool through secretmem_freepage() first --
+ * confirm.
+ */
+ secretmem_cleanup_pool(ctx);
clear_inode(inode);
kfree(ctx);
}
--
2.28.0
next prev parent reply other threads:[~2020-09-24 13:30 UTC|newest]
Thread overview: 57+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-09-24 13:28 [PATCH v6 0/6] mm: introduce memfd_secret system call to create "secret" memory areas Mike Rapoport
2020-09-24 13:28 ` [PATCH v6 1/6] mm: add definition of PMD_PAGE_ORDER Mike Rapoport
2020-09-24 13:29 ` [PATCH v6 2/6] mmap: make mlock_future_check() global Mike Rapoport
2020-09-24 13:29 ` [PATCH v6 3/6] mm: introduce memfd_secret system call to create "secret" memory areas Mike Rapoport
2020-09-29 4:58 ` Edgecombe, Rick P
2020-09-29 13:06 ` Mike Rapoport
2020-09-29 20:06 ` Edgecombe, Rick P
2020-09-30 10:35 ` Mike Rapoport
2020-09-30 20:11 ` Edgecombe, Rick P
2020-10-11 9:42 ` Mike Rapoport
2020-09-24 13:29 ` [PATCH v6 4/6] arch, mm: wire up memfd_secret system call were relevant Mike Rapoport
2020-09-24 13:29 ` Mike Rapoport [this message]
2020-09-25 7:41 ` [PATCH v6 5/6] mm: secretmem: use PMD-size pages to amortize direct map fragmentation Peter Zijlstra
2020-09-25 9:00 ` David Hildenbrand
2020-09-25 9:50 ` Peter Zijlstra
2020-09-25 10:31 ` Mark Rutland
2020-09-25 14:57 ` Tycho Andersen
2020-09-29 14:04 ` Mike Rapoport
2020-09-29 13:07 ` Mike Rapoport
2020-09-29 13:06 ` Mike Rapoport
2020-09-29 13:05 ` Mike Rapoport
2020-09-29 14:12 ` Peter Zijlstra
2020-09-29 14:31 ` Dave Hansen
2020-09-29 14:58 ` Mike Rapoport
2020-09-29 15:15 ` Peter Zijlstra
2020-09-30 10:27 ` Mike Rapoport
2020-09-30 14:39 ` James Bottomley
2020-09-30 14:45 ` David Hildenbrand
2020-09-30 15:17 ` James Bottomley
2020-09-30 15:25 ` David Hildenbrand
2020-09-30 15:09 ` Matthew Wilcox
2020-10-01 8:14 ` Mike Rapoport
2020-09-29 15:03 ` James Bottomley
2020-09-30 10:20 ` Mike Rapoport
2020-09-30 10:43 ` Peter Zijlstra
2020-09-24 13:29 ` [PATCH v6 6/6] secretmem: test: add basic selftest for memfd_secret(2) Mike Rapoport
2020-09-24 13:35 ` [PATCH] man2: new page describing memfd_secret() system call Mike Rapoport
2020-09-24 14:55 ` Alejandro Colomar
2020-10-03 9:32 ` Alejandro Colomar
2020-10-05 7:32 ` Mike Rapoport
2020-11-16 21:01 ` [PATCH v2] memfd_secret.2: New " Alejandro Colomar
2020-11-17 6:26 ` Mike Rapoport
2020-09-25 2:34 ` [PATCH v6 0/6] mm: introduce memfd_secret system call to create "secret" memory areas Andrew Morton
2020-09-25 6:42 ` Mike Rapoport
2020-11-01 11:09 ` Hagen Paul Pfeifer
2020-11-02 15:40 ` Mike Rapoport
2020-11-03 13:52 ` Hagen Paul Pfeifer
2020-11-03 16:30 ` Mike Rapoport
2020-11-04 11:39 ` Hagen Paul Pfeifer
2020-11-04 17:02 ` Mike Rapoport
2020-11-09 10:41 ` Hagen Paul Pfeifer
2020-11-02 9:11 ` David Hildenbrand
2020-11-02 9:31 ` David Hildenbrand
2020-11-02 17:43 ` Mike Rapoport
2020-11-02 17:51 ` David Hildenbrand
2020-11-03 9:52 ` Mike Rapoport
2020-11-03 10:11 ` David Hildenbrand
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200924132904.1391-6-rppt@kernel.org \
--to=rppt@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=arnd@arndb.de \
--cc=bp@alien8.de \
--cc=catalin.marinas@arm.com \
--cc=cl@linux.com \
--cc=dan.j.williams@intel.com \
--cc=dave.hansen@linux.intel.com \
--cc=david@redhat.com \
--cc=elena.reshetova@intel.com \
--cc=hpa@zytor.com \
--cc=idan.yaniv@ibm.com \
--cc=jejb@linux.ibm.com \
--cc=kirill@shutemov.name \
--cc=linux-api@vger.kernel.org \
--cc=linux-arch@vger.kernel.org \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-nvdimm@lists.01.org \
--cc=linux-riscv@lists.infradead.org \
--cc=luto@kernel.org \
--cc=mark.rutland@arm.com \
--cc=mingo@redhat.com \
--cc=mtk.manpages@gmail.com \
--cc=palmer@dabbelt.com \
--cc=paul.walmsley@sifive.com \
--cc=peterz@infradead.org \
--cc=rppt@linux.ibm.com \
--cc=shuah@kernel.org \
--cc=tglx@linutronix.de \
--cc=tycho@tycho.ws \
--cc=viro@zeniv.linux.org.uk \
--cc=will@kernel.org \
--cc=willy@infradead.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).