All of lore.kernel.org
 help / color / mirror / Atom feed
From: Suren Baghdasaryan <surenb@google.com>
To: akpm@linux-foundation.org
Cc: ccross@google.com, sumit.semwal@linaro.org, mhocko@suse.com,
	dave.hansen@intel.com, keescook@chromium.org,
	willy@infradead.org, kirill.shutemov@linux.intel.com,
	vbabka@suse.cz, hannes@cmpxchg.org, corbet@lwn.net,
	viro@zeniv.linux.org.uk, rdunlap@infradead.org,
	kaleshsingh@google.com, peterx@redhat.com, rppt@kernel.org,
	peterz@infradead.org, catalin.marinas@arm.com,
	vincenzo.frascino@arm.com, chinwen.chang@mediatek.com,
	axelrasmussen@google.com, aarcange@redhat.com, jannh@google.com,
	apopple@nvidia.com, jhubbard@nvidia.com, yuzhao@google.com,
	will@kernel.org, fenghua.yu@intel.com,
	thunder.leizhen@huawei.com, hughd@google.com,
	feng.tang@intel.com, jgg@ziepe.ca, guro@fb.com,
	tglx@linutronix.de, krisman@collabora.com,
	chris.hyser@oracle.com, pcc@google.com, ebiederm@xmission.com,
	axboe@kernel.dk, legion@kernel.org, eb@emlix.com,
	songmuchun@bytedance.com, viresh.kumar@linaro.org,
	thomascedeno@google.com, sashal@kernel.org, cxfcosmos@gmail.com,
	linux@rasmusvillemoes.dk, linux-kernel@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, linux-doc@vger.kernel.org,
	linux-mm@kvack.org, kernel-team@android.com, surenb@google.com
Subject: [PATCH v8 3/3] mm: add anonymous vma name refcounting
Date: Fri, 27 Aug 2021 12:18:58 -0700	[thread overview]
Message-ID: <20210827191858.2037087-4-surenb@google.com> (raw)
In-Reply-To: <20210827191858.2037087-1-surenb@google.com>

While forking a process with a high number (64K) of named anonymous vmas
the overhead caused by kstrdup() is noticeable. Experiments on an ARM64
Android device show up to a 40% performance regression when forking a
process with 64K unpopulated anonymous vmas using the max name lengths vs
the same process with the same number of anonymous vmas having no name.
Introduce a refcounted anon_vma_name structure to avoid the overhead of
copying vma names during fork() and when splitting named anonymous vmas.
When a vma is duplicated, instead of copying the name we increment the
refcount of this structure. Multiple vmas can point to the same
anon_vma_name as long as they increment the refcount. The name member of
the anon_vma_name structure is assigned at structure allocation time and
is never changed. If the vma name changes then the refcount of the
original structure is dropped, a new anon_vma_name structure is allocated
to hold the new name and the vma pointer is updated to point to the new
structure.
With this approach the fork() performance regression is reduced 3-4x,
and for use cases with a more reasonable number of VMAs (a few
thousand) the regression is not measurable.

Signed-off-by: Suren Baghdasaryan <surenb@google.com>
---
 include/linux/mm_types.h |  9 ++++++++-
 mm/madvise.c             | 42 +++++++++++++++++++++++++++++++++-------
 2 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 26a30f7a5228..a7361acf2921 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -5,6 +5,7 @@
 #include <linux/mm_types_task.h>
 
 #include <linux/auxvec.h>
+#include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/rbtree.h>
@@ -302,6 +303,12 @@ struct vm_userfaultfd_ctx {
 struct vm_userfaultfd_ctx {};
 #endif /* CONFIG_USERFAULTFD */
 
+struct anon_vma_name {
+	struct kref kref;
+	/* The name needs to be at the end because it is dynamically sized. */
+	char name[];
+};
+
 /*
  * This struct describes a virtual memory area. There is one of these
  * per VM-area/task. A VM area is any part of the process virtual memory
@@ -353,7 +360,7 @@ struct vm_area_struct {
 			unsigned long rb_subtree_last;
 		} shared;
 		/* Serialized by mmap_sem. */
-		char *anon_name;
+		struct anon_vma_name *anon_name;
 	};
 
 	/*
diff --git a/mm/madvise.c b/mm/madvise.c
index bc029f3fca6a..32ac5dc5ebf3 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -63,6 +63,27 @@ static int madvise_need_mmap_write(int behavior)
 	}
 }
 
+/* Returns a new anon_vma_name holding a copy of @name, or NULL on OOM. */
+static struct anon_vma_name *anon_vma_name_alloc(const char *name)
+{
+	struct anon_vma_name *anon_name;
+
+	/* Add 1 for NUL terminator at the end of the anon_name->name */
+	anon_name = kzalloc(sizeof(*anon_name) + strlen(name) + 1, GFP_KERNEL);
+	if (!anon_name)
+		return NULL;	/* must not reach kref_init()/strcpy() below */
+	kref_init(&anon_name->kref);
+	strcpy(anon_name->name, name);
+	return anon_name;
+}
+
+static void vma_anon_name_free(struct kref *kref)
+{
+	struct anon_vma_name *anon_name =
+			container_of(kref, struct anon_vma_name, kref);
+	kfree(anon_name);
+}
+
 static inline bool has_vma_anon_name(struct vm_area_struct *vma)
 {
 	return !vma->vm_file && vma->anon_name;
@@ -75,7 +96,7 @@ const char *vma_anon_name(struct vm_area_struct *vma)
 
 	mmap_assert_locked(vma->vm_mm);
 
-	return vma->anon_name;
+	return vma->anon_name->name;
 }
 
 void dup_vma_anon_name(struct vm_area_struct *orig_vma,
@@ -84,37 +105,44 @@ void dup_vma_anon_name(struct vm_area_struct *orig_vma,
 	if (!has_vma_anon_name(orig_vma))
 		return;
 
-	new_vma->anon_name = kstrdup(orig_vma->anon_name, GFP_KERNEL);
+	kref_get(&orig_vma->anon_name->kref);
+	new_vma->anon_name = orig_vma->anon_name;
 }
 
 void free_vma_anon_name(struct vm_area_struct *vma)
 {
+	struct anon_vma_name *anon_name;
+
 	if (!has_vma_anon_name(vma))
 		return;
 
-	kfree(vma->anon_name);
+	anon_name = vma->anon_name;
 	vma->anon_name = NULL;
+	kref_put(&anon_name->kref, vma_anon_name_free);
 }
 
 /* mmap_lock should be write-locked */
 static void replace_vma_anon_name(struct vm_area_struct *vma, const char *name)
 {
+	const char *anon_name;
+
 	if (!name) {
 		free_vma_anon_name(vma);
 		return;
 	}
 
-	if (vma->anon_name) {
+	anon_name = vma_anon_name(vma);
+	if (anon_name) {
 		/* Should never happen, to dup use dup_vma_anon_name() */
-		WARN_ON(vma->anon_name == name);
+		WARN_ON(anon_name == name);
 
 		/* Same name, nothing to do here */
-		if (!strcmp(name, vma->anon_name))
+		if (!strcmp(name, anon_name))
 			return;
 
 		free_vma_anon_name(vma);
 	}
-	vma->anon_name = kstrdup(name, GFP_KERNEL);
+	vma->anon_name = anon_vma_name_alloc(name);
 }
 
 /*
-- 
2.33.0.259.gc128427fd7-goog


  parent reply	other threads:[~2021-08-27 19:19 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-27 19:18 [PATCH v8 0/3] Anonymous VMA naming patches Suren Baghdasaryan
2021-08-27 19:18 ` Suren Baghdasaryan
2021-08-27 19:18 ` [PATCH v8 1/3] mm: rearrange madvise code to allow for reuse Suren Baghdasaryan
2021-08-27 19:18   ` Suren Baghdasaryan
2021-08-28  0:14   ` Kees Cook
2021-08-28  0:58     ` Suren Baghdasaryan
2021-08-28 16:19   ` Cyrill Gorcunov
2021-08-28 21:59     ` Suren Baghdasaryan
2021-08-27 19:18 ` [PATCH v8 2/3] mm: add a field to store names for private anonymous memory Suren Baghdasaryan
2021-08-27 19:18   ` Suren Baghdasaryan
2021-08-28  1:47   ` Matthew Wilcox
2021-08-28  5:52     ` Kees Cook
2021-08-28 21:47       ` Suren Baghdasaryan
2021-08-30  8:12         ` Rasmus Villemoes
2021-08-30 16:16           ` Suren Baghdasaryan
2021-08-30 16:59             ` Matthew Wilcox
2021-08-31 17:21               ` Suren Baghdasaryan
2021-08-28 21:28   ` Cyrill Gorcunov
2021-08-28 21:53     ` Suren Baghdasaryan
2021-09-01  8:09   ` Michal Hocko
2021-09-01 15:28     ` Suren Baghdasaryan
2021-09-01  8:10   ` Michal Hocko
2021-09-01 15:42     ` Suren Baghdasaryan
2021-09-03 11:49       ` Michal Hocko
2021-09-03 15:47         ` Suren Baghdasaryan
2021-08-27 19:18 ` Suren Baghdasaryan [this message]
2021-08-27 19:18   ` [PATCH v8 3/3] mm: add anonymous vma name refcounting Suren Baghdasaryan
2021-08-28  5:28   ` Kees Cook
2021-08-28 21:13     ` Suren Baghdasaryan
2021-08-30  7:03   ` Rolf Eike Beer
2021-08-30  7:03     ` Rolf Eike Beer
2021-08-30 16:12     ` Suren Baghdasaryan
2021-08-28 12:48 ` [PATCH v8 0/3] Anonymous VMA naming patches Pavel Machek
2021-08-28 22:06   ` Suren Baghdasaryan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210827191858.2037087-4-surenb@google.com \
    --to=surenb@google.com \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=apopple@nvidia.com \
    --cc=axboe@kernel.dk \
    --cc=axelrasmussen@google.com \
    --cc=catalin.marinas@arm.com \
    --cc=ccross@google.com \
    --cc=chinwen.chang@mediatek.com \
    --cc=chris.hyser@oracle.com \
    --cc=corbet@lwn.net \
    --cc=cxfcosmos@gmail.com \
    --cc=dave.hansen@intel.com \
    --cc=eb@emlix.com \
    --cc=ebiederm@xmission.com \
    --cc=feng.tang@intel.com \
    --cc=fenghua.yu@intel.com \
    --cc=guro@fb.com \
    --cc=hannes@cmpxchg.org \
    --cc=hughd@google.com \
    --cc=jannh@google.com \
    --cc=jgg@ziepe.ca \
    --cc=jhubbard@nvidia.com \
    --cc=kaleshsingh@google.com \
    --cc=keescook@chromium.org \
    --cc=kernel-team@android.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=krisman@collabora.com \
    --cc=legion@kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux@rasmusvillemoes.dk \
    --cc=mhocko@suse.com \
    --cc=pcc@google.com \
    --cc=peterx@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rdunlap@infradead.org \
    --cc=rppt@kernel.org \
    --cc=sashal@kernel.org \
    --cc=songmuchun@bytedance.com \
    --cc=sumit.semwal@linaro.org \
    --cc=tglx@linutronix.de \
    --cc=thomascedeno@google.com \
    --cc=thunder.leizhen@huawei.com \
    --cc=vbabka@suse.cz \
    --cc=vincenzo.frascino@arm.com \
    --cc=viresh.kumar@linaro.org \
    --cc=viro@zeniv.linux.org.uk \
    --cc=will@kernel.org \
    --cc=willy@infradead.org \
    --cc=yuzhao@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.