All of lore.kernel.org
 help / color / mirror / Atom feed
From: Kees Cook <keescook@chromium.org>
To: Suren Baghdasaryan <surenb@google.com>
Cc: akpm@linux-foundation.org, ccross@google.com,
	sumit.semwal@linaro.org, mhocko@suse.com, dave.hansen@intel.com,
	willy@infradead.org, kirill.shutemov@linux.intel.com,
	vbabka@suse.cz, hannes@cmpxchg.org, corbet@lwn.net,
	viro@zeniv.linux.org.uk, rdunlap@infradead.org,
	kaleshsingh@google.com, peterx@redhat.com, rppt@kernel.org,
	peterz@infradead.org, catalin.marinas@arm.com,
	vincenzo.frascino@arm.com, chinwen.chang@mediatek.com,
	axelrasmussen@google.com, aarcange@redhat.com, jannh@google.com,
	apopple@nvidia.com, jhubbard@nvidia.com, yuzhao@google.com,
	will@kernel.org, fenghua.yu@intel.com,
	thunder.leizhen@huawei.com, hughd@google.com,
	feng.tang@intel.com, jgg@ziepe.ca, guro@fb.com,
	tglx@linutronix.de, krisman@collabora.com,
	chris.hyser@oracle.com, pcc@google.com, ebiederm@xmission.com,
	axboe@kernel.dk, legion@kernel.org, eb@emlix.com,
	gorcunov@gmail.com, songmuchun@bytedance.com,
	viresh.kumar@linaro.org, thomascedeno@google.com,
	sashal@kernel.org, cxfcosmos@gmail.com, linux@rasmusvillemoes.dk,
	linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-doc@vger.kernel.org, linux-mm@kvack.org,
	kernel-team@android.com
Subject: Re: [PATCH v9 3/3] mm: add anonymous vma name refcounting
Date: Fri, 3 Sep 2021 15:20:58 -0700	[thread overview]
Message-ID: <202109031450.CDA7090A@keescook> (raw)
In-Reply-To: <20210902231813.3597709-3-surenb@google.com>

On Thu, Sep 02, 2021 at 04:18:13PM -0700, Suren Baghdasaryan wrote:
> While forking a process with high number (64K) of named anonymous vmas the
> overhead caused by strdup() is noticeable. Experiments with ARM64 Android
> device show up to 40% performance regression when forking a process with
> 64k unpopulated anonymous vmas using the max name lengths vs the same
> process with the same number of anonymous vmas having no name.
> Introduce anon_vma_name refcounted structure to avoid the overhead of
> copying vma names during fork() and when splitting named anonymous vmas.
> When a vma is duplicated, instead of copying the name we increment the
> refcount of this structure. Multiple vmas can point to the same
> anon_vma_name as long as they increment the refcount. The name member of
> anon_vma_name structure is assigned at structure allocation time and is
> never changed. If vma name changes then the refcount of the original
> structure is dropped, a new anon_vma_name structure is allocated
> to hold the new name and the vma pointer is updated to point to the new
> structure.
> With this approach the fork() performance regressions is reduced 3-4x
> times and with usecases using more reasonable number of VMAs (a few
> thousand) the regressions is not measurable.
> 
> Signed-off-by: Suren Baghdasaryan <surenb@google.com>
> ---
> previous version including cover letter with test results is at:
> https://lore.kernel.org/linux-mm/20210827191858.2037087-1-surenb@google.com/
> 
> changes in v9
> - Replaced kzalloc with kmalloc in anon_vma_name_alloc, per Rolf Eike Beer
> 
>  include/linux/mm_types.h |  9 ++++++++-
>  mm/madvise.c             | 43 +++++++++++++++++++++++++++++++++-------
>  2 files changed, 44 insertions(+), 8 deletions(-)
> 
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 968a1d0463d8..7feb43daee6c 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -5,6 +5,7 @@
>  #include <linux/mm_types_task.h>
>  
>  #include <linux/auxvec.h>
> +#include <linux/kref.h>
>  #include <linux/list.h>
>  #include <linux/spinlock.h>
>  #include <linux/rbtree.h>
> @@ -310,6 +311,12 @@ struct vm_userfaultfd_ctx {
>  struct vm_userfaultfd_ctx {};
>  #endif /* CONFIG_USERFAULTFD */
>  
> +struct anon_vma_name {
> +	struct kref kref;
> +	/* The name needs to be at the end because it is dynamically sized. */
> +	char name[];
> +};
> +
>  /*
>   * This struct describes a virtual memory area. There is one of these
>   * per VM-area/task. A VM area is any part of the process virtual memory
> @@ -361,7 +368,7 @@ struct vm_area_struct {
>  			unsigned long rb_subtree_last;
>  		} shared;
>  		/* Serialized by mmap_sem. */
> -		char *anon_name;
> +		struct anon_vma_name *anon_name;
>  	};
>  
>  	/*
> diff --git a/mm/madvise.c b/mm/madvise.c
> index 0c6d0f64d432..adc53edd3fe7 100644
> --- a/mm/madvise.c
> +++ b/mm/madvise.c
> @@ -63,6 +63,28 @@ static int madvise_need_mmap_write(int behavior)
>  	}
>  }
>  
> +static struct anon_vma_name *anon_vma_name_alloc(const char *name)
> +{
> +	struct anon_vma_name *anon_name;
> +	size_t len = strlen(name);
> +
> +	/* Add 1 for NUL terminator at the end of the anon_name->name */
> +	anon_name = kmalloc(sizeof(*anon_name) + len + 1, GFP_KERNEL);
> +	if (anon_name) {
> +		kref_init(&anon_name->kref);
> +		strcpy(anon_name->name, name);

Please don't use strcpy(), even though we know it's safe here. We're
trying to remove it globally (or at least for non-constant buffers)[1].
We can also use the struct_size() helper, along with memcpy():

	/* Add 1 for NUL terminator at the end of the anon_name->name */
	size_t count = strlen(name) + 1;

	anon_name = kmalloc(struct_size(anon_name, name, count), GFP_KERNEL);
	if (anon_name) {
		kref_init(&anon_name->kref);
		memcpy(anon_name->name, name, count);
	}

[1] https://www.kernel.org/doc/html/latest/process/deprecated.html#strcpy

> +	}
> +
> +	return anon_name;
> +}
> +
> +static void vma_anon_name_free(struct kref *kref)
> +{
> +	struct anon_vma_name *anon_name =
> +			container_of(kref, struct anon_vma_name, kref);
> +	kfree(anon_name);
> +}
> +
>  static inline bool has_vma_anon_name(struct vm_area_struct *vma)
>  {
>  	return !vma->vm_file && vma->anon_name;
> @@ -75,7 +97,7 @@ const char *vma_anon_name(struct vm_area_struct *vma)
>  
>  	mmap_assert_locked(vma->vm_mm);
>  
> -	return vma->anon_name;
> +	return vma->anon_name->name;
>  }
>  
>  void dup_vma_anon_name(struct vm_area_struct *orig_vma,
> @@ -84,37 +106,44 @@ void dup_vma_anon_name(struct vm_area_struct *orig_vma,
>  	if (!has_vma_anon_name(orig_vma))
>  		return;
>  
> -	new_vma->anon_name = kstrdup(orig_vma->anon_name, GFP_KERNEL);
> +	kref_get(&orig_vma->anon_name->kref);
> +	new_vma->anon_name = orig_vma->anon_name;
>  }
>  
>  void free_vma_anon_name(struct vm_area_struct *vma)
>  {
> +	struct anon_vma_name *anon_name;
> +
>  	if (!has_vma_anon_name(vma))
>  		return;
>  
> -	kfree(vma->anon_name);
> +	anon_name = vma->anon_name;
>  	vma->anon_name = NULL;
> +	kref_put(&anon_name->kref, vma_anon_name_free);
>  }
>  
>  /* mmap_lock should be write-locked */
>  static int replace_vma_anon_name(struct vm_area_struct *vma, const char *name)
>  {
> +	const char *anon_name;
> +
>  	if (!name) {
>  		free_vma_anon_name(vma);
>  		return 0;
>  	}
>  
> -	if (vma->anon_name) {
> +	anon_name = vma_anon_name(vma);
> +	if (anon_name) {
>  		/* Should never happen, to dup use dup_vma_anon_name() */
> -		WARN_ON(vma->anon_name == name);
> +		WARN_ON(anon_name == name);
>  
>  		/* Same name, nothing to do here */
> -		if (!strcmp(name, vma->anon_name))
> +		if (!strcmp(name, anon_name))
>  			return 0;
>  
>  		free_vma_anon_name(vma);
>  	}
> -	vma->anon_name = kstrdup(name, GFP_KERNEL);
> +	vma->anon_name = anon_vma_name_alloc(name);
>  	if (!vma->anon_name)
>  		return -ENOMEM;
>  
> -- 
> 2.33.0.153.gba50c8fa24-goog
> 

With the above tweak, please consider this:

Reviewed-by: Kees Cook <keescook@chromium.org>

Thanks for working on this!

-- 
Kees Cook

  reply	other threads:[~2021-09-03 22:21 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-02 23:18 [PATCH v9 1/3] mm: rearrange madvise code to allow for reuse Suren Baghdasaryan
2021-09-02 23:18 ` Suren Baghdasaryan
2021-09-02 23:18 ` [PATCH v9 2/3] mm: add a field to store names for private anonymous memory Suren Baghdasaryan
2021-09-02 23:18   ` Suren Baghdasaryan
2021-09-03 21:35   ` Kees Cook
2021-09-03 21:51     ` Suren Baghdasaryan
2021-09-05 13:04     ` Pavel Machek
2021-09-06 15:52       ` Suren Baghdasaryan
2021-09-03 21:47   ` Kees Cook
2021-09-03 21:56     ` Suren Baghdasaryan
2021-09-03 22:28       ` Kees Cook
2021-10-01  3:44         ` Suren Baghdasaryan
2021-10-01  5:19           ` Kees Cook
2021-09-06 16:55   ` Matthew Wilcox
2021-09-09  4:05     ` Suren Baghdasaryan
2021-09-30 18:56       ` Suren Baghdasaryan
2021-09-30 23:25         ` Kees Cook
2021-10-01  7:01   ` Rasmus Villemoes
2021-10-01 16:34     ` Suren Baghdasaryan
2021-09-02 23:18 ` [PATCH v9 3/3] mm: add anonymous vma name refcounting Suren Baghdasaryan
2021-09-02 23:18   ` Suren Baghdasaryan
2021-09-03 22:20   ` Kees Cook [this message]
2021-09-03  0:28 ` [PATCH v9 1/3] mm: rearrange madvise code to allow for reuse Suren Baghdasaryan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=202109031450.CDA7090A@keescook \
    --to=keescook@chromium.org \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=apopple@nvidia.com \
    --cc=axboe@kernel.dk \
    --cc=axelrasmussen@google.com \
    --cc=catalin.marinas@arm.com \
    --cc=ccross@google.com \
    --cc=chinwen.chang@mediatek.com \
    --cc=chris.hyser@oracle.com \
    --cc=corbet@lwn.net \
    --cc=cxfcosmos@gmail.com \
    --cc=dave.hansen@intel.com \
    --cc=eb@emlix.com \
    --cc=ebiederm@xmission.com \
    --cc=feng.tang@intel.com \
    --cc=fenghua.yu@intel.com \
    --cc=gorcunov@gmail.com \
    --cc=guro@fb.com \
    --cc=hannes@cmpxchg.org \
    --cc=hughd@google.com \
    --cc=jannh@google.com \
    --cc=jgg@ziepe.ca \
    --cc=jhubbard@nvidia.com \
    --cc=kaleshsingh@google.com \
    --cc=kernel-team@android.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=krisman@collabora.com \
    --cc=legion@kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux@rasmusvillemoes.dk \
    --cc=mhocko@suse.com \
    --cc=pcc@google.com \
    --cc=peterx@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rdunlap@infradead.org \
    --cc=rppt@kernel.org \
    --cc=sashal@kernel.org \
    --cc=songmuchun@bytedance.com \
    --cc=sumit.semwal@linaro.org \
    --cc=surenb@google.com \
    --cc=tglx@linutronix.de \
    --cc=thomascedeno@google.com \
    --cc=thunder.leizhen@huawei.com \
    --cc=vbabka@suse.cz \
    --cc=vincenzo.frascino@arm.com \
    --cc=viresh.kumar@linaro.org \
    --cc=viro@zeniv.linux.org.uk \
    --cc=will@kernel.org \
    --cc=willy@infradead.org \
    --cc=yuzhao@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.