All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] mm: anonymous shared memory naming
@ 2022-11-07 18:47 Pasha Tatashin
  2022-11-08 17:56 ` David Hildenbrand
  0 siblings, 1 reply; 5+ messages in thread
From: Pasha Tatashin @ 2022-11-07 18:47 UTC (permalink / raw)
  To: corbet, akpm, hughd, hannes, david, vincent.whitchurch, seanjc,
	rppt, shy828301, pasha.tatashin, paul.gortmaker, peterx, vbabka,
	Liam.Howlett, ccross, willy, arnd, cgel.zte, yuzhao,
	linux-kernel, linux-fsdevel, linux-doc, linux-mm, bagasdotme,
	kirill

Since: commit 9a10064f5625 ("mm: add a field to store names for private
anonymous memory"), name for private anonymous memory, but not shared
anonymous, can be set. However, naming shared anonymous memory just as
useful for tracking purposes.

Extend the functionality to be able to set names for shared anon.

Sample output:
  /* Create shared anonymous segmenet */
  anon_shmem = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,
                    MAP_SHARED | MAP_ANONYMOUS, -1, 0);
  /* Name the segment: "MY-NAME" */
  rv = prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME,
             anon_shmem, SIZE, "MY-NAME");

cat /proc/<pid>/maps (and smaps):
7fc8e2b4c000-7fc8f2b4c000 rw-s 00000000 00:01 1024 [anon_shmem:MY-NAME]

Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
---
 Documentation/filesystems/proc.rst |  8 +++++---
 fs/proc/task_mmu.c                 | 14 ++++++++++----
 include/linux/mm.h                 |  2 ++
 include/linux/mm_types.h           | 27 +++++++++++++--------------
 mm/madvise.c                       |  7 ++-----
 mm/shmem.c                         | 20 ++++++++++++++++++--
 6 files changed, 50 insertions(+), 28 deletions(-)

 Changes since v1:
 https://lore.kernel.org/lkml/20221105025342.3130038-1-pasha.tatashin@soleen.com
 - removed "path" for user named anon shared memory
 - fixed a warning found by kernel test robot
 - fixed a warning reported by Bagas Sanjaya
 - simplified and improved the commit log

diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index 898c99eae8e4..b8f175ae4853 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -426,14 +426,16 @@ with the memory region, as the case would be with BSS (uninitialized data).
 The "pathname" shows the name associated file for this mapping.  If the mapping
 is not associated with a file:
 
- =============              ====================================
+ ===================        ===========================================
  [heap]                     the heap of the program
  [stack]                    the stack of the main process
  [vdso]                     the "virtual dynamic shared object",
                             the kernel system call handler
- [anon:<name>]              an anonymous mapping that has been
+ [anon:<name>]              a private anonymous mapping that has been
                             named by userspace
- =============              ====================================
+ [anon_shmem:<name>]        an anonymous shared memory mapping that has
+                            been named by userspace
+ ===================        ===========================================
 
  or if empty, the mapping is anonymous.
 
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 8a74cdcc9af0..d22687d2e81e 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -277,6 +277,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
 	struct mm_struct *mm = vma->vm_mm;
 	struct file *file = vma->vm_file;
 	vm_flags_t flags = vma->vm_flags;
+	struct anon_vma_name *anon_name;
 	unsigned long ino = 0;
 	unsigned long long pgoff = 0;
 	unsigned long start, end;
@@ -293,6 +294,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
 	start = vma->vm_start;
 	end = vma->vm_end;
 	show_vma_header_prefix(m, start, end, flags, pgoff, dev, ino);
+	anon_name = anon_vma_name(vma);
 
 	/*
 	 * Print the dentry name for named mappings, and a
@@ -300,7 +302,14 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
 	 */
 	if (file) {
 		seq_pad(m, ' ');
-		seq_file_path(m, file, "\n");
+		/*
+		 * If user named this anon shared memory via
+		 * prctl(PR_SET_VMA ..., use the provided name.
+		 */
+		if (anon_name)
+			seq_printf(m, "[anon_shmem:%s]", anon_name->name);
+		else
+			seq_file_path(m, file, "\n");
 		goto done;
 	}
 
@@ -312,8 +321,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
 
 	name = arch_vma_name(vma);
 	if (!name) {
-		struct anon_vma_name *anon_name;
-
 		if (!mm) {
 			name = "[vdso]";
 			goto done;
@@ -330,7 +337,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
 			goto done;
 		}
 
-		anon_name = anon_vma_name(vma);
 		if (anon_name) {
 			seq_pad(m, ' ');
 			seq_printf(m, "[anon:%s]", anon_name->name);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8bbcccbc5565..06b6fb3277ab 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -699,8 +699,10 @@ static inline unsigned long vma_iter_addr(struct vma_iterator *vmi)
  * paths in userfault.
  */
 bool vma_is_shmem(struct vm_area_struct *vma);
+bool vma_is_anon_shmem(struct vm_area_struct *vma);
 #else
 static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; }
+static inline bool vma_is_anon_shmem(struct vm_area_struct *vma) { return false; }
 #endif
 
 int vma_is_stack_for_current(struct vm_area_struct *vma);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 500e536796ca..08d8b973fb60 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -461,21 +461,11 @@ struct vm_area_struct {
 	 * For areas with an address space and backing store,
 	 * linkage into the address_space->i_mmap interval tree.
 	 *
-	 * For private anonymous mappings, a pointer to a null terminated string
-	 * containing the name given to the vma, or NULL if unnamed.
 	 */
-
-	union {
-		struct {
-			struct rb_node rb;
-			unsigned long rb_subtree_last;
-		} shared;
-		/*
-		 * Serialized by mmap_sem. Never use directly because it is
-		 * valid only when vm_file is NULL. Use anon_vma_name instead.
-		 */
-		struct anon_vma_name *anon_name;
-	};
+	struct {
+		struct rb_node rb;
+		unsigned long rb_subtree_last;
+	} shared;
 
 	/*
 	 * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
@@ -485,6 +475,7 @@ struct vm_area_struct {
 	 */
 	struct list_head anon_vma_chain; /* Serialized by mmap_lock &
 					  * page_table_lock */
+
 	struct anon_vma *anon_vma;	/* Serialized by page_table_lock */
 
 	/* Function pointers to deal with this struct. */
@@ -496,6 +487,14 @@ struct vm_area_struct {
 	struct file * vm_file;		/* File we map to (can be NULL). */
 	void * vm_private_data;		/* was vm_pte (shared mem) */
 
+#ifdef CONFIG_ANON_VMA_NAME
+	/*
+	 * For private and shared anonymous mappings, a pointer to a null
+	 * terminated string containing the name given to the vma, or NULL if
+	 * unnamed. Serialized by mmap_sem. Use anon_vma_name to access.
+	 */
+	struct anon_vma_name *anon_name;
+#endif
 #ifdef CONFIG_SWAP
 	atomic_long_t swap_readahead_info;
 #endif
diff --git a/mm/madvise.c b/mm/madvise.c
index c7105ec6d08c..255d5b485432 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -95,9 +95,6 @@ struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma)
 {
 	mmap_assert_locked(vma->vm_mm);
 
-	if (vma->vm_file)
-		return NULL;
-
 	return vma->anon_name;
 }
 
@@ -183,7 +180,7 @@ static int madvise_update_vma(struct vm_area_struct *vma,
 	 * vm_flags is protected by the mmap_lock held in write mode.
 	 */
 	vma->vm_flags = new_flags;
-	if (!vma->vm_file) {
+	if (!vma->vm_file || vma_is_anon_shmem(vma)) {
 		error = replace_anon_vma_name(vma, anon_name);
 		if (error)
 			return error;
@@ -1273,7 +1270,7 @@ static int madvise_vma_anon_name(struct vm_area_struct *vma,
 	int error;
 
 	/* Only anonymous mappings can be named */
-	if (vma->vm_file)
+	if (vma->vm_file && !vma_is_anon_shmem(vma))
 		return -EBADF;
 
 	error = madvise_update_vma(vma, prev, start, end, vma->vm_flags,
diff --git a/mm/shmem.c b/mm/shmem.c
index c1d8b8a1aa3b..a6482cadda79 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -237,11 +237,17 @@ static const struct inode_operations shmem_inode_operations;
 static const struct inode_operations shmem_dir_inode_operations;
 static const struct inode_operations shmem_special_inode_operations;
 static const struct vm_operations_struct shmem_vm_ops;
+static const struct vm_operations_struct shmem_anon_vm_ops;
 static struct file_system_type shmem_fs_type;
 
+bool vma_is_anon_shmem(struct vm_area_struct *vma)
+{
+	return vma->vm_ops == &shmem_anon_vm_ops;
+}
+
 bool vma_is_shmem(struct vm_area_struct *vma)
 {
-	return vma->vm_ops == &shmem_vm_ops;
+	return vma_is_anon_shmem(vma) || vma->vm_ops == &shmem_vm_ops;
 }
 
 static LIST_HEAD(shmem_swaplist);
@@ -3995,6 +4001,15 @@ static const struct vm_operations_struct shmem_vm_ops = {
 #endif
 };
 
+static const struct vm_operations_struct shmem_anon_vm_ops = {
+	.fault		= shmem_fault,
+	.map_pages	= filemap_map_pages,
+#ifdef CONFIG_NUMA
+	.set_policy     = shmem_set_policy,
+	.get_policy     = shmem_get_policy,
+#endif
+};
+
 int shmem_init_fs_context(struct fs_context *fc)
 {
 	struct shmem_options *ctx;
@@ -4170,6 +4185,7 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 EXPORT_SYMBOL_GPL(shmem_truncate_range);
 
 #define shmem_vm_ops				generic_file_vm_ops
+#define shmem_anon_vm_ops			generic_file_vm_ops
 #define shmem_file_operations			ramfs_file_operations
 #define shmem_get_inode(sb, dir, mode, dev, flags)	ramfs_get_inode(sb, dir, mode, dev)
 #define shmem_acct_size(flags, size)		0
@@ -4275,7 +4291,7 @@ int shmem_zero_setup(struct vm_area_struct *vma)
 	if (vma->vm_file)
 		fput(vma->vm_file);
 	vma->vm_file = file;
-	vma->vm_ops = &shmem_vm_ops;
+	vma->vm_ops = &shmem_anon_vm_ops;
 
 	return 0;
 }
-- 
2.38.1.431.g37b22c650d-goog


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] mm: anonymous shared memory naming
  2022-11-07 18:47 [PATCH v2] mm: anonymous shared memory naming Pasha Tatashin
@ 2022-11-08 17:56 ` David Hildenbrand
  2022-11-08 18:44   ` Pasha Tatashin
  0 siblings, 1 reply; 5+ messages in thread
From: David Hildenbrand @ 2022-11-08 17:56 UTC (permalink / raw)
  To: Pasha Tatashin, corbet, akpm, hughd, hannes, vincent.whitchurch,
	seanjc, rppt, shy828301, paul.gortmaker, peterx, vbabka,
	Liam.Howlett, ccross, willy, arnd, cgel.zte, yuzhao,
	linux-kernel, linux-fsdevel, linux-doc, linux-mm, bagasdotme,
	kirill

On 07.11.22 19:47, Pasha Tatashin wrote:
> Since: commit 9a10064f5625 ("mm: add a field to store names for private
> anonymous memory"), name for private anonymous memory, but not shared
> anonymous, can be set. However, naming shared anonymous memory just as

                                                                  ^ is

> useful for tracking purposes.
> 
> Extend the functionality to be able to set names for shared anon.
> 
> Sample output:
>    /* Create shared anonymous segmenet */

s/segmenet/segment/

>    anon_shmem = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,
>                      MAP_SHARED | MAP_ANONYMOUS, -1, 0);
>    /* Name the segment: "MY-NAME" */
>    rv = prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME,
>               anon_shmem, SIZE, "MY-NAME");
> 
> cat /proc/<pid>/maps (and smaps):
> 7fc8e2b4c000-7fc8f2b4c000 rw-s 00000000 00:01 1024 [anon_shmem:MY-NAME]

What would it have looked like before? Just no additional information?

> 
> Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
> ---


[...]

> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 8bbcccbc5565..06b6fb3277ab 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -699,8 +699,10 @@ static inline unsigned long vma_iter_addr(struct vma_iterator *vmi)
>    * paths in userfault.
>    */
>   bool vma_is_shmem(struct vm_area_struct *vma);
> +bool vma_is_anon_shmem(struct vm_area_struct *vma);
>   #else
>   static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; }
> +static inline bool vma_is_anon_shmem(struct vm_area_struct *vma) { return false; }
>   #endif
>   
>   int vma_is_stack_for_current(struct vm_area_struct *vma);
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 500e536796ca..08d8b973fb60 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -461,21 +461,11 @@ struct vm_area_struct {
>   	 * For areas with an address space and backing store,
>   	 * linkage into the address_space->i_mmap interval tree.
>   	 *
> -	 * For private anonymous mappings, a pointer to a null terminated string
> -	 * containing the name given to the vma, or NULL if unnamed.
>   	 */
> -
> -	union {
> -		struct {
> -			struct rb_node rb;
> -			unsigned long rb_subtree_last;
> -		} shared;
> -		/*
> -		 * Serialized by mmap_sem. Never use directly because it is
> -		 * valid only when vm_file is NULL. Use anon_vma_name instead.
> -		 */
> -		struct anon_vma_name *anon_name;
> -	};
> +	struct {
> +		struct rb_node rb;
> +		unsigned long rb_subtree_last;
> +	} shared;
>   

So that effectively grows the size of vm_area_struct. Hm. I'd really 
prefer to keep this specific to actual anonymous memory, not extending 
it to anonymous files.

Do we have any *actual* users where we don't have an alternative? I 
doubt that this is really required.

The simplest approach seems to be to use memfd instead of MAP_SHARED | 
MAP_ANONYMOUS. __NR_memfd_create can be passed a name and you get what 
you propose here effectively already. Or does anything speak against it?

-- 
Thanks,

David / dhildenb


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] mm: anonymous shared memory naming
  2022-11-08 17:56 ` David Hildenbrand
@ 2022-11-08 18:44   ` Pasha Tatashin
  2022-11-09 10:11     ` David Hildenbrand
  0 siblings, 1 reply; 5+ messages in thread
From: Pasha Tatashin @ 2022-11-08 18:44 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: corbet, akpm, hughd, hannes, vincent.whitchurch, seanjc, rppt,
	shy828301, paul.gortmaker, peterx, vbabka, Liam.Howlett, ccross,
	willy, arnd, cgel.zte, yuzhao, linux-kernel, linux-fsdevel,
	linux-doc, linux-mm, bagasdotme, kirill

Hi David,

Thank you for taking a look at this change:

On Tue, Nov 8, 2022 at 12:56 PM David Hildenbrand <david@redhat.com> wrote:
>
> On 07.11.22 19:47, Pasha Tatashin wrote:
> > Since: commit 9a10064f5625 ("mm: add a field to store names for private
> > anonymous memory"), name for private anonymous memory, but not shared
> > anonymous, can be set. However, naming shared anonymous memory just as
>
>                                                                   ^ is
>

OK

> > useful for tracking purposes.
> >
> > Extend the functionality to be able to set names for shared anon.
> >
> > Sample output:
> >    /* Create shared anonymous segmenet */
>
> s/segmenet/segment/

Ok

>
> >    anon_shmem = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,
> >                      MAP_SHARED | MAP_ANONYMOUS, -1, 0);
> >    /* Name the segment: "MY-NAME" */
> >    rv = prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME,
> >               anon_shmem, SIZE, "MY-NAME");
> >
> > cat /proc/<pid>/maps (and smaps):
> > 7fc8e2b4c000-7fc8f2b4c000 rw-s 00000000 00:01 1024 [anon_shmem:MY-NAME]
>
> What would it have looked like before? Just no additional information?

Before:

7fc8e2b4c000-7fc8f2b4c000 rw-s 00000000 00:01 1024 /dev/zero (deleted)

Pasha

>
> >
> > Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
> > ---
>
>
> [...]
>
> > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > index 8bbcccbc5565..06b6fb3277ab 100644
> > --- a/include/linux/mm.h
> > +++ b/include/linux/mm.h
> > @@ -699,8 +699,10 @@ static inline unsigned long vma_iter_addr(struct vma_iterator *vmi)
> >    * paths in userfault.
> >    */
> >   bool vma_is_shmem(struct vm_area_struct *vma);
> > +bool vma_is_anon_shmem(struct vm_area_struct *vma);
> >   #else
> >   static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; }
> > +static inline bool vma_is_anon_shmem(struct vm_area_struct *vma) { return false; }
> >   #endif
> >
> >   int vma_is_stack_for_current(struct vm_area_struct *vma);
> > diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> > index 500e536796ca..08d8b973fb60 100644
> > --- a/include/linux/mm_types.h
> > +++ b/include/linux/mm_types.h
> > @@ -461,21 +461,11 @@ struct vm_area_struct {
> >        * For areas with an address space and backing store,
> >        * linkage into the address_space->i_mmap interval tree.
> >        *
> > -      * For private anonymous mappings, a pointer to a null terminated string
> > -      * containing the name given to the vma, or NULL if unnamed.
> >        */
> > -
> > -     union {
> > -             struct {
> > -                     struct rb_node rb;
> > -                     unsigned long rb_subtree_last;
> > -             } shared;
> > -             /*
> > -              * Serialized by mmap_sem. Never use directly because it is
> > -              * valid only when vm_file is NULL. Use anon_vma_name instead.
> > -              */
> > -             struct anon_vma_name *anon_name;
> > -     };
> > +     struct {
> > +             struct rb_node rb;
> > +             unsigned long rb_subtree_last;
> > +     } shared;
> >
>
> So that effectively grows the size of vm_area_struct. Hm. I'd really
> prefer to keep this specific to actual anonymous memory, not extending
> it to anonymous files.

It grows only when CONFIG_ANON_VMA_NAME=y, otherwise it stays the same
as before. Are you suggesting adding another config specifically for
shared memory? I wonder if we could add a union for some other part of
vm_area_struct where anon and file cannot be used together.

> Do we have any *actual* users where we don't have an alternative? I
> doubt that this is really required.
>
> The simplest approach seems to be to use memfd instead of MAP_SHARED |
> MAP_ANONYMOUS. __NR_memfd_create can be passed a name and you get what
> you propose here effectively already. Or does anything speak against it?

For our use case the above does not work. We are working on highly
paravirtualized virtual machines. The VMM maps VM memory as anonymous
shared memory (not private because VMM is sandboxed and drivers are
running in their own processes). However, the VM tells back to the VMM
how parts of the memory are actually used by the guest, how each of
the segments should be backed (i.e. 4K pages, 2M pages), and some
other information about the segments. The naming allows us to monitor
the effective memory footprint for each of these segments from the
host without looking inside the guest.

Pasha

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] mm: anonymous shared memory naming
  2022-11-08 18:44   ` Pasha Tatashin
@ 2022-11-09 10:11     ` David Hildenbrand
  2022-11-15  1:36       ` Pasha Tatashin
  0 siblings, 1 reply; 5+ messages in thread
From: David Hildenbrand @ 2022-11-09 10:11 UTC (permalink / raw)
  To: Pasha Tatashin
  Cc: corbet, akpm, hughd, hannes, vincent.whitchurch, seanjc, rppt,
	shy828301, paul.gortmaker, peterx, vbabka, Liam.Howlett, ccross,
	willy, arnd, cgel.zte, yuzhao, linux-kernel, linux-fsdevel,
	linux-doc, linux-mm, bagasdotme, kirill

>>
>>>     anon_shmem = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,
>>>                       MAP_SHARED | MAP_ANONYMOUS, -1, 0);
>>>     /* Name the segment: "MY-NAME" */
>>>     rv = prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME,
>>>                anon_shmem, SIZE, "MY-NAME");
>>>
>>> cat /proc/<pid>/maps (and smaps):
>>> 7fc8e2b4c000-7fc8f2b4c000 rw-s 00000000 00:01 1024 [anon_shmem:MY-NAME]
>>
>> What would it have looked like before? Just no additional information?
> 
> Before:
> 
> 7fc8e2b4c000-7fc8f2b4c000 rw-s 00000000 00:01 1024 /dev/zero (deleted)

Can we add that to the patch description?

>>
>>>
>>> Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
>>> ---
>>
>>
>> [...]
>>
>>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>>> index 8bbcccbc5565..06b6fb3277ab 100644
>>> --- a/include/linux/mm.h
>>> +++ b/include/linux/mm.h
>>> @@ -699,8 +699,10 @@ static inline unsigned long vma_iter_addr(struct vma_iterator *vmi)
>>>     * paths in userfault.
>>>     */
>>>    bool vma_is_shmem(struct vm_area_struct *vma);
>>> +bool vma_is_anon_shmem(struct vm_area_struct *vma);
>>>    #else
>>>    static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; }
>>> +static inline bool vma_is_anon_shmem(struct vm_area_struct *vma) { return false; }
>>>    #endif
>>>
>>>    int vma_is_stack_for_current(struct vm_area_struct *vma);
>>> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
>>> index 500e536796ca..08d8b973fb60 100644
>>> --- a/include/linux/mm_types.h
>>> +++ b/include/linux/mm_types.h
>>> @@ -461,21 +461,11 @@ struct vm_area_struct {
>>>         * For areas with an address space and backing store,
>>>         * linkage into the address_space->i_mmap interval tree.
>>>         *
>>> -      * For private anonymous mappings, a pointer to a null terminated string
>>> -      * containing the name given to the vma, or NULL if unnamed.
>>>         */
>>> -
>>> -     union {
>>> -             struct {
>>> -                     struct rb_node rb;
>>> -                     unsigned long rb_subtree_last;
>>> -             } shared;
>>> -             /*
>>> -              * Serialized by mmap_sem. Never use directly because it is
>>> -              * valid only when vm_file is NULL. Use anon_vma_name instead.
>>> -              */
>>> -             struct anon_vma_name *anon_name;
>>> -     };
>>> +     struct {
>>> +             struct rb_node rb;
>>> +             unsigned long rb_subtree_last;
>>> +     } shared;
>>>
>>
>> So that effectively grows the size of vm_area_struct. Hm. I'd really
>> prefer to keep this specific to actual anonymous memory, not extending
>> it to anonymous files.
> 
> It grows only when CONFIG_ANON_VMA_NAME=y, otherwise it stays the same
> as before. Are you suggesting adding another config specifically for
> shared memory? I wonder if we could add a union for some other part of
> vm_area_struct where anon and file cannot be used together.

In practice, all distributions will enable CONFIG_ANON_VMA_NAME in the 
long term I guess. So if we could avoid increasing the VMA size, that 
would be great.

> 
>> Do we have any *actual* users where we don't have an alternative? I
>> doubt that this is really required.
>>
>> The simplest approach seems to be to use memfd instead of MAP_SHARED |
>> MAP_ANONYMOUS. __NR_memfd_create can be passed a name and you get what
>> you propose here effectively already. Or does anything speak against it?
> 
> For our use case the above does not work. We are working on highly
> paravirtualized virtual machines. The VMM maps VM memory as anonymous
> shared memory (not private because VMM is sandboxed and drivers are
> running in their own processes). However, the VM tells back to the VMM
> how parts of the memory are actually used by the guest, how each of
> the segments should be backed (i.e. 4K pages, 2M pages), and some
> other information about the segments. The naming allows us to monitor
> the effective memory footprint for each of these segments from the
> host without looking inside the guest.

That's a reasonable use case, although naive me would worry about #VMA 
limits etc.

Can you add some condensed use-case explanation to the patch 
description? (IOW, memfd cannot be used because parts of the memfd are 
required to receive distinct names)

I'd appreciate if we could avoid increasing the VMA size; but in any case

Acked-by: David Hildenbrand <david@redhat.com>


-- 
Thanks,

David / dhildenb


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] mm: anonymous shared memory naming
  2022-11-09 10:11     ` David Hildenbrand
@ 2022-11-15  1:36       ` Pasha Tatashin
  0 siblings, 0 replies; 5+ messages in thread
From: Pasha Tatashin @ 2022-11-15  1:36 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: corbet, akpm, hughd, hannes, vincent.whitchurch, seanjc, rppt,
	shy828301, paul.gortmaker, peterx, vbabka, Liam.Howlett, ccross,
	willy, arnd, cgel.zte, yuzhao, linux-kernel, linux-fsdevel,
	linux-doc, linux-mm, bagasdotme, kirill

On Wed, Nov 9, 2022 at 5:11 AM David Hildenbrand <david@redhat.com> wrote:
>
> >>
> >>>     anon_shmem = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,
> >>>                       MAP_SHARED | MAP_ANONYMOUS, -1, 0);
> >>>     /* Name the segment: "MY-NAME" */
> >>>     rv = prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME,
> >>>                anon_shmem, SIZE, "MY-NAME");
> >>>
> >>> cat /proc/<pid>/maps (and smaps):
> >>> 7fc8e2b4c000-7fc8f2b4c000 rw-s 00000000 00:01 1024 [anon_shmem:MY-NAME]
> >>
> >> What would it have looked like before? Just no additional information?
> >
> > Before:
> >
> > 7fc8e2b4c000-7fc8f2b4c000 rw-s 00000000 00:01 1024 /dev/zero (deleted)
>
> Can we add that to the patch description?
>
> >>
> >>>
> >>> Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
> >>> ---
> >>
> >>
> >> [...]
> >>
> >>> diff --git a/include/linux/mm.h b/include/linux/mm.h
> >>> index 8bbcccbc5565..06b6fb3277ab 100644
> >>> --- a/include/linux/mm.h
> >>> +++ b/include/linux/mm.h
> >>> @@ -699,8 +699,10 @@ static inline unsigned long vma_iter_addr(struct vma_iterator *vmi)
> >>>     * paths in userfault.
> >>>     */
> >>>    bool vma_is_shmem(struct vm_area_struct *vma);
> >>> +bool vma_is_anon_shmem(struct vm_area_struct *vma);
> >>>    #else
> >>>    static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; }
> >>> +static inline bool vma_is_anon_shmem(struct vm_area_struct *vma) { return false; }
> >>>    #endif
> >>>
> >>>    int vma_is_stack_for_current(struct vm_area_struct *vma);
> >>> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> >>> index 500e536796ca..08d8b973fb60 100644
> >>> --- a/include/linux/mm_types.h
> >>> +++ b/include/linux/mm_types.h
> >>> @@ -461,21 +461,11 @@ struct vm_area_struct {
> >>>         * For areas with an address space and backing store,
> >>>         * linkage into the address_space->i_mmap interval tree.
> >>>         *
> >>> -      * For private anonymous mappings, a pointer to a null terminated string
> >>> -      * containing the name given to the vma, or NULL if unnamed.
> >>>         */
> >>> -
> >>> -     union {
> >>> -             struct {
> >>> -                     struct rb_node rb;
> >>> -                     unsigned long rb_subtree_last;
> >>> -             } shared;
> >>> -             /*
> >>> -              * Serialized by mmap_sem. Never use directly because it is
> >>> -              * valid only when vm_file is NULL. Use anon_vma_name instead.
> >>> -              */
> >>> -             struct anon_vma_name *anon_name;
> >>> -     };
> >>> +     struct {
> >>> +             struct rb_node rb;
> >>> +             unsigned long rb_subtree_last;
> >>> +     } shared;
> >>>
> >>
> >> So that effectively grows the size of vm_area_struct. Hm. I'd really
> >> prefer to keep this specific to actual anonymous memory, not extending
> >> it to anonymous files.
> >
> > It grows only when CONFIG_ANON_VMA_NAME=y, otherwise it stays the same
> > as before. Are you suggesting adding another config specifically for
> > shared memory? I wonder if we could add a union for some other part of
> > vm_area_struct where anon and file cannot be used together.
>
> In practice, all distributions will enable CONFIG_ANON_VMA_NAME in the
> long term I guess. So if we could avoid increasing the VMA size, that
> would be great.
>
> >
> >> Do we have any *actual* users where we don't have an alternative? I
> >> doubt that this is really required.
> >>
> >> The simplest approach seems to be to use memfd instead of MAP_SHARED |
> >> MAP_ANONYMOUS. __NR_memfd_create can be passed a name and you get what
> >> you propose here effectively already. Or does anything speak against it?
> >
> > For our use case the above does not work. We are working on highly
> > paravirtualized virtual machines. The VMM maps VM memory as anonymous
> > shared memory (not private because VMM is sandboxed and drivers are
> > running in their own processes). However, the VM tells back to the VMM
> > how parts of the memory are actually used by the guest, how each of
> > the segments should be backed (i.e. 4K pages, 2M pages), and some
> > other information about the segments. The naming allows us to monitor
> > the effective memory footprint for each of these segments from the
> > host without looking inside the guest.
>
> That's a reasonable use case, although naive me would worry about #VMA
> limits etc.
>
> Can you add some condensed use-case explanation to the patch
> description? (IOW, memfd cannot be used because parts of the memfd are
> required to receive distinct names)
>
> I'd appreciate if we could avoid increasing the VMA size; but in any case

I've explored ways not to increase VMA size, but there are no obvious
solutions here. Let's keep it as is for now, and in the future if
there we are going to be adding some fields that are only used by
anonymous memory, we can explore of adding a union for this field.

>
> Acked-by: David Hildenbrand <david@redhat.com>

Thank you. I will soon send a new version with support for memfd anon
memory as well.

Pasha

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2022-11-15  1:37 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-07 18:47 [PATCH v2] mm: anonymous shared memory naming Pasha Tatashin
2022-11-08 17:56 ` David Hildenbrand
2022-11-08 18:44   ` Pasha Tatashin
2022-11-09 10:11     ` David Hildenbrand
2022-11-15  1:36       ` Pasha Tatashin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.