All of lore.kernel.org
 help / color / mirror / Atom feed
* + pid-delete-struct-pidmap-nr_free.patch added to -mm tree
@ 2017-10-02 23:13 akpm
  2017-10-03 16:15 ` Oleg Nesterov
  0 siblings, 1 reply; 4+ messages in thread
From: akpm @ 2017-10-02 23:13 UTC (permalink / raw)
  To: adobriyan, ebiederm, keescook, oleg, mm-commits


The patch titled
     Subject: pid: delete struct pidmap::nr_free
has been added to the -mm tree.  Its filename is
     pid-delete-struct-pidmap-nr_free.patch

This patch should soon appear at
    http://ozlabs.org/~akpm/mmots/broken-out/pid-delete-struct-pidmap-nr_free.patch
and later at
    http://ozlabs.org/~akpm/mmotm/broken-out/pid-delete-struct-pidmap-nr_free.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/SubmitChecklist when testing your code ***

The -mm tree is included in linux-next and is updated
there every 3-4 working days.

------------------------------------------------------
From: Alexey Dobriyan <adobriyan@gmail.com>
Subject: pid: delete struct pidmap::nr_free

There is a check in pid allocation code to skip a full page:

	if (likely(atomic_read(&map->nr_free))) {
		...

In practice it doesn't do anything.  To skip a pidmap page, one has to have
32K consecutive pids allocated at the same time, which doesn't happen.

Currently the price is _every_ fork/exit on every system being slower than
necessary.

Link: http://lkml.kernel.org/r/20170909203649.GB4791@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 include/linux/pid_namespace.h |    1 -
 kernel/pid.c                  |   28 ++++++++++------------------
 kernel/pid_namespace.c        |    6 ------
 3 files changed, 10 insertions(+), 25 deletions(-)

diff -puN include/linux/pid_namespace.h~pid-delete-struct-pidmap-nr_free include/linux/pid_namespace.h
--- a/include/linux/pid_namespace.h~pid-delete-struct-pidmap-nr_free
+++ a/include/linux/pid_namespace.h
@@ -11,7 +11,6 @@
 #include <linux/ns_common.h>
 
 struct pidmap {
-       atomic_t nr_free;
        void *page;
 };
 
diff -puN kernel/pid.c~pid-delete-struct-pidmap-nr_free kernel/pid.c
--- a/kernel/pid.c~pid-delete-struct-pidmap-nr_free
+++ a/kernel/pid.c
@@ -68,9 +68,6 @@ static inline int mk_pid(struct pid_name
  */
 struct pid_namespace init_pid_ns = {
 	.kref = KREF_INIT(2),
-	.pidmap = {
-		[ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
-	},
 	.last_pid = 0,
 	.nr_hashed = PIDNS_HASH_ADDING,
 	.level = 0,
@@ -106,7 +103,6 @@ static void free_pidmap(struct upid *upi
 	int offset = nr & BITS_PER_PAGE_MASK;
 
 	clear_bit(offset, map->page);
-	atomic_inc(&map->nr_free);
 }
 
 /*
@@ -181,20 +177,17 @@ static int alloc_pidmap(struct pid_names
 			if (unlikely(!map->page))
 				return -ENOMEM;
 		}
-		if (likely(atomic_read(&map->nr_free))) {
-			for ( ; ; ) {
-				if (!test_and_set_bit(offset, map->page)) {
-					atomic_dec(&map->nr_free);
-					set_last_pid(pid_ns, last, pid);
-					return pid;
-				}
-				offset = find_next_offset(map, offset);
-				if (offset >= BITS_PER_PAGE)
-					break;
-				pid = mk_pid(pid_ns, map, offset);
-				if (pid >= pid_max)
-					break;
+		for (;;) {
+			if (!test_and_set_bit(offset, map->page)) {
+				set_last_pid(pid_ns, last, pid);
+				return pid;
 			}
+			offset = find_next_offset(map, offset);
+			if (offset >= BITS_PER_PAGE)
+				break;
+			pid = mk_pid(pid_ns, map, offset);
+			if (pid >= pid_max)
+				break;
 		}
 		if (map < &pid_ns->pidmap[(pid_max-1)/BITS_PER_PAGE]) {
 			++map;
@@ -591,7 +584,6 @@ void __init pidmap_init(void)
 	init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
 	/* Reserve PID 0. We never call free_pidmap(0) */
 	set_bit(0, init_pid_ns.pidmap[0].page);
-	atomic_dec(&init_pid_ns.pidmap[0].nr_free);
 
 	init_pid_ns.pid_cachep = KMEM_CACHE(pid,
 			SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT);
diff -puN kernel/pid_namespace.c~pid-delete-struct-pidmap-nr_free kernel/pid_namespace.c
--- a/kernel/pid_namespace.c~pid-delete-struct-pidmap-nr_free
+++ a/kernel/pid_namespace.c
@@ -98,7 +98,6 @@ static struct pid_namespace *create_pid_
 	struct pid_namespace *ns;
 	unsigned int level = parent_pid_ns->level + 1;
 	struct ucounts *ucounts;
-	int i;
 	int err;
 
 	err = -EINVAL;
@@ -139,11 +138,6 @@ static struct pid_namespace *create_pid_
 	INIT_WORK(&ns->proc_work, proc_cleanup_work);
 
 	set_bit(0, ns->pidmap[0].page);
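-	atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
-
-	for (i = 1; i < PIDMAP_ENTRIES; i++)
-		atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
-
 	return ns;
 
 out_free_map:
_

Patches currently in -mm which might be from adobriyan@gmail.com are

proc-uninline-name_to_int.patch
proc-use-do-while-in-name_to_int.patch
seq_file-delete-small-value-optimization.patch
pid-delete-reserved_pids.patch
pid-delete-struct-pidmap-nr_free.patch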

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: + pid-delete-struct-pidmap-nr_free.patch added to -mm tree
  2017-10-02 23:13 + pid-delete-struct-pidmap-nr_free.patch added to -mm tree akpm
@ 2017-10-03 16:15 ` Oleg Nesterov
  2017-10-03 17:39   ` Gargi Sharma
  0 siblings, 1 reply; 4+ messages in thread
From: Oleg Nesterov @ 2017-10-03 16:15 UTC (permalink / raw)
  To: akpm
  Cc: adobriyan, ebiederm, keescook, mm-commits, linux-kernel,
	Gargi Sharma, Rik van Riel

On 10/02, Andrew Morton wrote:
>
> From: Alexey Dobriyan <adobriyan@gmail.com>
> Subject: pid: delete struct pidmap::nr_free
> 
> There is a check in pid allocation code to skip a full page:
> 
> 	if (likely(atomic_read(&map->nr_free))) {
> 		...
> 
> In practice it doesn't do anything.  To skip a pidmap page one has to have
> 32K consecutive pids allocated at the same time which doesn't happen.
> 
> Currently the price is _every_ fork/exit on every system being slower than
> necessary.

Agreed, I too never understood how this counter can help.

Adding Gargi and Rik: the next version of "Replace PID bitmap allocation with
IDR API" may conflict with this patch and the previous
pid-delete-reserved_pids.patch.

Oleg.

> 
> Link: http://lkml.kernel.org/r/20170909203649.GB4791@avx2
> Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
> Cc: "Eric W. Biederman" <ebiederm@xmission.com>
> Cc: Kees Cook <keescook@chromium.org>
> Cc: Oleg Nesterov <oleg@redhat.com>
> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
> ---
> 
>  include/linux/pid_namespace.h |    1 -
>  kernel/pid.c                  |   28 ++++++++++------------------
>  kernel/pid_namespace.c        |    6 ------
>  3 files changed, 10 insertions(+), 25 deletions(-)
> 
> diff -puN include/linux/pid_namespace.h~pid-delete-struct-pidmap-nr_free include/linux/pid_namespace.h
> --- a/include/linux/pid_namespace.h~pid-delete-struct-pidmap-nr_free
> +++ a/include/linux/pid_namespace.h
> @@ -11,7 +11,6 @@
>  #include <linux/ns_common.h>
>  
>  struct pidmap {
> -       atomic_t nr_free;
>         void *page;
>  };
>  
> diff -puN kernel/pid.c~pid-delete-struct-pidmap-nr_free kernel/pid.c
> --- a/kernel/pid.c~pid-delete-struct-pidmap-nr_free
> +++ a/kernel/pid.c
> @@ -68,9 +68,6 @@ static inline int mk_pid(struct pid_name
>   */
>  struct pid_namespace init_pid_ns = {
>  	.kref = KREF_INIT(2),
> -	.pidmap = {
> -		[ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
> -	},
>  	.last_pid = 0,
>  	.nr_hashed = PIDNS_HASH_ADDING,
>  	.level = 0,
> @@ -106,7 +103,6 @@ static void free_pidmap(struct upid *upi
>  	int offset = nr & BITS_PER_PAGE_MASK;
>  
>  	clear_bit(offset, map->page);
> -	atomic_inc(&map->nr_free);
>  }
>  
>  /*
> @@ -181,20 +177,17 @@ static int alloc_pidmap(struct pid_names
>  			if (unlikely(!map->page))
>  				return -ENOMEM;
>  		}
> -		if (likely(atomic_read(&map->nr_free))) {
> -			for ( ; ; ) {
> -				if (!test_and_set_bit(offset, map->page)) {
> -					atomic_dec(&map->nr_free);
> -					set_last_pid(pid_ns, last, pid);
> -					return pid;
> -				}
> -				offset = find_next_offset(map, offset);
> -				if (offset >= BITS_PER_PAGE)
> -					break;
> -				pid = mk_pid(pid_ns, map, offset);
> -				if (pid >= pid_max)
> -					break;
> +		for (;;) {
> +			if (!test_and_set_bit(offset, map->page)) {
> +				set_last_pid(pid_ns, last, pid);
> +				return pid;
>  			}
> +			offset = find_next_offset(map, offset);
> +			if (offset >= BITS_PER_PAGE)
> +				break;
> +			pid = mk_pid(pid_ns, map, offset);
> +			if (pid >= pid_max)
> +				break;
>  		}
>  		if (map < &pid_ns->pidmap[(pid_max-1)/BITS_PER_PAGE]) {
>  			++map;
> @@ -591,7 +584,6 @@ void __init pidmap_init(void)
>  	init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
>  	/* Reserve PID 0. We never call free_pidmap(0) */
>  	set_bit(0, init_pid_ns.pidmap[0].page);
> -	atomic_dec(&init_pid_ns.pidmap[0].nr_free);
>  
>  	init_pid_ns.pid_cachep = KMEM_CACHE(pid,
>  			SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT);
> diff -puN kernel/pid_namespace.c~pid-delete-struct-pidmap-nr_free kernel/pid_namespace.c
> --- a/kernel/pid_namespace.c~pid-delete-struct-pidmap-nr_free
> +++ a/kernel/pid_namespace.c
> @@ -98,7 +98,6 @@ static struct pid_namespace *create_pid_
>  	struct pid_namespace *ns;
>  	unsigned int level = parent_pid_ns->level + 1;
>  	struct ucounts *ucounts;
> -	int i;
>  	int err;
>  
>  	err = -EINVAL;
> @@ -139,11 +138,6 @@ static struct pid_namespace *create_pid_
>  	INIT_WORK(&ns->proc_work, proc_cleanup_work);
>  
>  	set_bit(0, ns->pidmap[0].page);
> -	atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
> -
> -	for (i = 1; i < PIDMAP_ENTRIES; i++)
> -		atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
> -
>  	return ns;
>  
>  out_free_map:
> _
> 
> Patches currently in -mm which might be from adobriyan@gmail.com are
> 
> proc-uninline-name_to_int.patch
> proc-use-do-while-in-name_to_int.patch
> seq_file-delete-small-value-optimization.patch
> pid-delete-reserved_pids.patch
> pid-delete-struct-pidmap-nr_free.patch
> 

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: + pid-delete-struct-pidmap-nr_free.patch added to -mm tree
  2017-10-03 16:15 ` Oleg Nesterov
@ 2017-10-03 17:39   ` Gargi Sharma
  2017-10-03 21:56     ` Alexey Dobriyan
  0 siblings, 1 reply; 4+ messages in thread
From: Gargi Sharma @ 2017-10-03 17:39 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: akpm, adobriyan, Eric W. Biederman, keescook, mm-commits,
	linux-kernel, Rik van Riel

On Tue, Oct 3, 2017 at 9:45 PM, Oleg Nesterov <oleg@redhat.com> wrote:
> On 10/02, Andrew Morton wrote:
>>
>> From: Alexey Dobriyan <adobriyan@gmail.com>
>> Subject: pid: delete struct pidmap::nr_free
>>
>> There is a check in pid allocation code to skip a full page:
>>
>>       if (likely(atomic_read(&map->nr_free))) {
>>               ...
>>
>> In practice it doesn't do anything.  To skip a pidmap page one has to have
>> 32K consecutive pids allocated at the same time which doesn't happen.
>>
>> Currently the price is _every_ fork/exit on every system being slower than
>> necessary.
>
> Agreed, I too never understood how can this counter help.
>
> Add Gargi and Rik, the next version of "Replace PID bitmap allocation with IDR
> API" can conflict with this and the previous pid-delete-reserved_pids.patch.

I think this patch will become obsolete, as pidmap will be removed. As for
patch 1/2 of Alexey's series, I'll incorporate it so that rolled-over PIDs
start from 1 instead of RESERVED_PIDS.

Thanks!
Gargi
>
> Oleg.
>
>>
>> Link: http://lkml.kernel.org/r/20170909203649.GB4791@avx2
>> Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
>> Cc: "Eric W. Biederman" <ebiederm@xmission.com>
>> Cc: Kees Cook <keescook@chromium.org>
>> Cc: Oleg Nesterov <oleg@redhat.com>
>> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
>> ---
>>
>>  include/linux/pid_namespace.h |    1 -
>>  kernel/pid.c                  |   28 ++++++++++------------------
>>  kernel/pid_namespace.c        |    6 ------
>>  3 files changed, 10 insertions(+), 25 deletions(-)
>>
>> diff -puN include/linux/pid_namespace.h~pid-delete-struct-pidmap-nr_free include/linux/pid_namespace.h
>> --- a/include/linux/pid_namespace.h~pid-delete-struct-pidmap-nr_free
>> +++ a/include/linux/pid_namespace.h
>> @@ -11,7 +11,6 @@
>>  #include <linux/ns_common.h>
>>
>>  struct pidmap {
>> -       atomic_t nr_free;
>>         void *page;
>>  };
>>
>> diff -puN kernel/pid.c~pid-delete-struct-pidmap-nr_free kernel/pid.c
>> --- a/kernel/pid.c~pid-delete-struct-pidmap-nr_free
>> +++ a/kernel/pid.c
>> @@ -68,9 +68,6 @@ static inline int mk_pid(struct pid_name
>>   */
>>  struct pid_namespace init_pid_ns = {
>>       .kref = KREF_INIT(2),
>> -     .pidmap = {
>> -             [ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
>> -     },
>>       .last_pid = 0,
>>       .nr_hashed = PIDNS_HASH_ADDING,
>>       .level = 0,
>> @@ -106,7 +103,6 @@ static void free_pidmap(struct upid *upi
>>       int offset = nr & BITS_PER_PAGE_MASK;
>>
>>       clear_bit(offset, map->page);
>> -     atomic_inc(&map->nr_free);
>>  }
>>
>>  /*
>> @@ -181,20 +177,17 @@ static int alloc_pidmap(struct pid_names
>>                       if (unlikely(!map->page))
>>                               return -ENOMEM;
>>               }
>> -             if (likely(atomic_read(&map->nr_free))) {
>> -                     for ( ; ; ) {
>> -                             if (!test_and_set_bit(offset, map->page)) {
>> -                                     atomic_dec(&map->nr_free);
>> -                                     set_last_pid(pid_ns, last, pid);
>> -                                     return pid;
>> -                             }
>> -                             offset = find_next_offset(map, offset);
>> -                             if (offset >= BITS_PER_PAGE)
>> -                                     break;
>> -                             pid = mk_pid(pid_ns, map, offset);
>> -                             if (pid >= pid_max)
>> -                                     break;
>> +             for (;;) {
>> +                     if (!test_and_set_bit(offset, map->page)) {
>> +                             set_last_pid(pid_ns, last, pid);
>> +                             return pid;
>>                       }
>> +                     offset = find_next_offset(map, offset);
>> +                     if (offset >= BITS_PER_PAGE)
>> +                             break;
>> +                     pid = mk_pid(pid_ns, map, offset);
>> +                     if (pid >= pid_max)
>> +                             break;
>>               }
>>               if (map < &pid_ns->pidmap[(pid_max-1)/BITS_PER_PAGE]) {
>>                       ++map;
>> @@ -591,7 +584,6 @@ void __init pidmap_init(void)
>>       init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
>>       /* Reserve PID 0. We never call free_pidmap(0) */
>>       set_bit(0, init_pid_ns.pidmap[0].page);
>> -     atomic_dec(&init_pid_ns.pidmap[0].nr_free);
>>
>>       init_pid_ns.pid_cachep = KMEM_CACHE(pid,
>>                       SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT);
>> diff -puN kernel/pid_namespace.c~pid-delete-struct-pidmap-nr_free kernel/pid_namespace.c
>> --- a/kernel/pid_namespace.c~pid-delete-struct-pidmap-nr_free
>> +++ a/kernel/pid_namespace.c
>> @@ -98,7 +98,6 @@ static struct pid_namespace *create_pid_
>>       struct pid_namespace *ns;
>>       unsigned int level = parent_pid_ns->level + 1;
>>       struct ucounts *ucounts;
>> -     int i;
>>       int err;
>>
>>       err = -EINVAL;
>> @@ -139,11 +138,6 @@ static struct pid_namespace *create_pid_
>>       INIT_WORK(&ns->proc_work, proc_cleanup_work);
>>
>>       set_bit(0, ns->pidmap[0].page);
>> -     atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
>> -
>> -     for (i = 1; i < PIDMAP_ENTRIES; i++)
>> -             atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
>> -
>>       return ns;
>>
>>  out_free_map:
>> _
>>
>> Patches currently in -mm which might be from adobriyan@gmail.com are
>>
>> proc-uninline-name_to_int.patch
>> proc-use-do-while-in-name_to_int.patch
>> seq_file-delete-small-value-optimization.patch
>> pid-delete-reserved_pids.patch
>> pid-delete-struct-pidmap-nr_free.patch
>>
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: + pid-delete-struct-pidmap-nr_free.patch added to -mm tree
  2017-10-03 17:39   ` Gargi Sharma
@ 2017-10-03 21:56     ` Alexey Dobriyan
  0 siblings, 0 replies; 4+ messages in thread
From: Alexey Dobriyan @ 2017-10-03 21:56 UTC (permalink / raw)
  To: Gargi Sharma
  Cc: Oleg Nesterov, akpm, Eric W. Biederman, keescook, mm-commits,
	linux-kernel, Rik van Riel

On Tue, Oct 03, 2017 at 11:09:08PM +0530, Gargi Sharma wrote:
> On Tue, Oct 3, 2017 at 9:45 PM, Oleg Nesterov <oleg@redhat.com> wrote:
> > On 10/02, Andrew Morton wrote:
> >>
> >> From: Alexey Dobriyan <adobriyan@gmail.com>
> >> Subject: pid: delete struct pidmap::nr_free
> >>
> >> There is a check in pid allocation code to skip a full page:
> >>
> >>       if (likely(atomic_read(&map->nr_free))) {
> >>               ...
> >>
> >> In practice it doesn't do anything.  To skip a pidmap page one has to have
> >> 32K consecutive pids allocated at the same time which doesn't happen.
> >>
> >> Currently the price is _every_ fork/exit on every system being slower than
> >> necessary.
> >
> > Agreed, I too never understood how can this counter help.
> >
> > Add Gargi and Rik, the next version of "Replace PID bitmap allocation with IDR
> > API" can conflict with this and the previous pid-delete-reserved_pids.patch.
> 
> I think this patch will become obsolete as pidmap will be removed. As
> for the 1/2
> patch of Alexey's series, I'll incorporate it so that rolled over PIDs
				 ^^^^^^^^^^^
> start from 1 instead of RESERVED_PIDS.

I saw IDR pids, lots of code removed indeed.

Incorporation is not really necessary, as RESERVED_PIDS is an independent change.
Just send it as 2/2 (or whatever the number is), or rebase on top of -mm.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2017-10-03 21:56 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-02 23:13 + pid-delete-struct-pidmap-nr_free.patch added to -mm tree akpm
2017-10-03 16:15 ` Oleg Nesterov
2017-10-03 17:39   ` Gargi Sharma
2017-10-03 21:56     ` Alexey Dobriyan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.