All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4] proc: Fix a dentry lock race between release_task and lookup
@ 2022-06-01  6:23 Zhihao Cheng
  2022-06-10  8:09 ` Zhihao Cheng
  2022-07-12 14:16 ` Brian Foster
  0 siblings, 2 replies; 6+ messages in thread
From: Zhihao Cheng @ 2022-06-01  6:23 UTC (permalink / raw)
  To: ebiederm; +Cc: linux-kernel, linux-fsdevel, chengzhihao1, yukuai3, yi.zhang

Commit 7bc3e6e55acf06 ("proc: Use a list of inodes to flush from proc")
moved proc_flush_task() behind __exit_signal(). Then, process systemd
can take long period high cpu usage during releasing task in following
concurrent processes:

  systemd                                 ps
kernel_waitid                 stat(/proc/tgid)
  do_wait                       filename_lookup
    wait_consider_task            lookup_fast
      release_task
        __exit_signal
          __unhash_process
            detach_pid
              __change_pid // remove task->pid_links
                                     d_revalidate -> pid_revalidate  // 0
                                     d_invalidate(/proc/tgid)
                                       shrink_dcache_parent(/proc/tgid)
                                         d_walk(/proc/tgid)
                                           spin_lock_nested(/proc/tgid/fd)
                                           // iterating opened fd
        proc_flush_pid                                    |
           d_invalidate (/proc/tgid/fd)                   |
              shrink_dcache_parent(/proc/tgid/fd)         |
                shrink_dentry_list(subdirs)               ↓
                  shrink_lock_dentry(/proc/tgid/fd) --> race on dentry lock

Function d_invalidate() will remove dentry from hash firstly, but why does
proc_flush_pid() process dentry '/proc/tgid/fd' before dentry '/proc/tgid'?
That's because proc_pid_make_inode() adds proc inode in reverse order by
invoking hlist_add_head_rcu(). But proc should not add any inodes under
'/proc/tgid' except '/proc/tgid/task/pid', fix it by adding inode into
'pid->inodes' only if the inode is /proc/tgid or /proc/tgid/task/pid.

Performance regression:
Create 200 tasks, each task open one file for 50,000 times. Kill all
tasks when opened files exceed 10,000,000 (cat /proc/sys/fs/file-nr).

Before fix:
$ time killall -wq aa
  real    4m40.946s   # During this period, we can see 'ps' and 'systemd'
			taking high cpu usage.

After fix:
$ time killall -wq aa
  real    1m20.732s   # During this period, we can see 'systemd' taking
			high cpu usage.

Fixes: 7bc3e6e55acf06 ("proc: Use a list of inodes to flush from proc")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=216054
Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
 v1->v2: Add new helper proc_pid_make_base_inode that performs the extra
	 work of adding to the pid->list.
 v2->v3: Add performance regression in commit message.
 v3->v4: Make proc_pid_make_base_inode() static
 fs/proc/base.c | 34 ++++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index c1031843cc6a..d884933950fd 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1885,7 +1885,7 @@ void proc_pid_evict_inode(struct proc_inode *ei)
 	put_pid(pid);
 }
 
-struct inode *proc_pid_make_inode(struct super_block * sb,
+struct inode *proc_pid_make_inode(struct super_block *sb,
 				  struct task_struct *task, umode_t mode)
 {
 	struct inode * inode;
@@ -1914,11 +1914,6 @@ struct inode *proc_pid_make_inode(struct super_block * sb,
 
 	/* Let the pid remember us for quick removal */
 	ei->pid = pid;
-	if (S_ISDIR(mode)) {
-		spin_lock(&pid->lock);
-		hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes);
-		spin_unlock(&pid->lock);
-	}
 
 	task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
 	security_task_to_inode(task, inode);
@@ -1931,6 +1926,27 @@ struct inode *proc_pid_make_inode(struct super_block * sb,
 	return NULL;
 }
 
+static struct inode *proc_pid_make_base_inode(struct super_block *sb,
+				struct task_struct *task, umode_t mode)
+{
+	struct inode *inode;
+	struct proc_inode *ei;
+	struct pid *pid;
+
+	inode = proc_pid_make_inode(sb, task, mode);
+	if (!inode)
+		return NULL;
+
+	/* Let proc_flush_pid find this directory inode */
+	ei = PROC_I(inode);
+	pid = ei->pid;
+	spin_lock(&pid->lock);
+	hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes);
+	spin_unlock(&pid->lock);
+
+	return inode;
+}
+
 int pid_getattr(struct user_namespace *mnt_userns, const struct path *path,
 		struct kstat *stat, u32 request_mask, unsigned int query_flags)
 {
@@ -3350,7 +3366,8 @@ static struct dentry *proc_pid_instantiate(struct dentry * dentry,
 {
 	struct inode *inode;
 
-	inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
+	inode = proc_pid_make_base_inode(dentry->d_sb, task,
+					 S_IFDIR | S_IRUGO | S_IXUGO);
 	if (!inode)
 		return ERR_PTR(-ENOENT);
 
@@ -3649,7 +3666,8 @@ static struct dentry *proc_task_instantiate(struct dentry *dentry,
 	struct task_struct *task, const void *ptr)
 {
 	struct inode *inode;
-	inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
+	inode = proc_pid_make_base_inode(dentry->d_sb, task,
+					 S_IFDIR | S_IRUGO | S_IXUGO);
 	if (!inode)
 		return ERR_PTR(-ENOENT);
 
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH v4] proc: Fix a dentry lock race between release_task and lookup
  2022-06-01  6:23 [PATCH v4] proc: Fix a dentry lock race between release_task and lookup Zhihao Cheng
@ 2022-06-10  8:09 ` Zhihao Cheng
  2022-07-12  3:06   ` Zhihao Cheng
  2022-07-12 14:16 ` Brian Foster
  1 sibling, 1 reply; 6+ messages in thread
From: Zhihao Cheng @ 2022-06-10  8:09 UTC (permalink / raw)
  To: ebiederm; +Cc: linux-kernel, linux-fsdevel, yukuai3, yi.zhang

在 2022/6/1 14:23, Zhihao Cheng 写道:
friendly ping
> Commit 7bc3e6e55acf06 ("proc: Use a list of inodes to flush from proc")
> moved proc_flush_task() behind __exit_signal(). Then, process systemd
> can take long period high cpu usage during releasing task in following
> concurrent processes:
>
>    systemd                                 ps
> kernel_waitid                 stat(/proc/tgid)
>    do_wait                       filename_lookup
>      wait_consider_task            lookup_fast
>        release_task
>          __exit_signal
>            __unhash_process
>              detach_pid
>                __change_pid // remove task->pid_links
>                                       d_revalidate -> pid_revalidate  // 0
>                                       d_invalidate(/proc/tgid)
>                                         shrink_dcache_parent(/proc/tgid)
>                                           d_walk(/proc/tgid)
>                                             spin_lock_nested(/proc/tgid/fd)
>                                             // iterating opened fd
>          proc_flush_pid                                    |
>             d_invalidate (/proc/tgid/fd)                   |
>                shrink_dcache_parent(/proc/tgid/fd)         |
>                  shrink_dentry_list(subdirs)               ↓
>                    shrink_lock_dentry(/proc/tgid/fd) --> race on dentry lock
>
> Function d_invalidate() will remove dentry from hash firstly, but why does
> proc_flush_pid() process dentry '/proc/tgid/fd' before dentry '/proc/tgid'?
> That's because proc_pid_make_inode() adds proc inode in reverse order by
> invoking hlist_add_head_rcu(). But proc should not add any inodes under
> '/proc/tgid' except '/proc/tgid/task/pid', fix it by adding inode into
> 'pid->inodes' only if the inode is /proc/tgid or /proc/tgid/task/pid.
>
> Performance regression:
> Create 200 tasks, each task open one file for 50,000 times. Kill all
> tasks when opened files exceed 10,000,000 (cat /proc/sys/fs/file-nr).
>


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v4] proc: Fix a dentry lock race between release_task and lookup
  2022-06-10  8:09 ` Zhihao Cheng
@ 2022-07-12  3:06   ` Zhihao Cheng
  0 siblings, 0 replies; 6+ messages in thread
From: Zhihao Cheng @ 2022-07-12  3:06 UTC (permalink / raw)
  To: ebiederm
  Cc: linux-kernel, linux-fsdevel, yukuai3, Matthew Wilcox, bhe,
	bfoster, Andrew Morton, kaleshsingh

在 2022/6/10 16:09, Zhihao Cheng 写道:
> 在 2022/6/1 14:23, Zhihao Cheng 写道:
ping again.
> friendly ping
>> Commit 7bc3e6e55acf06 ("proc: Use a list of inodes to flush from proc")
>> moved proc_flush_task() behind __exit_signal(). Then, process systemd
>> can take long period high cpu usage during releasing task in following
>> concurrent processes:
>>
>>    systemd                                 ps
>> kernel_waitid                 stat(/proc/tgid)
>>    do_wait                       filename_lookup
>>      wait_consider_task            lookup_fast
>>        release_task
>>          __exit_signal
>>            __unhash_process
>>              detach_pid
>>                __change_pid // remove task->pid_links
>>                                       d_revalidate -> pid_revalidate  
>> // 0
>>                                       d_invalidate(/proc/tgid)
>>                                         shrink_dcache_parent(/proc/tgid)
>>                                           d_walk(/proc/tgid)
>>                                             
>> spin_lock_nested(/proc/tgid/fd)
>>                                             // iterating opened fd
>>          proc_flush_pid                                    |
>>             d_invalidate (/proc/tgid/fd)                   |
>>                shrink_dcache_parent(/proc/tgid/fd)         |
>>                  shrink_dentry_list(subdirs)               ↓
>>                    shrink_lock_dentry(/proc/tgid/fd) --> race on 
>> dentry lock
>>
>> Function d_invalidate() will remove dentry from hash firstly, but why 
>> does
>> proc_flush_pid() process dentry '/proc/tgid/fd' before dentry 
>> '/proc/tgid'?
>> That's because proc_pid_make_inode() adds proc inode in reverse order by
>> invoking hlist_add_head_rcu(). But proc should not add any inodes under
>> '/proc/tgid' except '/proc/tgid/task/pid', fix it by adding inode into
>> 'pid->inodes' only if the inode is /proc/tgid or /proc/tgid/task/pid.
>>
>> Performance regression:
>> Create 200 tasks, each task open one file for 50,000 times. Kill all
>> tasks when opened files exceed 10,000,000 (cat /proc/sys/fs/file-nr).
>>
> 
> .


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v4] proc: Fix a dentry lock race between release_task and lookup
  2022-06-01  6:23 [PATCH v4] proc: Fix a dentry lock race between release_task and lookup Zhihao Cheng
  2022-06-10  8:09 ` Zhihao Cheng
@ 2022-07-12 14:16 ` Brian Foster
  2022-07-13  7:24   ` Zhihao Cheng
  1 sibling, 1 reply; 6+ messages in thread
From: Brian Foster @ 2022-07-12 14:16 UTC (permalink / raw)
  To: Zhihao Cheng; +Cc: ebiederm, linux-kernel, linux-fsdevel, yukuai3, yi.zhang

On Wed, Jun 01, 2022 at 02:23:32PM +0800, Zhihao Cheng wrote:
> Commit 7bc3e6e55acf06 ("proc: Use a list of inodes to flush from proc")
> moved proc_flush_task() behind __exit_signal(). Then, process systemd
> can take long period high cpu usage during releasing task in following
> concurrent processes:
> 
>   systemd                                 ps
> kernel_waitid                 stat(/proc/tgid)
>   do_wait                       filename_lookup
>     wait_consider_task            lookup_fast
>       release_task
>         __exit_signal
>           __unhash_process
>             detach_pid
>               __change_pid // remove task->pid_links
>                                      d_revalidate -> pid_revalidate  // 0
>                                      d_invalidate(/proc/tgid)
>                                        shrink_dcache_parent(/proc/tgid)
>                                          d_walk(/proc/tgid)
>                                            spin_lock_nested(/proc/tgid/fd)
>                                            // iterating opened fd
>         proc_flush_pid                                    |
>            d_invalidate (/proc/tgid/fd)                   |
>               shrink_dcache_parent(/proc/tgid/fd)         |
>                 shrink_dentry_list(subdirs)               ↓
>                   shrink_lock_dentry(/proc/tgid/fd) --> race on dentry lock
> 

Curious... can this same sort of thing happen with /proc/<tgid>/task if
that dir similarly has a lot of dentries?

...
> Fixes: 7bc3e6e55acf06 ("proc: Use a list of inodes to flush from proc")
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=216054
> Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
> Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
> ---
>  v1->v2: Add new helper proc_pid_make_base_inode that performs the extra
> 	 work of adding to the pid->list.
>  v2->v3: Add performance regression in commit message.
>  v3->v4: Make proc_pid_make_base_inode() static
>  fs/proc/base.c | 34 ++++++++++++++++++++++++++--------
>  1 file changed, 26 insertions(+), 8 deletions(-)
> 
> diff --git a/fs/proc/base.c b/fs/proc/base.c
> index c1031843cc6a..d884933950fd 100644
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
...
> @@ -1931,6 +1926,27 @@ struct inode *proc_pid_make_inode(struct super_block * sb,
>  	return NULL;
>  }
>  
> +static struct inode *proc_pid_make_base_inode(struct super_block *sb,
> +				struct task_struct *task, umode_t mode)
> +{
> +	struct inode *inode;
> +	struct proc_inode *ei;
> +	struct pid *pid;
> +
> +	inode = proc_pid_make_inode(sb, task, mode);
> +	if (!inode)
> +		return NULL;
> +
> +	/* Let proc_flush_pid find this directory inode */
> +	ei = PROC_I(inode);
> +	pid = ei->pid;
> +	spin_lock(&pid->lock);
> +	hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes);
> +	spin_unlock(&pid->lock);
> +
> +	return inode;
> +}
> +

Somewhat related to the question above.. it would be nice if this
wrapper had a line or two comment above it that explained when it should
or shouldn't be used over the underlying function (for example, why or
why not include /proc/<tgid>/task?). Otherwise the patch overall seems
reasonable to me..

Brian

>  int pid_getattr(struct user_namespace *mnt_userns, const struct path *path,
>  		struct kstat *stat, u32 request_mask, unsigned int query_flags)
>  {
> @@ -3350,7 +3366,8 @@ static struct dentry *proc_pid_instantiate(struct dentry * dentry,
>  {
>  	struct inode *inode;
>  
> -	inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
> +	inode = proc_pid_make_base_inode(dentry->d_sb, task,
> +					 S_IFDIR | S_IRUGO | S_IXUGO);
>  	if (!inode)
>  		return ERR_PTR(-ENOENT);
>  
> @@ -3649,7 +3666,8 @@ static struct dentry *proc_task_instantiate(struct dentry *dentry,
>  	struct task_struct *task, const void *ptr)
>  {
>  	struct inode *inode;
> -	inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
> +	inode = proc_pid_make_base_inode(dentry->d_sb, task,
> +					 S_IFDIR | S_IRUGO | S_IXUGO);
>  	if (!inode)
>  		return ERR_PTR(-ENOENT);
>  
> -- 
> 2.31.1
> 


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v4] proc: Fix a dentry lock race between release_task and lookup
  2022-07-12 14:16 ` Brian Foster
@ 2022-07-13  7:24   ` Zhihao Cheng
  2022-07-13 12:41     ` Brian Foster
  0 siblings, 1 reply; 6+ messages in thread
From: Zhihao Cheng @ 2022-07-13  7:24 UTC (permalink / raw)
  To: Brian Foster; +Cc: ebiederm, linux-kernel, linux-fsdevel, yukuai3, yi.zhang

在 2022/7/12 22:16, Brian Foster 写道:
> On Wed, Jun 01, 2022 at 02:23:32PM +0800, Zhihao Cheng wrote:
>> Commit 7bc3e6e55acf06 ("proc: Use a list of inodes to flush from proc")
>> moved proc_flush_task() behind __exit_signal(). Then, process systemd
>> can take long period high cpu usage during releasing task in following
>> concurrent processes:
>>
>>    systemd                                 ps
>> kernel_waitid                 stat(/proc/tgid)
>>    do_wait                       filename_lookup
>>      wait_consider_task            lookup_fast
>>        release_task
>>          __exit_signal
>>            __unhash_process
>>              detach_pid
>>                __change_pid // remove task->pid_links
>>                                       d_revalidate -> pid_revalidate  // 0
>>                                       d_invalidate(/proc/tgid)
>>                                         shrink_dcache_parent(/proc/tgid)
>>                                           d_walk(/proc/tgid)
>>                                             spin_lock_nested(/proc/tgid/fd)
>>                                             // iterating opened fd
>>          proc_flush_pid                                    |
>>             d_invalidate (/proc/tgid/fd)                   |
>>                shrink_dcache_parent(/proc/tgid/fd)         |
>>                  shrink_dentry_list(subdirs)               ↓
>>                    shrink_lock_dentry(/proc/tgid/fd) --> race on dentry lock
>>
> 
> Curious... can this same sort of thing happen with /proc/<tgid>/task if
> that dir similarly has a lot of dentries?
> 

Yes. It could happend too. There will be many dentries under 
/proc/<tgid>/task when there are many tasks under same thread group.

We must put /proc/<tgid>/task into pid->inodes, because we have to 
handle single thread exiting situation: Any one of threads should 
invalidate its /proc/<tgid>/task/<pid> dentry before begin released. You 
may refer to the function proc_flush_task_mnt() before commit 
7bc3e6e55acf06 ("proc: Use a list of inodes to flush from proc").

> ...
>> Fixes: 7bc3e6e55acf06 ("proc: Use a list of inodes to flush from proc")
>> Link: https://bugzilla.kernel.org/show_bug.cgi?id=216054
>> Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
>> Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
>> ---
>>   v1->v2: Add new helper proc_pid_make_base_inode that performs the extra
>> 	 work of adding to the pid->list.
>>   v2->v3: Add performance regression in commit message.
>>   v3->v4: Make proc_pid_make_base_inode() static
>>   fs/proc/base.c | 34 ++++++++++++++++++++++++++--------
>>   1 file changed, 26 insertions(+), 8 deletions(-)
>>
>> diff --git a/fs/proc/base.c b/fs/proc/base.c
>> index c1031843cc6a..d884933950fd 100644
>> --- a/fs/proc/base.c
>> +++ b/fs/proc/base.c
> ...
>> @@ -1931,6 +1926,27 @@ struct inode *proc_pid_make_inode(struct super_block * sb,
>>   	return NULL;
>>   }
>>   
>> +static struct inode *proc_pid_make_base_inode(struct super_block *sb,
>> +				struct task_struct *task, umode_t mode)
>> +{
>> +	struct inode *inode;
>> +	struct proc_inode *ei;
>> +	struct pid *pid;
>> +
>> +	inode = proc_pid_make_inode(sb, task, mode);
>> +	if (!inode)
>> +		return NULL;
>> +
>> +	/* Let proc_flush_pid find this directory inode */
>> +	ei = PROC_I(inode);
>> +	pid = ei->pid;
>> +	spin_lock(&pid->lock);
>> +	hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes);
>> +	spin_unlock(&pid->lock);
>> +
>> +	return inode;
>> +}
>> +
> 
> Somewhat related to the question above.. it would be nice if this
> wrapper had a line or two comment above it that explained when it should
> or shouldn't be used over the underlying function (for example, why or
> why not include /proc/<tgid>/task?). Otherwise the patch overall seems
> reasonable to me..
> 

Thanks for advice, I will add some notes in v5.
> Brian
> 
>>   int pid_getattr(struct user_namespace *mnt_userns, const struct path *path,
>>   		struct kstat *stat, u32 request_mask, unsigned int query_flags)
>>   {
>> @@ -3350,7 +3366,8 @@ static struct dentry *proc_pid_instantiate(struct dentry * dentry,
>>   {
>>   	struct inode *inode;
>>   
>> -	inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
>> +	inode = proc_pid_make_base_inode(dentry->d_sb, task,
>> +					 S_IFDIR | S_IRUGO | S_IXUGO);
>>   	if (!inode)
>>   		return ERR_PTR(-ENOENT);
>>   
>> @@ -3649,7 +3666,8 @@ static struct dentry *proc_task_instantiate(struct dentry *dentry,
>>   	struct task_struct *task, const void *ptr)
>>   {
>>   	struct inode *inode;
>> -	inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
>> +	inode = proc_pid_make_base_inode(dentry->d_sb, task,
>> +					 S_IFDIR | S_IRUGO | S_IXUGO);
>>   	if (!inode)
>>   		return ERR_PTR(-ENOENT);
>>   
>> -- 
>> 2.31.1
>>
> 
> .
> 


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v4] proc: Fix a dentry lock race between release_task and lookup
  2022-07-13  7:24   ` Zhihao Cheng
@ 2022-07-13 12:41     ` Brian Foster
  0 siblings, 0 replies; 6+ messages in thread
From: Brian Foster @ 2022-07-13 12:41 UTC (permalink / raw)
  To: Zhihao Cheng; +Cc: ebiederm, linux-kernel, linux-fsdevel, yukuai3, yi.zhang

On Wed, Jul 13, 2022 at 03:24:50PM +0800, Zhihao Cheng wrote:
> 在 2022/7/12 22:16, Brian Foster 写道:
> > On Wed, Jun 01, 2022 at 02:23:32PM +0800, Zhihao Cheng wrote:
> > > Commit 7bc3e6e55acf06 ("proc: Use a list of inodes to flush from proc")
> > > moved proc_flush_task() behind __exit_signal(). Then, process systemd
> > > can take long period high cpu usage during releasing task in following
> > > concurrent processes:
> > > 
> > >    systemd                                 ps
> > > kernel_waitid                 stat(/proc/tgid)
> > >    do_wait                       filename_lookup
> > >      wait_consider_task            lookup_fast
> > >        release_task
> > >          __exit_signal
> > >            __unhash_process
> > >              detach_pid
> > >                __change_pid // remove task->pid_links
> > >                                       d_revalidate -> pid_revalidate  // 0
> > >                                       d_invalidate(/proc/tgid)
> > >                                         shrink_dcache_parent(/proc/tgid)
> > >                                           d_walk(/proc/tgid)
> > >                                             spin_lock_nested(/proc/tgid/fd)
> > >                                             // iterating opened fd
> > >          proc_flush_pid                                    |
> > >             d_invalidate (/proc/tgid/fd)                   |
> > >                shrink_dcache_parent(/proc/tgid/fd)         |
> > >                  shrink_dentry_list(subdirs)               ↓
> > >                    shrink_lock_dentry(/proc/tgid/fd) --> race on dentry lock
> > > 
> > 
> > Curious... can this same sort of thing happen with /proc/<tgid>/task if
> > that dir similarly has a lot of dentries?
> > 
> 
> Yes. It could happend too. There will be many dentries under
> /proc/<tgid>/task when there are many tasks under same thread group.
> 
> We must put /proc/<tgid>/task into pid->inodes, because we have to handle
> single thread exiting situation: Any one of threads should invalidate its
> /proc/<tgid>/task/<pid> dentry before begin released. You may refer to the
> function proc_flush_task_mnt() before commit 7bc3e6e55acf06 ("proc: Use a
> list of inodes to flush from proc").
> 

Ah, I see. So historically when the (thread) task goes away, we look up
the tgid and then the associated /proc/<tgid>/task/<pid> dentry to zap
it. Thanks for the pointer..

Brian

> > ...
> > > Fixes: 7bc3e6e55acf06 ("proc: Use a list of inodes to flush from proc")
> > > Link: https://bugzilla.kernel.org/show_bug.cgi?id=216054
> > > Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
> > > Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
> > > ---
> > >   v1->v2: Add new helper proc_pid_make_base_inode that performs the extra
> > > 	 work of adding to the pid->list.
> > >   v2->v3: Add performance regression in commit message.
> > >   v3->v4: Make proc_pid_make_base_inode() static
> > >   fs/proc/base.c | 34 ++++++++++++++++++++++++++--------
> > >   1 file changed, 26 insertions(+), 8 deletions(-)
> > > 
> > > diff --git a/fs/proc/base.c b/fs/proc/base.c
> > > index c1031843cc6a..d884933950fd 100644
> > > --- a/fs/proc/base.c
> > > +++ b/fs/proc/base.c
> > ...
> > > @@ -1931,6 +1926,27 @@ struct inode *proc_pid_make_inode(struct super_block * sb,
> > >   	return NULL;
> > >   }
> > > +static struct inode *proc_pid_make_base_inode(struct super_block *sb,
> > > +				struct task_struct *task, umode_t mode)
> > > +{
> > > +	struct inode *inode;
> > > +	struct proc_inode *ei;
> > > +	struct pid *pid;
> > > +
> > > +	inode = proc_pid_make_inode(sb, task, mode);
> > > +	if (!inode)
> > > +		return NULL;
> > > +
> > > +	/* Let proc_flush_pid find this directory inode */
> > > +	ei = PROC_I(inode);
> > > +	pid = ei->pid;
> > > +	spin_lock(&pid->lock);
> > > +	hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes);
> > > +	spin_unlock(&pid->lock);
> > > +
> > > +	return inode;
> > > +}
> > > +
> > 
> > Somewhat related to the question above.. it would be nice if this
> > wrapper had a line or two comment above it that explained when it should
> > or shouldn't be used over the underlying function (for example, why or
> > why not include /proc/<tgid>/task?). Otherwise the patch overall seems
> > reasonable to me..
> > 
> 
> Thanks for advice, I will add some notes in v5.
> > Brian
> > 
> > >   int pid_getattr(struct user_namespace *mnt_userns, const struct path *path,
> > >   		struct kstat *stat, u32 request_mask, unsigned int query_flags)
> > >   {
> > > @@ -3350,7 +3366,8 @@ static struct dentry *proc_pid_instantiate(struct dentry * dentry,
> > >   {
> > >   	struct inode *inode;
> > > -	inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
> > > +	inode = proc_pid_make_base_inode(dentry->d_sb, task,
> > > +					 S_IFDIR | S_IRUGO | S_IXUGO);
> > >   	if (!inode)
> > >   		return ERR_PTR(-ENOENT);
> > > @@ -3649,7 +3666,8 @@ static struct dentry *proc_task_instantiate(struct dentry *dentry,
> > >   	struct task_struct *task, const void *ptr)
> > >   {
> > >   	struct inode *inode;
> > > -	inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
> > > +	inode = proc_pid_make_base_inode(dentry->d_sb, task,
> > > +					 S_IFDIR | S_IRUGO | S_IXUGO);
> > >   	if (!inode)
> > >   		return ERR_PTR(-ENOENT);
> > > -- 
> > > 2.31.1
> > > 
> > 
> > .
> > 
> 


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2022-07-13 12:41 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-01  6:23 [PATCH v4] proc: Fix a dentry lock race between release_task and lookup Zhihao Cheng
2022-06-10  8:09 ` Zhihao Cheng
2022-07-12  3:06   ` Zhihao Cheng
2022-07-12 14:16 ` Brian Foster
2022-07-13  7:24   ` Zhihao Cheng
2022-07-13 12:41     ` Brian Foster

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.