linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] sched/psi: Fix use-after-free in poll_freewait()
@ 2023-06-13  6:23 Lu Jialin
  2023-06-13 23:42 ` Suren Baghdasaryan
  0 siblings, 1 reply; 3+ messages in thread
From: Lu Jialin @ 2023-06-13  6:23 UTC (permalink / raw)
  To: Lu Jialin, Johannes Weiner, Andrew Morton, Alexander Viro,
	Christian Brauner, Suren Baghdasaryan
  Cc: linux-kernel, linux-fsdevel

We found a UAF bug in remove_wait_queue as follows:

==================================================================
BUG: KASAN: use-after-free in _raw_spin_lock_irqsave+0x71/0xe0
Write of size 4 at addr ffff8881150d7b28 by task psi_trigger/15306
Call Trace:
 dump_stack+0x9c/0xd3
 print_address_description.constprop.0+0x19/0x170
 __kasan_report.cold+0x6c/0x84
 kasan_report+0x3a/0x50
 check_memory_region+0xfd/0x1f0
 _raw_spin_lock_irqsave+0x71/0xe0
 remove_wait_queue+0x26/0xc0
 poll_freewait+0x6b/0x120
 do_sys_poll+0x305/0x400
 do_syscall_64+0x33/0x40
 entry_SYSCALL_64_after_hwframe+0x61/0xc6

Allocated by task 15306:
 kasan_save_stack+0x1b/0x40
 __kasan_kmalloc.constprop.0+0xb5/0xe0
 psi_trigger_create.part.0+0xfc/0x450
 cgroup_pressure_write+0xfc/0x3b0
 cgroup_file_write+0x1b3/0x390
 kernfs_fop_write_iter+0x224/0x2e0
 new_sync_write+0x2ac/0x3a0
 vfs_write+0x365/0x430
 ksys_write+0xd5/0x1b0
 do_syscall_64+0x33/0x40
 entry_SYSCALL_64_after_hwframe+0x61/0xc6

Freed by task 15850:
 kasan_save_stack+0x1b/0x40
 kasan_set_track+0x1c/0x30
 kasan_set_free_info+0x20/0x40
 __kasan_slab_free+0x151/0x180
 kfree+0xba/0x680
 cgroup_file_release+0x5c/0xe0
 kernfs_drain_open_files+0x122/0x1e0
 kernfs_drain+0xff/0x1e0
 __kernfs_remove.part.0+0x1d1/0x3b0
 kernfs_remove_by_name_ns+0x89/0xf0
 cgroup_addrm_files+0x393/0x3d0
 css_clear_dir+0x8f/0x120
 kill_css+0x41/0xd0
 cgroup_destroy_locked+0x166/0x300
 cgroup_rmdir+0x37/0x140
 kernfs_iop_rmdir+0xbb/0xf0
 vfs_rmdir.part.0+0xa5/0x230
 do_rmdir+0x2e0/0x320
 __x64_sys_unlinkat+0x99/0xc0
 do_syscall_64+0x33/0x40
 entry_SYSCALL_64_after_hwframe+0x61/0xc6
==================================================================

If using epoll(), wake_up_pollfree will empty waitqueue and set
wait_queue_head is NULL before free waitqueue of psi trigger. But is
doesn't work when using poll(), which will lead a UAF problem in
poll_freewait coms as following:

(cgroup_rmdir)                      |
psi_trigger_destroy                 |
  wake_up_pollfree(&t->event_wait)  |
    kfree(t)                        |
				    |	(poll_freewait)
				    |     free_poll_entry(pwq->inline_entries + i)
				    |	    remove_wait_queue(entry->wait_address)
				    |	      spin_lock_irqsave(&wq_head->lock)

entry->wait_address in poll_freewait() is t->event_wait in cgroup_rmdir().
t->event_wait is free in psi_trigger_destroy before call poll_freewait(),
therefore wq_head in poll_freewait() has been already freed, which would
lead to a UAF.

similar problem for epoll() has been fixed commit c2dbe32d5db5
("sched/psi: Fix use-after-free in ep_remove_wait_queue()").
epoll wakeup function ep_poll_callback() will empty waitqueue and set
wait_queue_head is NULL when pollflags is POLLFREE and judge pwq->whead
is NULL or not before remove_wait_queue in ep_remove_wait_queue(),
which will fix the UAF bug in ep_remove_wait_queue.

But poll wakeup function pollwake() doesn't do that. To fix the
problem, we empty waitqueue and set wait_address is NULL in pollwake() when
key is POLLFREE. otherwise in remove_wait_queue, which is similar to
epoll().

Fixes: 0e94682b73bf ("psi: introduce psi monitor")
Signed-off-by: Lu Jialin <lujialin4@huawei.com>
---
 fs/select.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/fs/select.c b/fs/select.c
index 0ee55af1a55c..e64c7b4e9959 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -132,7 +132,17 @@ EXPORT_SYMBOL(poll_initwait);
 
 static void free_poll_entry(struct poll_table_entry *entry)
 {
-	remove_wait_queue(entry->wait_address, &entry->wait);
+	wait_queue_head_t *whead;
+
+	rcu_read_lock();
+	/* If it is cleared by POLLFREE, it should be rcu-safe.
+	 * If we read NULL we need a barrier paired with smp_store_release()
+	 * in pollwake().
+	 */
+	whead = smp_load_acquire(&entry->wait_address);
+	if (whead)
+		remove_wait_queue(whead, &entry->wait);
+	rcu_read_unlock();
 	fput(entry->filp);
 }
 
@@ -215,6 +225,14 @@ static int pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key
 	entry = container_of(wait, struct poll_table_entry, wait);
 	if (key && !(key_to_poll(key) & entry->key))
 		return 0;
+	if (key_to_poll(key) & POLLFREE) {
+		list_del_init(&wait->entry);
+		/* wait_address !=NULL protects us from the race with
+		 * poll_freewait().
+		 */
+		smp_store_release(&entry->wait_address, NULL);
+		return 0;
+	}
 	return __pollwake(wait, mode, sync, key);
 }
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] sched/psi: Fix use-after-free in poll_freewait()
  2023-06-13  6:23 [PATCH] sched/psi: Fix use-after-free in poll_freewait() Lu Jialin
@ 2023-06-13 23:42 ` Suren Baghdasaryan
  2023-06-14  3:47   ` lujialin (A)
  0 siblings, 1 reply; 3+ messages in thread
From: Suren Baghdasaryan @ 2023-06-13 23:42 UTC (permalink / raw)
  To: Lu Jialin
  Cc: Johannes Weiner, Andrew Morton, Alexander Viro,
	Christian Brauner, linux-kernel, linux-fsdevel, Eric Biggers,
	Oleg Nesterov

On Mon, Jun 12, 2023 at 11:24 PM Lu Jialin <lujialin4@huawei.com> wrote:
>
> We found a UAF bug in remove_wait_queue as follows:
>
> ==================================================================
> BUG: KASAN: use-after-free in _raw_spin_lock_irqsave+0x71/0xe0
> Write of size 4 at addr ffff8881150d7b28 by task psi_trigger/15306
> Call Trace:
>  dump_stack+0x9c/0xd3
>  print_address_description.constprop.0+0x19/0x170
>  __kasan_report.cold+0x6c/0x84
>  kasan_report+0x3a/0x50
>  check_memory_region+0xfd/0x1f0
>  _raw_spin_lock_irqsave+0x71/0xe0
>  remove_wait_queue+0x26/0xc0
>  poll_freewait+0x6b/0x120
>  do_sys_poll+0x305/0x400
>  do_syscall_64+0x33/0x40
>  entry_SYSCALL_64_after_hwframe+0x61/0xc6
>
> Allocated by task 15306:
>  kasan_save_stack+0x1b/0x40
>  __kasan_kmalloc.constprop.0+0xb5/0xe0
>  psi_trigger_create.part.0+0xfc/0x450
>  cgroup_pressure_write+0xfc/0x3b0
>  cgroup_file_write+0x1b3/0x390
>  kernfs_fop_write_iter+0x224/0x2e0
>  new_sync_write+0x2ac/0x3a0
>  vfs_write+0x365/0x430
>  ksys_write+0xd5/0x1b0
>  do_syscall_64+0x33/0x40
>  entry_SYSCALL_64_after_hwframe+0x61/0xc6
>
> Freed by task 15850:
>  kasan_save_stack+0x1b/0x40
>  kasan_set_track+0x1c/0x30
>  kasan_set_free_info+0x20/0x40
>  __kasan_slab_free+0x151/0x180
>  kfree+0xba/0x680
>  cgroup_file_release+0x5c/0xe0
>  kernfs_drain_open_files+0x122/0x1e0
>  kernfs_drain+0xff/0x1e0
>  __kernfs_remove.part.0+0x1d1/0x3b0
>  kernfs_remove_by_name_ns+0x89/0xf0
>  cgroup_addrm_files+0x393/0x3d0
>  css_clear_dir+0x8f/0x120
>  kill_css+0x41/0xd0
>  cgroup_destroy_locked+0x166/0x300
>  cgroup_rmdir+0x37/0x140
>  kernfs_iop_rmdir+0xbb/0xf0
>  vfs_rmdir.part.0+0xa5/0x230
>  do_rmdir+0x2e0/0x320
>  __x64_sys_unlinkat+0x99/0xc0
>  do_syscall_64+0x33/0x40
>  entry_SYSCALL_64_after_hwframe+0x61/0xc6
> ==================================================================
>
> If using epoll(), wake_up_pollfree will empty waitqueue and set
> wait_queue_head is NULL before free waitqueue of psi trigger. But is
> doesn't work when using poll(), which will lead a UAF problem in
> poll_freewait coms as following:
>
> (cgroup_rmdir)                      |
> psi_trigger_destroy                 |
>   wake_up_pollfree(&t->event_wait)  |

It's important to note that psi_trigger_destroy() calls
synchronize_rcu() before doing kfree(t), therefore the usage I think
is valid.


>     kfree(t)                        |
>                                     |   (poll_freewait)
>                                     |     free_poll_entry(pwq->inline_entries + i)
>                                     |       remove_wait_queue(entry->wait_address)
>                                     |         spin_lock_irqsave(&wq_head->lock)
>
> entry->wait_address in poll_freewait() is t->event_wait in cgroup_rmdir().
> t->event_wait is free in psi_trigger_destroy before call poll_freewait(),
> therefore wq_head in poll_freewait() has been already freed, which would
> lead to a UAF.
>
> similar problem for epoll() has been fixed commit c2dbe32d5db5
> ("sched/psi: Fix use-after-free in ep_remove_wait_queue()").
> epoll wakeup function ep_poll_callback() will empty waitqueue and set
> wait_queue_head is NULL when pollflags is POLLFREE and judge pwq->whead
> is NULL or not before remove_wait_queue in ep_remove_wait_queue(),
> which will fix the UAF bug in ep_remove_wait_queue.
>
> But poll wakeup function pollwake() doesn't do that. To fix the
> problem, we empty waitqueue and set wait_address is NULL in pollwake() when
> key is POLLFREE. otherwise in remove_wait_queue, which is similar to
> epoll().

Thanks for the patch!
This seems similar to what ep_poll_callback/ep_remove_wait_queue do,
which I think makes sense. CC'ing Oleg Nesterov who implemented
ep_poll_callback/ep_remove_wait_queue logic and Eric Biggers who
worked on wake_up_pollfree() - both much more knowledgeable in this
area than me.

One issue I see with this patch is that the title says "sched/psi:
..." while it's fixing polling functionality. The patch is fixing the
mechanism used by psi triggers, not psi triggers themselves (well it
does but indirectly). Therefore I suggest changing that prefix to
something like "select: Fix use-after-free in poll_freewait()"

Thanks,
Suren.

>
> Fixes: 0e94682b73bf ("psi: introduce psi monitor")
> Signed-off-by: Lu Jialin <lujialin4@huawei.com>
> ---
>  fs/select.c | 20 +++++++++++++++++++-
>  1 file changed, 19 insertions(+), 1 deletion(-)
>
> diff --git a/fs/select.c b/fs/select.c
> index 0ee55af1a55c..e64c7b4e9959 100644
> --- a/fs/select.c
> +++ b/fs/select.c
> @@ -132,7 +132,17 @@ EXPORT_SYMBOL(poll_initwait);
>
>  static void free_poll_entry(struct poll_table_entry *entry)
>  {
> -       remove_wait_queue(entry->wait_address, &entry->wait);
> +       wait_queue_head_t *whead;
> +
> +       rcu_read_lock();
> +       /* If it is cleared by POLLFREE, it should be rcu-safe.
> +        * If we read NULL we need a barrier paired with smp_store_release()
> +        * in pollwake().
> +        */
> +       whead = smp_load_acquire(&entry->wait_address);
> +       if (whead)
> +               remove_wait_queue(whead, &entry->wait);
> +       rcu_read_unlock();
>         fput(entry->filp);
>  }
>
> @@ -215,6 +225,14 @@ static int pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key
>         entry = container_of(wait, struct poll_table_entry, wait);
>         if (key && !(key_to_poll(key) & entry->key))
>                 return 0;
> +       if (key_to_poll(key) & POLLFREE) {
> +               list_del_init(&wait->entry);
> +               /* wait_address !=NULL protects us from the race with
> +                * poll_freewait().
> +                */
> +               smp_store_release(&entry->wait_address, NULL);
> +               return 0;
> +       }
>         return __pollwake(wait, mode, sync, key);
>  }
>
> --
> 2.17.1
>

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] sched/psi: Fix use-after-free in poll_freewait()
  2023-06-13 23:42 ` Suren Baghdasaryan
@ 2023-06-14  3:47   ` lujialin (A)
  0 siblings, 0 replies; 3+ messages in thread
From: lujialin (A) @ 2023-06-14  3:47 UTC (permalink / raw)
  To: Suren Baghdasaryan
  Cc: Johannes Weiner, Andrew Morton, Alexander Viro,
	Christian Brauner, linux-kernel, linux-fsdevel, Eric Biggers,
	Oleg Nesterov



在 2023/6/14 7:42, Suren Baghdasaryan 写道:
> On Mon, Jun 12, 2023 at 11:24 PM Lu Jialin <lujialin4@huawei.com> wrote:
>>
>> We found a UAF bug in remove_wait_queue as follows:
>>
>> ==================================================================
>> BUG: KASAN: use-after-free in _raw_spin_lock_irqsave+0x71/0xe0
>> Write of size 4 at addr ffff8881150d7b28 by task psi_trigger/15306
>> Call Trace:
>>   dump_stack+0x9c/0xd3
>>   print_address_description.constprop.0+0x19/0x170
>>   __kasan_report.cold+0x6c/0x84
>>   kasan_report+0x3a/0x50
>>   check_memory_region+0xfd/0x1f0
>>   _raw_spin_lock_irqsave+0x71/0xe0
>>   remove_wait_queue+0x26/0xc0
>>   poll_freewait+0x6b/0x120
>>   do_sys_poll+0x305/0x400
>>   do_syscall_64+0x33/0x40
>>   entry_SYSCALL_64_after_hwframe+0x61/0xc6
>>
>> Allocated by task 15306:
>>   kasan_save_stack+0x1b/0x40
>>   __kasan_kmalloc.constprop.0+0xb5/0xe0
>>   psi_trigger_create.part.0+0xfc/0x450
>>   cgroup_pressure_write+0xfc/0x3b0
>>   cgroup_file_write+0x1b3/0x390
>>   kernfs_fop_write_iter+0x224/0x2e0
>>   new_sync_write+0x2ac/0x3a0
>>   vfs_write+0x365/0x430
>>   ksys_write+0xd5/0x1b0
>>   do_syscall_64+0x33/0x40
>>   entry_SYSCALL_64_after_hwframe+0x61/0xc6
>>
>> Freed by task 15850:
>>   kasan_save_stack+0x1b/0x40
>>   kasan_set_track+0x1c/0x30
>>   kasan_set_free_info+0x20/0x40
>>   __kasan_slab_free+0x151/0x180
>>   kfree+0xba/0x680
>>   cgroup_file_release+0x5c/0xe0
>>   kernfs_drain_open_files+0x122/0x1e0
>>   kernfs_drain+0xff/0x1e0
>>   __kernfs_remove.part.0+0x1d1/0x3b0
>>   kernfs_remove_by_name_ns+0x89/0xf0
>>   cgroup_addrm_files+0x393/0x3d0
>>   css_clear_dir+0x8f/0x120
>>   kill_css+0x41/0xd0
>>   cgroup_destroy_locked+0x166/0x300
>>   cgroup_rmdir+0x37/0x140
>>   kernfs_iop_rmdir+0xbb/0xf0
>>   vfs_rmdir.part.0+0xa5/0x230
>>   do_rmdir+0x2e0/0x320
>>   __x64_sys_unlinkat+0x99/0xc0
>>   do_syscall_64+0x33/0x40
>>   entry_SYSCALL_64_after_hwframe+0x61/0xc6
>> ==================================================================
>>
>> If using epoll(), wake_up_pollfree will empty waitqueue and set
>> wait_queue_head is NULL before free waitqueue of psi trigger. But is
>> doesn't work when using poll(), which will lead a UAF problem in
>> poll_freewait coms as following:
>>
>> (cgroup_rmdir)                      |
>> psi_trigger_destroy                 |
>>    wake_up_pollfree(&t->event_wait)  |
> 
> It's important to note that psi_trigger_destroy() calls
> synchronize_rcu() before doing kfree(t), therefore the usage I think
> is valid.
> 
> 
>>      kfree(t)                        |
>>                                      |   (poll_freewait)
>>                                      |     free_poll_entry(pwq->inline_entries + i)
>>                                      |       remove_wait_queue(entry->wait_address)
>>                                      |         spin_lock_irqsave(&wq_head->lock)
>>
>> entry->wait_address in poll_freewait() is t->event_wait in cgroup_rmdir().
>> t->event_wait is free in psi_trigger_destroy before call poll_freewait(),
>> therefore wq_head in poll_freewait() has been already freed, which would
>> lead to a UAF.
>>
>> similar problem for epoll() has been fixed commit c2dbe32d5db5
>> ("sched/psi: Fix use-after-free in ep_remove_wait_queue()").
>> epoll wakeup function ep_poll_callback() will empty waitqueue and set
>> wait_queue_head is NULL when pollflags is POLLFREE and judge pwq->whead
>> is NULL or not before remove_wait_queue in ep_remove_wait_queue(),
>> which will fix the UAF bug in ep_remove_wait_queue.
>>
>> But poll wakeup function pollwake() doesn't do that. To fix the
>> problem, we empty waitqueue and set wait_address is NULL in pollwake() when
>> key is POLLFREE. otherwise in remove_wait_queue, which is similar to
>> epoll().
> 
> Thanks for the patch!
> This seems similar to what ep_poll_callback/ep_remove_wait_queue do,
> which I think makes sense. CC'ing Oleg Nesterov who implemented
> ep_poll_callback/ep_remove_wait_queue logic and Eric Biggers who
> worked on wake_up_pollfree() - both much more knowledgeable in this
> area than me.
> 
> One issue I see with this patch is that the title says "sched/psi:
> ..." while it's fixing polling functionality. The patch is fixing the
> mechanism used by psi triggers, not psi triggers themselves (well it
> does but indirectly). Therefore I suggest changing that prefix to
> something like "select: Fix use-after-free in poll_freewait()"
> 
> Thanks,
> Suren.
> 
>>
>> Fixes: 0e94682b73bf ("psi: introduce psi monitor")
>> Signed-off-by: Lu Jialin <lujialin4@huawei.com>
>> ---
>>   fs/select.c | 20 +++++++++++++++++++-
>>   1 file changed, 19 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/select.c b/fs/select.c
>> index 0ee55af1a55c..e64c7b4e9959 100644
>> --- a/fs/select.c
>> +++ b/fs/select.c
>> @@ -132,7 +132,17 @@ EXPORT_SYMBOL(poll_initwait);
>>
>>   static void free_poll_entry(struct poll_table_entry *entry)
>>   {
>> -       remove_wait_queue(entry->wait_address, &entry->wait);
>> +       wait_queue_head_t *whead;
>> +
>> +       rcu_read_lock();
>> +       /* If it is cleared by POLLFREE, it should be rcu-safe.
>> +        * If we read NULL we need a barrier paired with smp_store_release()
>> +        * in pollwake().
>> +        */
>> +       whead = smp_load_acquire(&entry->wait_address);
>> +       if (whead)
>> +               remove_wait_queue(whead, &entry->wait);
>> +       rcu_read_unlock();
>>          fput(entry->filp);
>>   }
>>
>> @@ -215,6 +225,14 @@ static int pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key
>>          entry = container_of(wait, struct poll_table_entry, wait);
>>          if (key && !(key_to_poll(key) & entry->key))
>>                  return 0;
>> +       if (key_to_poll(key) & POLLFREE) {
>> +               list_del_init(&wait->entry);
>> +               /* wait_address !=NULL protects us from the race with
>> +                * poll_freewait().
>> +                */
>> +               smp_store_release(&entry->wait_address, NULL);
>> +               return 0;
>> +       }
>>          return __pollwake(wait, mode, sync, key);
>>   }
>>
>> --
>> 2.17.1
>>
> .
> 
Thanks for your suggestion.I will correct the commit msg and title in 
v2.And cc to Oleg Nesterov and Eric Biggers.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-06-14  3:48 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-13  6:23 [PATCH] sched/psi: Fix use-after-free in poll_freewait() Lu Jialin
2023-06-13 23:42 ` Suren Baghdasaryan
2023-06-14  3:47   ` lujialin (A)

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).