linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* uprobes: memory leak in enable/disable loop
@ 2016-08-15 20:58 Brenden Blanco
  2016-08-16 14:13 ` Oleg Nesterov
  0 siblings, 1 reply; 12+ messages in thread
From: Brenden Blanco @ 2016-08-15 20:58 UTC (permalink / raw)
  To: linux-kernel
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Alexander Shishkin, Alexei Starovoitov, Oleg Nesterov

Hi folks,

I think I have come across a memory leak in uprobes, which is fairly easy to
reproduce.

Using a repeated use of a single uprobe, I am able to run my system out of
memory, resulting in system failures and a need to reboot the box.  When I
first became aware of the failure, I was able to narrow down the first
userspace-visible failure to -ENOMEM being returned from
kernel/events/uprobes.c:uprobe_write_opcode(), though I can't say whether that
is the actual cause of the leak or just a victim. With the below reproduction
steps, hopefully someone smarter than me can understand the problem and propose
a solution, which I would be happy to test.

So far, I have reproduced the issue on kernels 4.4 and 4.8-rc2. I've tried on
distributions from Ubuntu as well as Arch Linux. Continue reading for a rough
log of the steps to reproduce.

Thanks,
Brenden

----------------------------------------

root@localhost# uname -r
4.8.0-040800rc2-generic
root@localhost# cd /sys/kernel/debug/tracing
root@localhost# echo "p:uprobes/libc_so_6 /lib/x86_64-linux-gnu/libc.so.6:0x0000000000086560" > uprobe_events
root@localhost# # number of iterations before failure depends on the amount of memory in your system
root@localhost# for i in {1..48}; do echo 1 > events/uprobes/libc_so_6/enable; echo 0 > events/uprobes/libc_so_6/enable; done

root@localhost# dmesg
[ 2182.439038] ------------[ cut here ]------------
[ 2182.439042] WARNING: CPU: 6 PID: 4763 at /home/kernel/COD/linux/mm/page_counter.c:26 page_counter_uncharge+0x1d/0x30
[ 2182.439043] Modules linked in: fuse binfmt_misc snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep ppdev snd_pcm parport_pc snd_timer sg virtio_balloon virtio_console acpi_cpufreq parport serio_raw i2c_piix4 snd soundcore tpm_tis intel_powerclamp tpm_tis_core intel_rapl_perf evdev squashfs tpm loop ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 ext4 crc16 jbd2 fscrypto mbcache btrfs dm_mod raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c crc32c_generic raid1 raid0 multipath linear md_mod sd_mod ata_generic 8139too crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel qxl aesni_intel aes_x86_64 lrw ttm virtio_pci ehci_pci uhci_hcd drm_kms_helper ehci_hcd ata_piix
[ 2182.439067]  gf128mul psmouse virtio_ring glue_helper libata ablk_helper 8139cp scsi_mod cryptd mii usbcore usb_common floppy virtio drm button
[ 2182.439074] CPU: 6 PID: 4763 Comm: bash Not tainted 4.8.0-040800rc2-generic #201608142332
[ 2182.439074] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.3-20160701_074356-anatol 04/01/2014
[ 2182.439075]  0000000000000286 00000000d2708db2 ffffffff9ad3de94 0000000000000000
[ 2182.439076]  0000000000000000 ffffffff9aa7f4ee ffff8be0e749e8c0 000000000000001f
[ 2182.439077]  ffff8be0e749e800 000000000000001f ffff8be0f4647bb0 ffff8be0e71860c0
[ 2182.439078] Call Trace:
[ 2182.439081]  [<ffffffff9ad3de94>] ? dump_stack+0x5c/0x78
[ 2182.439083]  [<ffffffff9aa7f4ee>] ? __warn+0xbe/0xe0
[ 2182.439084]  [<ffffffff9abff52d>] ? page_counter_uncharge+0x1d/0x30
[ 2182.439085]  [<ffffffff9ac01512>] ? drain_stock.isra.30+0x32/0xa0
[ 2182.439086]  [<ffffffff9ac023f1>] ? try_charge+0x5f1/0x6b0
[ 2182.439087]  [<ffffffff9abe610b>] ? alloc_pages_vma+0xbb/0x240
[ 2182.439088]  [<ffffffff9ac06157>] ? mem_cgroup_try_charge+0x67/0x1b0
[ 2182.439090]  [<ffffffff9ab8156c>] ? uprobe_write_opcode+0x25c/0x5e0
[ 2182.439091]  [<ffffffff9ab81965>] ? install_breakpoint.isra.21.part.22+0x55/0x70
[ 2182.439092]  [<ffffffff9ab81d0d>] ? register_for_each_vma+0x36d/0x420
[ 2182.439092]  [<ffffffff9ab8201e>] ? uprobe_register+0x16e/0x290
[ 2182.439094]  [<ffffffff9ab65aef>] ? probe_event_enable+0xef/0x350
[ 2182.439096]  [<ffffffff9ab51cdc>] ? __ftrace_event_enable_disable+0x5c/0x1e0
[ 2182.439097]  [<ffffffff9ab52645>] ? event_enable_write+0x95/0xe0
[ 2182.439098]  [<ffffffff9ac0ebc3>] ? __vfs_write+0x33/0x160
[ 2182.439101]  [<ffffffff9ac2e49e>] ? __fd_install+0x2e/0xe0
[ 2182.439102]  [<ffffffff9ac2e313>] ? __alloc_fd+0x43/0x170
[ 2182.439104]  [<ffffffff9acacf78>] ? security_file_permission+0x38/0xb0
[ 2182.439105]  [<ffffffff9ac0f2c0>] ? vfs_write+0xb0/0x190
[ 2182.439106]  [<ffffffff9ac10692>] ? SyS_write+0x52/0xc0
[ 2182.439107]  [<ffffffff9ac2ea65>] ? SyS_dup2+0x95/0x100
[ 2182.439109]  [<ffffffff9b01d8b6>] ? entry_SYSCALL_64_fastpath+0x1e/0xa8
[ 2182.439110] ---[ end trace 320efb00be7bb830 ]---

After this, continued toggling of the uprobe will eventually fail.

root@localhost# for i in {1..100}; do echo 1 > events/uprobes/libc_so_6/enable; [[ $? -ne 0 ]] && break; echo 0 > events/uprobes/libc_so_6/enable; done
-bash: echo: write error: Cannot allocate memory

After running the uprobes out of memory, oom-killer started to pick off system processes:

[   97.334378] acpid invoked oom-killer: gfp_mask=0x24000c0(GFP_KERNEL), order=0, oom_score_adj=0
[   97.334379] acpid cpuset=/ mems_allowed=0
[   97.334383] CPU: 4 PID: 4179 Comm: acpid Tainted: G        W       4.8.0-040800rc2-generic #201608142332
[   97.334384] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.3-20160701_074356-anatol 04/01/2014
[   97.334385]  0000000000000286 000000006b30ae78 ffffffffa673de94 ffff9522a946be08
[   97.334387]  ffff9522a9464640 ffffffffa660b716 ffff9522a946bd78 ffff9522a9453cc0
[   97.334388]  0000000000030080 ffffffffa658c0e6 0007ffffffffffff ffff9522a9464640
[   97.334390] Call Trace:
[   97.334395]  [<ffffffffa673de94>] ? dump_stack+0x5c/0x78
[   97.334397]  [<ffffffffa660b716>] ? dump_header+0x59/0x1dc
[   97.334398]  [<ffffffffa658c0e6>] ? find_lock_task_mm+0x36/0x80
[   97.334400]  [<ffffffffa658ccb2>] ? oom_kill_process+0x222/0x3e0
[   97.334402]  [<ffffffffa6602910>] ? mem_cgroup_iter+0x100/0x2e0
[   97.334403]  [<ffffffffa6604c2d>] ? mem_cgroup_out_of_memory+0x28d/0x2d0
[   97.334405]  [<ffffffffa660575e>] ? mem_cgroup_oom_synchronize+0x32e/0x340
[   97.334406]  [<ffffffffa6600ab0>] ? memory_high_write+0xd0/0xd0
[   97.334408]  [<ffffffffa658d30d>] ? pagefault_out_of_memory+0x4d/0xc0
[   97.334410]  [<ffffffffa6a1ec88>] ? async_page_fault+0x28/0x30
[   97.334411] Task in /system.slice/acpid.service killed as a result of limit of /system.slice/acpid.service
[   97.334414] memory: usage 18446744073709551276kB, limit 9007199254740988kB, failcnt 121
[   97.334414] memory+swap: usage 0kB, limit 9007199254740988kB, failcnt 0
[   97.334415] kmem: usage 52kB, limit 9007199254740988kB, failcnt 0
[   97.334415] Memory cgroup stats for /system.slice/acpid.service: cache:0KB rss:0KB rss_huge:0KB mapped_file:0KB dirty:0KB writeback:0KB inactive_anon:0KB active_anon:0KB inactive_file:0KB active_file:0KB unevictable:0KB
[   97.334422] [ pid ]   uid  tgid total_vm      rss nr_ptes nr_pmds swapents oom_score_adj name
[   97.334637] [ 4179]     0  4179     1100      287       7       3       37             0 acpid
[   97.334651] Memory cgroup out of memory: Kill process 4179 (acpid) score 0 or sacrifice child
[   97.334695] Killed process 4179 (acpid) total-vm:4400kB, anon-rss:0kB, file-rss:1148kB, shmem-rss:0kB

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: uprobes: memory leak in enable/disable loop
  2016-08-15 20:58 uprobes: memory leak in enable/disable loop Brenden Blanco
@ 2016-08-16 14:13 ` Oleg Nesterov
  2016-08-16 14:25   ` Oleg Nesterov
  0 siblings, 1 reply; 12+ messages in thread
From: Oleg Nesterov @ 2016-08-16 14:13 UTC (permalink / raw)
  To: Brenden Blanco, Johannes Weiner, Michal Hocko, Vladimir Davydov
  Cc: linux-kernel, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo, Alexander Shishkin, Alexei Starovoitov

On 08/15, Brenden Blanco wrote:
>
> Hi folks,
>
> I think I have come across a memory leak in uprobes, which is fairly easy to
> reproduce.

At first glance this looks as a problem in memcg, add CC's...

put_page(old_page) looks properly balanced, and I assume we do not need
the additional "uncharge", we can rely on __page_cache_release().

And I do not see any leak if I try to reproduce with CONFIG_MEMCG=n.

> Using a repeated use of a single uprobe, I am able to run my system out of
> memory, resulting in system failures and a need to reboot the box.  When I
> first became aware of the failure, I was able to narrow down the first
> userspace-visible failure to -ENOMEM being returned from
> kernel/events/uprobes.c:uprobe_write_opcode(), though I can't say whether that
> is the actual cause of the leak or just a victim. With the below reproduction
> steps, hopefully someone smarter than me can understand the problem and propose
> a solution, which I would be happy to test.
> 
> So far, I have reproduced the issue on kernels 4.4 and 4.8-rc2. I've tried on
> distributions from Ubuntu as well as Arch Linux. Continue reading for a rough
> log of the steps to reproduce.
> 
> Thanks,
> Brenden
> 
> ----------------------------------------
> 
> root@localhost# uname -r
> 4.8.0-040800rc2-generic
> root@localhost# cd /sys/kernel/debug/tracing
> root@localhost# echo "p:uprobes/libc_so_6 /lib/x86_64-linux-gnu/libc.so.6:0x0000000000086560" > uprobe_events
> root@localhost# # number of iterations before failure depends on the amount of memory in your system
> root@localhost# for i in {1..48}; do echo 1 > events/uprobes/libc_so_6/enable; echo 0 > events/uprobes/libc_so_6/enable; done
> 
> root@localhost# dmesg
> [ 2182.439038] ------------[ cut here ]------------
> [ 2182.439042] WARNING: CPU: 6 PID: 4763 at /home/kernel/COD/linux/mm/page_counter.c:26 page_counter_uncharge+0x1d/0x30
> [ 2182.439043] Modules linked in: fuse binfmt_misc snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep ppdev snd_pcm parport_pc snd_timer sg virtio_balloon virtio_console acpi_cpufreq parport serio_raw i2c_piix4 snd soundcore tpm_tis intel_powerclamp tpm_tis_core intel_rapl_perf evdev squashfs tpm loop ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 ext4 crc16 jbd2 fscrypto mbcache btrfs dm_mod raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c crc32c_generic raid1 raid0 multipath linear md_mod sd_mod ata_generic 8139too crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel qxl aesni_intel aes_x86_64 lrw ttm virtio_pci ehci_pci uhci_hcd drm_kms_helper ehci_hcd ata_piix
> [ 2182.439067]  gf128mul psmouse virtio_ring glue_helper libata ablk_helper 8139cp scsi_mod cryptd mii usbcore usb_common floppy virtio drm button
> [ 2182.439074] CPU: 6 PID: 4763 Comm: bash Not tainted 4.8.0-040800rc2-generic #201608142332
> [ 2182.439074] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.3-20160701_074356-anatol 04/01/2014
> [ 2182.439075]  0000000000000286 00000000d2708db2 ffffffff9ad3de94 0000000000000000
> [ 2182.439076]  0000000000000000 ffffffff9aa7f4ee ffff8be0e749e8c0 000000000000001f
> [ 2182.439077]  ffff8be0e749e800 000000000000001f ffff8be0f4647bb0 ffff8be0e71860c0
> [ 2182.439078] Call Trace:
> [ 2182.439081]  [<ffffffff9ad3de94>] ? dump_stack+0x5c/0x78
> [ 2182.439083]  [<ffffffff9aa7f4ee>] ? __warn+0xbe/0xe0
> [ 2182.439084]  [<ffffffff9abff52d>] ? page_counter_uncharge+0x1d/0x30
> [ 2182.439085]  [<ffffffff9ac01512>] ? drain_stock.isra.30+0x32/0xa0
> [ 2182.439086]  [<ffffffff9ac023f1>] ? try_charge+0x5f1/0x6b0
> [ 2182.439087]  [<ffffffff9abe610b>] ? alloc_pages_vma+0xbb/0x240
> [ 2182.439088]  [<ffffffff9ac06157>] ? mem_cgroup_try_charge+0x67/0x1b0
> [ 2182.439090]  [<ffffffff9ab8156c>] ? uprobe_write_opcode+0x25c/0x5e0
> [ 2182.439091]  [<ffffffff9ab81965>] ? install_breakpoint.isra.21.part.22+0x55/0x70
> [ 2182.439092]  [<ffffffff9ab81d0d>] ? register_for_each_vma+0x36d/0x420
> [ 2182.439092]  [<ffffffff9ab8201e>] ? uprobe_register+0x16e/0x290
> [ 2182.439094]  [<ffffffff9ab65aef>] ? probe_event_enable+0xef/0x350
> [ 2182.439096]  [<ffffffff9ab51cdc>] ? __ftrace_event_enable_disable+0x5c/0x1e0
> [ 2182.439097]  [<ffffffff9ab52645>] ? event_enable_write+0x95/0xe0
> [ 2182.439098]  [<ffffffff9ac0ebc3>] ? __vfs_write+0x33/0x160
> [ 2182.439101]  [<ffffffff9ac2e49e>] ? __fd_install+0x2e/0xe0
> [ 2182.439102]  [<ffffffff9ac2e313>] ? __alloc_fd+0x43/0x170
> [ 2182.439104]  [<ffffffff9acacf78>] ? security_file_permission+0x38/0xb0
> [ 2182.439105]  [<ffffffff9ac0f2c0>] ? vfs_write+0xb0/0x190
> [ 2182.439106]  [<ffffffff9ac10692>] ? SyS_write+0x52/0xc0
> [ 2182.439107]  [<ffffffff9ac2ea65>] ? SyS_dup2+0x95/0x100
> [ 2182.439109]  [<ffffffff9b01d8b6>] ? entry_SYSCALL_64_fastpath+0x1e/0xa8
> [ 2182.439110] ---[ end trace 320efb00be7bb830 ]---
> 
> After this, continued toggling of the uprobe will eventually fail.
> 
> root@localhost# for i in {1..100}; do echo 1 > events/uprobes/libc_so_6/enable; [[ $? -ne 0 ]] && break; echo 0 > events/uprobes/libc_so_6/enable; done
> -bash: echo: write error: Cannot allocate memory
> 
> After running the uprobes out of memory, oom-killer started to pick off system processes:
> 
> [   97.334378] acpid invoked oom-killer: gfp_mask=0x24000c0(GFP_KERNEL), order=0, oom_score_adj=0
> [   97.334379] acpid cpuset=/ mems_allowed=0
> [   97.334383] CPU: 4 PID: 4179 Comm: acpid Tainted: G        W       4.8.0-040800rc2-generic #201608142332
> [   97.334384] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.3-20160701_074356-anatol 04/01/2014
> [   97.334385]  0000000000000286 000000006b30ae78 ffffffffa673de94 ffff9522a946be08
> [   97.334387]  ffff9522a9464640 ffffffffa660b716 ffff9522a946bd78 ffff9522a9453cc0
> [   97.334388]  0000000000030080 ffffffffa658c0e6 0007ffffffffffff ffff9522a9464640
> [   97.334390] Call Trace:
> [   97.334395]  [<ffffffffa673de94>] ? dump_stack+0x5c/0x78
> [   97.334397]  [<ffffffffa660b716>] ? dump_header+0x59/0x1dc
> [   97.334398]  [<ffffffffa658c0e6>] ? find_lock_task_mm+0x36/0x80
> [   97.334400]  [<ffffffffa658ccb2>] ? oom_kill_process+0x222/0x3e0
> [   97.334402]  [<ffffffffa6602910>] ? mem_cgroup_iter+0x100/0x2e0
> [   97.334403]  [<ffffffffa6604c2d>] ? mem_cgroup_out_of_memory+0x28d/0x2d0
> [   97.334405]  [<ffffffffa660575e>] ? mem_cgroup_oom_synchronize+0x32e/0x340
> [   97.334406]  [<ffffffffa6600ab0>] ? memory_high_write+0xd0/0xd0
> [   97.334408]  [<ffffffffa658d30d>] ? pagefault_out_of_memory+0x4d/0xc0
> [   97.334410]  [<ffffffffa6a1ec88>] ? async_page_fault+0x28/0x30
> [   97.334411] Task in /system.slice/acpid.service killed as a result of limit of /system.slice/acpid.service
> [   97.334414] memory: usage 18446744073709551276kB, limit 9007199254740988kB, failcnt 121
> [   97.334414] memory+swap: usage 0kB, limit 9007199254740988kB, failcnt 0
> [   97.334415] kmem: usage 52kB, limit 9007199254740988kB, failcnt 0
> [   97.334415] Memory cgroup stats for /system.slice/acpid.service: cache:0KB rss:0KB rss_huge:0KB mapped_file:0KB dirty:0KB writeback:0KB inactive_anon:0KB active_anon:0KB inactive_file:0KB active_file:0KB unevictable:0KB
> [   97.334422] [ pid ]   uid  tgid total_vm      rss nr_ptes nr_pmds swapents oom_score_adj name
> [   97.334637] [ 4179]     0  4179     1100      287       7       3       37             0 acpid
> [   97.334651] Memory cgroup out of memory: Kill process 4179 (acpid) score 0 or sacrifice child
> [   97.334695] Killed process 4179 (acpid) total-vm:4400kB, anon-rss:0kB, file-rss:1148kB, shmem-rss:0kB
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: uprobes: memory leak in enable/disable loop
  2016-08-16 14:13 ` Oleg Nesterov
@ 2016-08-16 14:25   ` Oleg Nesterov
  2016-08-16 14:34     ` Oleg Nesterov
  2016-08-16 14:37     ` Michal Hocko
  0 siblings, 2 replies; 12+ messages in thread
From: Oleg Nesterov @ 2016-08-16 14:25 UTC (permalink / raw)
  To: Brenden Blanco, Johannes Weiner, Michal Hocko, Vladimir Davydov
  Cc: linux-kernel, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo, Alexander Shishkin, Alexei Starovoitov

On 08/16, Oleg Nesterov wrote:
>
> On 08/15, Brenden Blanco wrote:
> >
> > Hi folks,
> >
> > I think I have come across a memory leak in uprobes, which is fairly easy to
> > reproduce.
>
> At first glance this looks as a problem in memcg, add CC's...
>
> put_page(old_page) looks properly balanced, and I assume we do not need
> the additional "uncharge", we can rely on __page_cache_release().
>
> And I do not see any leak if I try to reproduce with CONFIG_MEMCG=n.

Heh. it seems that mem_cgroup_*() logic was always wrong in __replace_page().

Could you try the patch below?

Oleg.
---

--- x/kernel/events/uprobes.c
+++ x/kernel/events/uprobes.c
@@ -200,7 +200,8 @@ static int __replace_page(struct vm_area
 
 	err = 0;
  unlock:
-	mem_cgroup_cancel_charge(kpage, memcg, false);
+ 	if (err)
+		mem_cgroup_cancel_charge(kpage, memcg, false);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 	unlock_page(page);
 	return err;

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: uprobes: memory leak in enable/disable loop
  2016-08-16 14:25   ` Oleg Nesterov
@ 2016-08-16 14:34     ` Oleg Nesterov
  2016-08-16 16:55       ` Brenden Blanco
  2016-08-16 17:36       ` uprobes: memory leak in enable/disable loop Johannes Weiner
  2016-08-16 14:37     ` Michal Hocko
  1 sibling, 2 replies; 12+ messages in thread
From: Oleg Nesterov @ 2016-08-16 14:34 UTC (permalink / raw)
  To: Brenden Blanco, Johannes Weiner, Michal Hocko, Vladimir Davydov
  Cc: linux-kernel, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo, Alexander Shishkin, Alexei Starovoitov

On 08/16, Oleg Nesterov wrote:
>
> On 08/16, Oleg Nesterov wrote:
> >
> > On 08/15, Brenden Blanco wrote:
> > >
> > > Hi folks,
> > >
> > > I think I have come across a memory leak in uprobes, which is fairly easy to
> > > reproduce.
> >
> > At first glance this looks as a problem in memcg, add CC's...
> >
> > put_page(old_page) looks properly balanced, and I assume we do not need
> > the additional "uncharge", we can rely on __page_cache_release().
> >
> > And I do not see any leak if I try to reproduce with CONFIG_MEMCG=n.
>
> Heh. it seems that mem_cgroup_*() logic was always wrong in __replace_page().

Yes, it seems this was broken by 00501b53 "mm: memcontrol: rewrite charge API".

> Could you try the patch below?

Please see v2 below. We don't need "cancel_charge" under "unlock:" at all.

Johannes, could you review?

Oleg.
---
--- x/kernel/events/uprobes.c
+++ x/kernel/events/uprobes.c
@@ -172,8 +172,10 @@ static int __replace_page(struct vm_area
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 	err = -EAGAIN;
 	ptep = page_check_address(page, mm, addr, &ptl, 0);
-	if (!ptep)
+	if (!ptep) {
+		mem_cgroup_cancel_charge(kpage, memcg, false);
 		goto unlock;
+	}
 
 	get_page(kpage);
 	page_add_new_anon_rmap(kpage, vma, addr, false);
@@ -200,7 +202,6 @@ static int __replace_page(struct vm_area
 
 	err = 0;
  unlock:
-	mem_cgroup_cancel_charge(kpage, memcg, false);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 	unlock_page(page);
 	return err;

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: uprobes: memory leak in enable/disable loop
  2016-08-16 14:25   ` Oleg Nesterov
  2016-08-16 14:34     ` Oleg Nesterov
@ 2016-08-16 14:37     ` Michal Hocko
  1 sibling, 0 replies; 12+ messages in thread
From: Michal Hocko @ 2016-08-16 14:37 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Brenden Blanco, Johannes Weiner, Vladimir Davydov, linux-kernel,
	Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Alexander Shishkin, Alexei Starovoitov

On Tue 16-08-16 16:25:12, Oleg Nesterov wrote:
> On 08/16, Oleg Nesterov wrote:
> >
> > On 08/15, Brenden Blanco wrote:
> > >
> > > Hi folks,
> > >
> > > I think I have come across a memory leak in uprobes, which is fairly easy to
> > > reproduce.
> >
> > At first glance this looks as a problem in memcg, add CC's...
> >
> > put_page(old_page) looks properly balanced, and I assume we do not need
> > the additional "uncharge", we can rely on __page_cache_release().
> >
> > And I do not see any leak if I try to reproduce with CONFIG_MEMCG=n.
> 
> Heh. it seems that mem_cgroup_*() logic was always wrong in __replace_page().

Yes this seems broken since 00501b531c47 ("mm: memcontrol: rewrite
charge API")

> Could you try the patch below?

The patch looks good to me. Thanks!

> Oleg.
> ---
> 
> --- x/kernel/events/uprobes.c
> +++ x/kernel/events/uprobes.c
> @@ -200,7 +200,8 @@ static int __replace_page(struct vm_area
>  
>  	err = 0;
>   unlock:
> -	mem_cgroup_cancel_charge(kpage, memcg, false);
> + 	if (err)
> +		mem_cgroup_cancel_charge(kpage, memcg, false);
>  	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
>  	unlock_page(page);
>  	return err;

-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: uprobes: memory leak in enable/disable loop
  2016-08-16 14:34     ` Oleg Nesterov
@ 2016-08-16 16:55       ` Brenden Blanco
  2016-08-17 15:36         ` [PATCH 0/2] " Oleg Nesterov
  2016-08-16 17:36       ` uprobes: memory leak in enable/disable loop Johannes Weiner
  1 sibling, 1 reply; 12+ messages in thread
From: Brenden Blanco @ 2016-08-16 16:55 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Johannes Weiner, Michal Hocko, Vladimir Davydov, linux-kernel,
	Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Alexander Shishkin, Alexei Starovoitov

On Tue, Aug 16, 2016 at 04:34:08PM +0200, Oleg Nesterov wrote:
> On 08/16, Oleg Nesterov wrote:
> >
> > On 08/16, Oleg Nesterov wrote:
> > >
> > > On 08/15, Brenden Blanco wrote:
> > > >
> > > > Hi folks,
> > > >
> > > > I think I have come across a memory leak in uprobes, which is fairly easy to
> > > > reproduce.
> > >
> > > At first glance this looks as a problem in memcg, add CC's...
> > >
> > > put_page(old_page) looks properly balanced, and I assume we do not need
> > > the additional "uncharge", we can rely on __page_cache_release().
> > >
> > > And I do not see any leak if I try to reproduce with CONFIG_MEMCG=n.
> >
> > Heh. it seems that mem_cgroup_*() logic was always wrong in __replace_page().
> 
> Yes, it seems this was broken by 00501b53 "mm: memcontrol: rewrite charge API".
> 
> > Could you try the patch below?
> 
> Please see v2 below. We don't need "cancel_charge" under "unlock:" at all.
> 
> Johannes, could you review?
> 
> Oleg.
> ---
> --- x/kernel/events/uprobes.c
> +++ x/kernel/events/uprobes.c
> @@ -172,8 +172,10 @@ static int __replace_page(struct vm_area
>  	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
>  	err = -EAGAIN;
>  	ptep = page_check_address(page, mm, addr, &ptl, 0);
> -	if (!ptep)
> +	if (!ptep) {
> +		mem_cgroup_cancel_charge(kpage, memcg, false);
>  		goto unlock;
> +	}
>  
>  	get_page(kpage);
>  	page_add_new_anon_rmap(kpage, vma, addr, false);
> @@ -200,7 +202,6 @@ static int __replace_page(struct vm_area
>  
>  	err = 0;
>   unlock:
> -	mem_cgroup_cancel_charge(kpage, memcg, false);
>  	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
>  	unlock_page(page);
>  	return err;
> 
This passes my tests, thanks!

Please note that I applied this to 4.4.15+ubuntu-patches kernel, since that
was what I had most handy, therefore I had to adjust the patch to remove the
unavailable 'compound' bool parameter in 4.4 kernels.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: uprobes: memory leak in enable/disable loop
  2016-08-16 14:34     ` Oleg Nesterov
  2016-08-16 16:55       ` Brenden Blanco
@ 2016-08-16 17:36       ` Johannes Weiner
  1 sibling, 0 replies; 12+ messages in thread
From: Johannes Weiner @ 2016-08-16 17:36 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Brenden Blanco, Michal Hocko, Vladimir Davydov, linux-kernel,
	Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Alexander Shishkin, Alexei Starovoitov

On Tue, Aug 16, 2016 at 04:34:08PM +0200, Oleg Nesterov wrote:
> @@ -172,8 +172,10 @@ static int __replace_page(struct vm_area
>  	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
>  	err = -EAGAIN;
>  	ptep = page_check_address(page, mm, addr, &ptl, 0);
> -	if (!ptep)
> +	if (!ptep) {
> +		mem_cgroup_cancel_charge(kpage, memcg, false);
>  		goto unlock;
> +	}
>  
>  	get_page(kpage);
>  	page_add_new_anon_rmap(kpage, vma, addr, false);
> @@ -200,7 +202,6 @@ static int __replace_page(struct vm_area
>  
>  	err = 0;
>   unlock:
> -	mem_cgroup_cancel_charge(kpage, memcg, false);

Ouch. I must have mistaken this for an exclusive error path.

The patch looks good, thank you.

Reviewed-by: Johannes Weiner <hannes@cmpxchg.org>
Fixes: 00501b531c47 ("mm: memcontrol: rewrite charge API")
Cc: stable@vger.kernel.org # 3.17+

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 0/2] uprobes: memory leak in enable/disable loop
  2016-08-16 16:55       ` Brenden Blanco
@ 2016-08-17 15:36         ` Oleg Nesterov
  2016-08-17 15:36           ` [PATCH 1/2] uprobes: fix the memcg accounting Oleg Nesterov
  2016-08-17 15:37           ` [PATCH 2/2] uprobes: rename the "struct page *" args of __replace_page() Oleg Nesterov
  0 siblings, 2 replies; 12+ messages in thread
From: Oleg Nesterov @ 2016-08-17 15:36 UTC (permalink / raw)
  To: Brenden Blanco, Ingo Molnar
  Cc: Johannes Weiner, Michal Hocko, Vladimir Davydov, linux-kernel,
	Peter Zijlstra, Arnaldo Carvalho de Melo, Alexander Shishkin,
	Alexei Starovoitov

On 08/16, Brenden Blanco wrote:
>
> This passes my tests, thanks!

Great, thanks Brenden!

> Please note that I applied this to 4.4.15+ubuntu-patches kernel, since that
> was what I had most handy, therefore I had to adjust the patch to remove the
> unavailable 'compound' bool parameter in 4.4 kernels.

Yes, sure, this is correct.

Ingo, could you take this fix? IMO 1/2 is 4.8 and -stable matterial.

As for 2/2 it is purely cosmetic but I simply can't resist, I wanted
to make this change every time I had to look at __replace_page().

Oleg.

 kernel/events/uprobes.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/2] uprobes: fix the memcg accounting
  2016-08-17 15:36         ` [PATCH 0/2] " Oleg Nesterov
@ 2016-08-17 15:36           ` Oleg Nesterov
  2016-08-18  8:07             ` [tip:perf/urgent] uprobes: Fix " tip-bot for Oleg Nesterov
  2016-08-17 15:37           ` [PATCH 2/2] uprobes: rename the "struct page *" args of __replace_page() Oleg Nesterov
  1 sibling, 1 reply; 12+ messages in thread
From: Oleg Nesterov @ 2016-08-17 15:36 UTC (permalink / raw)
  To: Brenden Blanco, Ingo Molnar
  Cc: Johannes Weiner, Michal Hocko, Vladimir Davydov, linux-kernel,
	Peter Zijlstra, Arnaldo Carvalho de Melo, Alexander Shishkin,
	Alexei Starovoitov

__replace_page() wronlgy calls mem_cgroup_cancel_charge() in "success" path,
it should only do this if page_check_address() fails.

This means that every enable/disable leads to unbalanced mem_cgroup_uncharge()
from put_page(old_page), it is trivial to underflow the page_counter->count
and trigger OOM.

Reported-and-tested-by: Brenden Blanco <bblanco@plumgrid.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Michal Hocko <mhocko@kernel.org>
Reviewed-by: Johannes Weiner <hannes@cmpxchg.org>
Fixes: 00501b531c47 ("mm: memcontrol: rewrite charge API")
Cc: stable@vger.kernel.org # 3.17+
---
 kernel/events/uprobes.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index b7a525a..8c50276 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -172,8 +172,10 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 	err = -EAGAIN;
 	ptep = page_check_address(page, mm, addr, &ptl, 0);
-	if (!ptep)
+	if (!ptep) {
+		mem_cgroup_cancel_charge(kpage, memcg, false);
 		goto unlock;
+	}
 
 	get_page(kpage);
 	page_add_new_anon_rmap(kpage, vma, addr, false);
@@ -200,7 +202,6 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 
 	err = 0;
  unlock:
-	mem_cgroup_cancel_charge(kpage, memcg, false);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 	unlock_page(page);
 	return err;
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/2] uprobes: rename the "struct page *" args of __replace_page()
  2016-08-17 15:36         ` [PATCH 0/2] " Oleg Nesterov
  2016-08-17 15:36           ` [PATCH 1/2] uprobes: fix the memcg accounting Oleg Nesterov
@ 2016-08-17 15:37           ` Oleg Nesterov
  2016-08-18 10:49             ` [tip:perf/core] uprobes: Rename " tip-bot for Oleg Nesterov
  1 sibling, 1 reply; 12+ messages in thread
From: Oleg Nesterov @ 2016-08-17 15:37 UTC (permalink / raw)
  To: Brenden Blanco, Ingo Molnar
  Cc: Johannes Weiner, Michal Hocko, Vladimir Davydov, linux-kernel,
	Peter Zijlstra, Arnaldo Carvalho de Melo, Alexander Shishkin,
	Alexei Starovoitov

Purely cosmetic, no changes in the compiled code.

Perhaps it is just me but I can hardly read __replace_page() because I can't
distinguish "page" from "kpage" and because I need to look at the caller to
to ensure that, say, kpage is really the new page and the code is correct.
Rename them to old_page and new_page, this matches the caller.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 kernel/events/uprobes.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 8c50276..d4129bb 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -150,7 +150,7 @@ static loff_t vaddr_to_offset(struct vm_area_struct *vma, unsigned long vaddr)
  * Returns 0 on success, -EFAULT on failure.
  */
 static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
-				struct page *page, struct page *kpage)
+				struct page *old_page, struct page *new_page)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	spinlock_t *ptl;
@@ -161,49 +161,49 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 	const unsigned long mmun_end   = addr + PAGE_SIZE;
 	struct mem_cgroup *memcg;
 
-	err = mem_cgroup_try_charge(kpage, vma->vm_mm, GFP_KERNEL, &memcg,
+	err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL, &memcg,
 			false);
 	if (err)
 		return err;
 
 	/* For try_to_free_swap() and munlock_vma_page() below */
-	lock_page(page);
+	lock_page(old_page);
 
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 	err = -EAGAIN;
-	ptep = page_check_address(page, mm, addr, &ptl, 0);
+	ptep = page_check_address(old_page, mm, addr, &ptl, 0);
 	if (!ptep) {
-		mem_cgroup_cancel_charge(kpage, memcg, false);
+		mem_cgroup_cancel_charge(new_page, memcg, false);
 		goto unlock;
 	}
 
-	get_page(kpage);
-	page_add_new_anon_rmap(kpage, vma, addr, false);
-	mem_cgroup_commit_charge(kpage, memcg, false, false);
-	lru_cache_add_active_or_unevictable(kpage, vma);
+	get_page(new_page);
+	page_add_new_anon_rmap(new_page, vma, addr, false);
+	mem_cgroup_commit_charge(new_page, memcg, false, false);
+	lru_cache_add_active_or_unevictable(new_page, vma);
 
-	if (!PageAnon(page)) {
-		dec_mm_counter(mm, mm_counter_file(page));
+	if (!PageAnon(old_page)) {
+		dec_mm_counter(mm, mm_counter_file(old_page));
 		inc_mm_counter(mm, MM_ANONPAGES);
 	}
 
 	flush_cache_page(vma, addr, pte_pfn(*ptep));
 	ptep_clear_flush_notify(vma, addr, ptep);
-	set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
+	set_pte_at_notify(mm, addr, ptep, mk_pte(new_page, vma->vm_page_prot));
 
-	page_remove_rmap(page, false);
-	if (!page_mapped(page))
-		try_to_free_swap(page);
+	page_remove_rmap(old_page, false);
+	if (!page_mapped(old_page))
+		try_to_free_swap(old_page);
 	pte_unmap_unlock(ptep, ptl);
 
 	if (vma->vm_flags & VM_LOCKED)
-		munlock_vma_page(page);
-	put_page(page);
+		munlock_vma_page(old_page);
+	put_page(old_page);
 
 	err = 0;
  unlock:
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
-	unlock_page(page);
+	unlock_page(old_page);
 	return err;
 }
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [tip:perf/urgent] uprobes: Fix the memcg accounting
  2016-08-17 15:36           ` [PATCH 1/2] uprobes: fix the memcg accounting Oleg Nesterov
@ 2016-08-18  8:07             ` tip-bot for Oleg Nesterov
  0 siblings, 0 replies; 12+ messages in thread
From: tip-bot for Oleg Nesterov @ 2016-08-18  8:07 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: hpa, vdavydov, oleg, mingo, alexander.shishkin, torvalds,
	bblanco, tglx, acme, jolsa, acme, mhocko, hannes,
	alexei.starovoitov, peterz, linux-kernel

Commit-ID:  6c4687cc17a788a6dd8de3e27dbeabb7cbd3e066
Gitweb:     http://git.kernel.org/tip/6c4687cc17a788a6dd8de3e27dbeabb7cbd3e066
Author:     Oleg Nesterov <oleg@redhat.com>
AuthorDate: Wed, 17 Aug 2016 17:36:29 +0200
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 18 Aug 2016 10:03:26 +0200

uprobes: Fix the memcg accounting

__replace_page() wronlgy calls mem_cgroup_cancel_charge() in "success" path,
it should only do this if page_check_address() fails.

This means that every enable/disable leads to unbalanced mem_cgroup_uncharge()
from put_page(old_page), it is trivial to underflow the page_counter->count
and trigger OOM.

Reported-and-tested-by: Brenden Blanco <bblanco@plumgrid.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vladimir Davydov <vdavydov@virtuozzo.com>
Cc: stable@vger.kernel.org # 3.17+
Fixes: 00501b531c47 ("mm: memcontrol: rewrite charge API")
Link: http://lkml.kernel.org/r/20160817153629.GB29724@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/uprobes.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index b7a525a..8c50276 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -172,8 +172,10 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 	err = -EAGAIN;
 	ptep = page_check_address(page, mm, addr, &ptl, 0);
-	if (!ptep)
+	if (!ptep) {
+		mem_cgroup_cancel_charge(kpage, memcg, false);
 		goto unlock;
+	}
 
 	get_page(kpage);
 	page_add_new_anon_rmap(kpage, vma, addr, false);
@@ -200,7 +202,6 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 
 	err = 0;
  unlock:
-	mem_cgroup_cancel_charge(kpage, memcg, false);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 	unlock_page(page);
 	return err;

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [tip:perf/core] uprobes: Rename the "struct page *" args of __replace_page()
  2016-08-17 15:37           ` [PATCH 2/2] uprobes: rename the "struct page *" args of __replace_page() Oleg Nesterov
@ 2016-08-18 10:49             ` tip-bot for Oleg Nesterov
  0 siblings, 0 replies; 12+ messages in thread
From: tip-bot for Oleg Nesterov @ 2016-08-18 10:49 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: bblanco, mingo, torvalds, alexei.starovoitov, acme,
	alexander.shishkin, oleg, acme, tglx, hpa, linux-kernel,
	vdavydov, mhocko, jolsa, peterz, hannes

Commit-ID:  bdfaa2eecd5f6ca0cb5cff2bc7a974a15a2fd21b
Gitweb:     http://git.kernel.org/tip/bdfaa2eecd5f6ca0cb5cff2bc7a974a15a2fd21b
Author:     Oleg Nesterov <oleg@redhat.com>
AuthorDate: Wed, 17 Aug 2016 17:37:04 +0200
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 18 Aug 2016 10:03:50 +0200

uprobes: Rename the "struct page *" args of __replace_page()

Purely cosmetic, no changes in the compiled code.

Perhaps it is just me but I can hardly read __replace_page() because I can't
distinguish "page" from "kpage" and because I need to look at the caller to
to ensure that, say, kpage is really the new page and the code is correct.
Rename them to old_page and new_page, this matches the caller.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Brenden Blanco <bblanco@plumgrid.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vladimir Davydov <vdavydov@virtuozzo.com>
Link: http://lkml.kernel.org/r/20160817153704.GC29724@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/uprobes.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 8c50276..d4129bb 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -150,7 +150,7 @@ static loff_t vaddr_to_offset(struct vm_area_struct *vma, unsigned long vaddr)
  * Returns 0 on success, -EFAULT on failure.
  */
 static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
-				struct page *page, struct page *kpage)
+				struct page *old_page, struct page *new_page)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	spinlock_t *ptl;
@@ -161,49 +161,49 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 	const unsigned long mmun_end   = addr + PAGE_SIZE;
 	struct mem_cgroup *memcg;
 
-	err = mem_cgroup_try_charge(kpage, vma->vm_mm, GFP_KERNEL, &memcg,
+	err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL, &memcg,
 			false);
 	if (err)
 		return err;
 
 	/* For try_to_free_swap() and munlock_vma_page() below */
-	lock_page(page);
+	lock_page(old_page);
 
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 	err = -EAGAIN;
-	ptep = page_check_address(page, mm, addr, &ptl, 0);
+	ptep = page_check_address(old_page, mm, addr, &ptl, 0);
 	if (!ptep) {
-		mem_cgroup_cancel_charge(kpage, memcg, false);
+		mem_cgroup_cancel_charge(new_page, memcg, false);
 		goto unlock;
 	}
 
-	get_page(kpage);
-	page_add_new_anon_rmap(kpage, vma, addr, false);
-	mem_cgroup_commit_charge(kpage, memcg, false, false);
-	lru_cache_add_active_or_unevictable(kpage, vma);
+	get_page(new_page);
+	page_add_new_anon_rmap(new_page, vma, addr, false);
+	mem_cgroup_commit_charge(new_page, memcg, false, false);
+	lru_cache_add_active_or_unevictable(new_page, vma);
 
-	if (!PageAnon(page)) {
-		dec_mm_counter(mm, mm_counter_file(page));
+	if (!PageAnon(old_page)) {
+		dec_mm_counter(mm, mm_counter_file(old_page));
 		inc_mm_counter(mm, MM_ANONPAGES);
 	}
 
 	flush_cache_page(vma, addr, pte_pfn(*ptep));
 	ptep_clear_flush_notify(vma, addr, ptep);
-	set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
+	set_pte_at_notify(mm, addr, ptep, mk_pte(new_page, vma->vm_page_prot));
 
-	page_remove_rmap(page, false);
-	if (!page_mapped(page))
-		try_to_free_swap(page);
+	page_remove_rmap(old_page, false);
+	if (!page_mapped(old_page))
+		try_to_free_swap(old_page);
 	pte_unmap_unlock(ptep, ptl);
 
 	if (vma->vm_flags & VM_LOCKED)
-		munlock_vma_page(page);
-	put_page(page);
+		munlock_vma_page(old_page);
+	put_page(old_page);
 
 	err = 0;
  unlock:
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
-	unlock_page(page);
+	unlock_page(old_page);
 	return err;
 }
 

^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2016-08-18 10:50 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-08-15 20:58 uprobes: memory leak in enable/disable loop Brenden Blanco
2016-08-16 14:13 ` Oleg Nesterov
2016-08-16 14:25   ` Oleg Nesterov
2016-08-16 14:34     ` Oleg Nesterov
2016-08-16 16:55       ` Brenden Blanco
2016-08-17 15:36         ` [PATCH 0/2] " Oleg Nesterov
2016-08-17 15:36           ` [PATCH 1/2] uprobes: fix the memcg accounting Oleg Nesterov
2016-08-18  8:07             ` [tip:perf/urgent] uprobes: Fix " tip-bot for Oleg Nesterov
2016-08-17 15:37           ` [PATCH 2/2] uprobes: rename the "struct page *" args of __replace_page() Oleg Nesterov
2016-08-18 10:49             ` [tip:perf/core] uprobes: Rename " tip-bot for Oleg Nesterov
2016-08-16 17:36       ` uprobes: memory leak in enable/disable loop Johannes Weiner
2016-08-16 14:37     ` Michal Hocko

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).