* [PATCH 1/2] mm, memcontrol: Move swap charge handling into get_swap_page()
2018-03-24 16:51 [PATCHSET] mm, memcontrol: Implement memory.swap.events Tejun Heo
@ 2018-03-24 16:51 ` Tejun Heo
2018-03-24 16:51 ` [PATCH 2/2] mm, memcontrol: Implement memory.swap.events Tejun Heo
` (2 subsequent siblings)
3 siblings, 0 replies; 8+ messages in thread
From: Tejun Heo @ 2018-03-24 16:51 UTC (permalink / raw)
To: hannes, mhocko, vdavydov.dev
Cc: guro, riel, akpm, linux-kernel, kernel-team, cgroups, linux-mm,
Tejun Heo
get_swap_page() is always followed by mem_cgroup_try_charge_swap().
This patch moves mem_cgroup_try_charge_swap() into get_swap_page() and
makes get_swap_page() call the function even after swap allocation
failure.
This simplifies the callers and consolidates memcg related logic and
will ease adding swap related memcg events.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
mm/memcontrol.c | 3 +++
mm/shmem.c | 4 ----
mm/swap_slots.c | 10 +++++++---
mm/swap_state.c | 3 ---
4 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d5bf01d..9f9c8a7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5987,6 +5987,9 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
if (!memcg)
return 0;
+ if (!entry.val)
+ return 0;
+
memcg = mem_cgroup_id_get_online(memcg);
if (!mem_cgroup_is_root(memcg) &&
diff --git a/mm/shmem.c b/mm/shmem.c
index 1907688..4a07d21 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1313,9 +1313,6 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
if (!swap.val)
goto redirty;
- if (mem_cgroup_try_charge_swap(page, swap))
- goto free_swap;
-
/*
* Add inode to shmem_unuse()'s list of swapped-out inodes,
* if it's not already there. Do it now before the page is
@@ -1344,7 +1341,6 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
}
mutex_unlock(&shmem_swaplist_mutex);
-free_swap:
put_swap_page(page, swap);
redirty:
set_page_dirty(page);
diff --git a/mm/swap_slots.c b/mm/swap_slots.c
index bebc192..7546eb2 100644
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -319,7 +319,7 @@ swp_entry_t get_swap_page(struct page *page)
if (PageTransHuge(page)) {
if (IS_ENABLED(CONFIG_THP_SWAP))
get_swap_pages(1, true, &entry);
- return entry;
+ goto out;
}
/*
@@ -349,11 +349,15 @@ swp_entry_t get_swap_page(struct page *page)
}
mutex_unlock(&cache->alloc_lock);
if (entry.val)
- return entry;
+ goto out;
}
get_swap_pages(1, false, &entry);
-
+out:
+ if (mem_cgroup_try_charge_swap(page, entry)) {
+ put_swap_page(page, entry);
+ entry.val = 0;
+ }
return entry;
}
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 39ae7cf..41f0809 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -216,9 +216,6 @@ int add_to_swap(struct page *page)
if (!entry.val)
return 0;
- if (mem_cgroup_try_charge_swap(page, entry))
- goto fail;
-
/*
* Radix-tree node allocations from PF_MEMALLOC contexts could
* completely exhaust the page allocator. __GFP_NOMEMALLOC
--
2.9.5
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 2/2] mm, memcontrol: Implement memory.swap.events
2018-03-24 16:51 [PATCHSET] mm, memcontrol: Implement memory.swap.events Tejun Heo
2018-03-24 16:51 ` [PATCH 1/2] mm, memcontrol: Move swap charge handling into get_swap_page() Tejun Heo
@ 2018-03-24 16:51 ` Tejun Heo
2018-03-26 21:39 ` [PATCHSET] " Andrew Morton
2018-04-12 14:13 ` Michal Hocko
3 siblings, 0 replies; 8+ messages in thread
From: Tejun Heo @ 2018-03-24 16:51 UTC (permalink / raw)
To: hannes, mhocko, vdavydov.dev
Cc: guro, riel, akpm, linux-kernel, kernel-team, cgroups, linux-mm,
Tejun Heo, linux-api
Add swap max and fail events so that userland can monitor and respond
to running out of swap.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-api@vger.kernel.org
---
Documentation/cgroup-v2.txt | 16 ++++++++++++++++
include/linux/memcontrol.h | 5 +++++
mm/memcontrol.c | 24 +++++++++++++++++++++++-
3 files changed, 44 insertions(+), 1 deletion(-)
diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt
index 74cdeae..b0dda10 100644
--- a/Documentation/cgroup-v2.txt
+++ b/Documentation/cgroup-v2.txt
@@ -1199,6 +1199,22 @@ PAGE_SIZE multiple when read back.
Swap usage hard limit. If a cgroup's swap usage reaches this
limit, anonymous memory of the cgroup will not be swapped out.
+ memory.swap.events
+ A read-only flat-keyed file which exists on non-root cgroups.
+ The following entries are defined. Unless specified
+ otherwise, a value change in this file generates a file
+ modified event.
+
+ max
+ The number of times the cgroup's swap usage was about
+ to go over the max boundary and swap allocation
+ failed.
+
+ fail
+ The number of times swap allocation failed, either
+ because swap ran out system-wide or because the max
+ limit was hit.
+
Usage Guidelines
~~~~~~~~~~~~~~~~
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 85a8f00..f198339 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -54,6 +54,8 @@ enum memcg_event_item {
MEMCG_HIGH,
MEMCG_MAX,
MEMCG_OOM,
+ MEMCG_SWAP_MAX,
+ MEMCG_SWAP_FAIL,
MEMCG_NR_EVENTS,
};
@@ -202,6 +204,9 @@ struct mem_cgroup {
/* handle for "memory.events" */
struct cgroup_file events_file;
+ /* handle for "memory.swap.events" */
+ struct cgroup_file swap_events_file;
+
/* protect arrays of thresholds */
struct mutex thresholds_lock;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9f9c8a7..1a14d4a4 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5987,13 +5987,17 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
if (!memcg)
return 0;
- if (!entry.val)
+ if (!entry.val) {
+ mem_cgroup_event(memcg, MEMCG_SWAP_FAIL);
return 0;
+ }
memcg = mem_cgroup_id_get_online(memcg);
if (!mem_cgroup_is_root(memcg) &&
!page_counter_try_charge(&memcg->swap, nr_pages, &counter)) {
+ mem_cgroup_event(memcg, MEMCG_SWAP_MAX);
+ mem_cgroup_event(memcg, MEMCG_SWAP_FAIL);
mem_cgroup_id_put(memcg);
return -ENOMEM;
}
@@ -6131,6 +6135,18 @@ static ssize_t swap_max_write(struct kernfs_open_file *of,
return nbytes;
}
+static int swap_events_show(struct seq_file *m, void *v)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+
+ memcg_stat_flush(memcg);
+
+ seq_printf(m, "max %llu\n", memcg->events[MEMCG_SWAP_MAX]);
+ seq_printf(m, "fail %llu\n", memcg->events[MEMCG_SWAP_FAIL]);
+
+ return 0;
+}
+
static struct cftype swap_files[] = {
{
.name = "swap.current",
@@ -6143,6 +6159,12 @@ static struct cftype swap_files[] = {
.seq_show = swap_max_show,
.write = swap_max_write,
},
+ {
+ .name = "swap.events",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .file_offset = offsetof(struct mem_cgroup, swap_events_file),
+ .seq_show = swap_events_show,
+ },
{ } /* terminate */
};
--
2.9.5
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCHSET] mm, memcontrol: Implement memory.swap.events
2018-03-24 16:51 [PATCHSET] mm, memcontrol: Implement memory.swap.events Tejun Heo
2018-03-24 16:51 ` [PATCH 1/2] mm, memcontrol: Move swap charge handling into get_swap_page() Tejun Heo
2018-03-24 16:51 ` [PATCH 2/2] mm, memcontrol: Implement memory.swap.events Tejun Heo
@ 2018-03-26 21:39 ` Andrew Morton
2018-03-27 13:52 ` Tejun Heo
2018-04-12 14:13 ` Michal Hocko
3 siblings, 1 reply; 8+ messages in thread
From: Andrew Morton @ 2018-03-26 21:39 UTC (permalink / raw)
To: Tejun Heo
Cc: hannes, mhocko, vdavydov.dev, guro, riel, linux-kernel,
kernel-team, cgroups, linux-mm
On Sat, 24 Mar 2018 09:51:25 -0700 Tejun Heo <tj@kernel.org> wrote:
> This patchset implements memory.swap.events which contains max and
> fail events so that userland can monitor and respond to swap running
> out. It contains the following two patches.
>
> 0001-mm-memcontrol-Move-swap-charge-handling-into-get_swa.patch
> 0002-mm-memcontrol-Implement-memory.swap.events.patch
>
> This patchset is on top of the "cgroup/for-4.17: Make cgroup_rstat
> available to controllers" patchset[1] and "mm, memcontrol: Make
> cgroup_rstat available to controllers" patchset[2] and also available
> in the following git branch.
>
> git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git review-memcg-swap.events
This doesn't appear to be in linux-next yet. It should be by now if it's
targeted at 4.17?
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCHSET] mm, memcontrol: Implement memory.swap.events
2018-03-26 21:39 ` [PATCHSET] " Andrew Morton
@ 2018-03-27 13:52 ` Tejun Heo
0 siblings, 0 replies; 8+ messages in thread
From: Tejun Heo @ 2018-03-27 13:52 UTC (permalink / raw)
To: Andrew Morton
Cc: hannes, mhocko, vdavydov.dev, guro, riel, linux-kernel,
kernel-team, cgroups, linux-mm
On Mon, Mar 26, 2018 at 02:39:31PM -0700, Andrew Morton wrote:
> On Sat, 24 Mar 2018 09:51:25 -0700 Tejun Heo <tj@kernel.org> wrote:
>
> > This patchset implements memory.swap.events which contains max and
> > fail events so that userland can monitor and respond to swap running
> > out. It contains the following two patches.
> >
> > 0001-mm-memcontrol-Move-swap-charge-handling-into-get_swa.patch
> > 0002-mm-memcontrol-Implement-memory.swap.events.patch
> >
> > This patchset is on top of the "cgroup/for-4.17: Make cgroup_rstat
> > available to controllers" patchset[1] and "mm, memcontrol: Make
> > cgroup_rstat available to controllers" patchset[2] and also available
> > in the following git branch.
> >
> > git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git review-memcg-swap.events
>
> This doesn't appear to be in linux-next yet. It should be by now if it's
> targeted at 4.17?
You're right. It's too late for 4.17. Let's aim for 4.18.
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCHSET] mm, memcontrol: Implement memory.swap.events
2018-03-24 16:51 [PATCHSET] mm, memcontrol: Implement memory.swap.events Tejun Heo
` (2 preceding siblings ...)
2018-03-26 21:39 ` [PATCHSET] " Andrew Morton
@ 2018-04-12 14:13 ` Michal Hocko
2018-04-12 15:38 ` Tejun Heo
3 siblings, 1 reply; 8+ messages in thread
From: Michal Hocko @ 2018-04-12 14:13 UTC (permalink / raw)
To: Tejun Heo
Cc: hannes, vdavydov.dev, guro, riel, akpm, linux-kernel,
kernel-team, cgroups, linux-mm
Hi Tejun,
sorry for the late response. Are you planning to repost?
On Sat 24-03-18 09:51:25, Tejun Heo wrote:
> Hello,
>
> This patchset implements memory.swap.events which contains max and
> fail events so that userland can monitor and respond to swap running
> out. It contains the following two patches.
>
> 0001-mm-memcontrol-Move-swap-charge-handling-into-get_swa.patch
> 0002-mm-memcontrol-Implement-memory.swap.events.patch
>
> This patchset is on top of the "cgroup/for-4.17: Make cgroup_rstat
> available to controllers" patchset[1] and "mm, memcontrol: Make
> cgroup_rstat available to controllers" patchset[2] and also available
> in the following git branch.
>
> git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git review-memcg-swap.events
>
> diffstat follows.
>
> Documentation/cgroup-v2.txt | 16 ++++++++++++++++
> include/linux/memcontrol.h | 5 +++++
> mm/memcontrol.c | 25 +++++++++++++++++++++++++
> mm/shmem.c | 4 ----
> mm/swap_slots.c | 10 +++++++---
> mm/swap_state.c | 3 ---
> 6 files changed, 53 insertions(+), 10 deletions(-)
>
> Thanks.
>
> --
> tejun
>
> [1] http://lkml.kernel.org/r/20180323231313.1254142-1-tj@kernel.org
> [2] http://lkml.kernel.org/r/20180324160901.512135-1-tj@kernel.org
--
Michal Hocko
SUSE Labs
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCHSET] mm, memcontrol: Implement memory.swap.events
2018-04-12 14:13 ` Michal Hocko
@ 2018-04-12 15:38 ` Tejun Heo
0 siblings, 0 replies; 8+ messages in thread
From: Tejun Heo @ 2018-04-12 15:38 UTC (permalink / raw)
To: Michal Hocko
Cc: hannes, vdavydov.dev, guro, riel, akpm, linux-kernel,
kernel-team, cgroups, linux-mm
On Thu, Apr 12, 2018 at 04:13:45PM +0200, Michal Hocko wrote:
> Hi Tejun,
> sorry for the late response. Are you planning to repost?
Yeah, will do later.
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 1/2] mm, memcontrol: Move swap charge handling into get_swap_page()
2018-04-16 23:09 [PATCHSET v2] " Tejun Heo
@ 2018-04-16 23:09 ` Tejun Heo
0 siblings, 0 replies; 8+ messages in thread
From: Tejun Heo @ 2018-04-16 23:09 UTC (permalink / raw)
To: hannes, mhocko, vdavydov.dev
Cc: guro, riel, akpm, linux-kernel, kernel-team, cgroups, linux-mm
get_swap_page() is always followed by mem_cgroup_try_charge_swap().
This patch moves mem_cgroup_try_charge_swap() into get_swap_page() and
makes get_swap_page() call the function even after swap allocation
failure.
This simplifies the callers and consolidates memcg related logic and
will ease adding swap related memcg events.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
mm/memcontrol.c | 3 +++
mm/shmem.c | 4 ----
mm/swap_slots.c | 10 +++++++---
mm/swap_state.c | 3 ---
4 files changed, 10 insertions(+), 10 deletions(-)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6012,6 +6012,9 @@ int mem_cgroup_try_charge_swap(struct pa
if (!memcg)
return 0;
+ if (!entry.val)
+ return 0;
+
memcg = mem_cgroup_id_get_online(memcg);
if (!mem_cgroup_is_root(memcg) &&
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1322,9 +1322,6 @@ static int shmem_writepage(struct page *
if (!swap.val)
goto redirty;
- if (mem_cgroup_try_charge_swap(page, swap))
- goto free_swap;
-
/*
* Add inode to shmem_unuse()'s list of swapped-out inodes,
* if it's not already there. Do it now before the page is
@@ -1353,7 +1350,6 @@ static int shmem_writepage(struct page *
}
mutex_unlock(&shmem_swaplist_mutex);
-free_swap:
put_swap_page(page, swap);
redirty:
set_page_dirty(page);
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -317,7 +317,7 @@ swp_entry_t get_swap_page(struct page *p
if (PageTransHuge(page)) {
if (IS_ENABLED(CONFIG_THP_SWAP))
get_swap_pages(1, true, &entry);
- return entry;
+ goto out;
}
/*
@@ -347,10 +347,14 @@ repeat:
}
mutex_unlock(&cache->alloc_lock);
if (entry.val)
- return entry;
+ goto out;
}
get_swap_pages(1, false, &entry);
-
+out:
+ if (mem_cgroup_try_charge_swap(page, entry)) {
+ put_swap_page(page, entry);
+ entry.val = 0;
+ }
return entry;
}
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -216,9 +216,6 @@ int add_to_swap(struct page *page)
if (!entry.val)
return 0;
- if (mem_cgroup_try_charge_swap(page, entry))
- goto fail;
-
/*
* Radix-tree node allocations from PF_MEMALLOC contexts could
* completely exhaust the page allocator. __GFP_NOMEMALLOC
^ permalink raw reply [flat|nested] 8+ messages in thread