All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 00/14] mm: memcontrol: account socket memory in unified hierarchy v4-RESEND
@ 2015-12-08 15:30 ` Johannes Weiner
  0 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-08 15:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, netdev, cgroups, linux-kernel

Hi Andrew,

there was some build breakage in CONFIG_ combinations I hadn't tested
in the last revision, so here is a fixed-up resend with minimal CC
list. The only difference to the previous version is a section in
memcontrol.h, but it accumulates throughout the series and would have
been a pain to resolve on your end. So here goes. This also includes
the review tags that Dave and Vlad had sent out in the meantime.

Difference to the original v4:

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 9a19590..189f04d 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -702,14 +702,14 @@ static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb,
 
 #endif	/* CONFIG_CGROUP_WRITEBACK */
 
-#ifdef CONFIG_INET
 struct sock;
-extern struct static_key_false memcg_sockets_enabled_key;
-#define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key)
 void sock_update_memcg(struct sock *sk);
 void sock_release_memcg(struct sock *sk);
 bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
 void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
+#if defined(CONFIG_MEMCG) && defined(CONFIG_INET)
+extern struct static_key_false memcg_sockets_enabled_key;
+#define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key)
 static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
 {
 #ifdef CONFIG_MEMCG_KMEM
@@ -724,7 +724,11 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
 }
 #else
 #define mem_cgroup_sockets_enabled 0
-#endif /* CONFIG_INET */
+static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
+{
+	return false;
+}
+#endif
 
 #ifdef CONFIG_MEMCG_KMEM
 extern struct static_key_false memcg_kmem_enabled_key;
diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h
index a77b142..3347cc3 100644
--- a/include/linux/vmpressure.h
+++ b/include/linux/vmpressure.h
@@ -43,7 +43,7 @@ extern int vmpressure_register_event(struct mem_cgroup *memcg,
 extern void vmpressure_unregister_event(struct mem_cgroup *memcg,
 					struct eventfd_ctx *eventfd);
 #else
-static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
+static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
 			      unsigned long scanned, unsigned long reclaimed) {}
 static inline void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg,
 				   int prio) {}


 Documentation/kernel-parameters.txt |   4 +
 include/linux/memcontrol.h          |  75 ++++++---
 include/linux/vmpressure.h          |   7 +-
 include/net/sock.h                  | 149 ++---------------
 include/net/tcp.h                   |   5 +-
 include/net/tcp_memcontrol.h        |   1 -
 mm/backing-dev.c                    |   2 +-
 mm/memcontrol.c                     | 302 ++++++++++++++++++++++------------
 mm/vmpressure.c                     |  78 ++++++---
 mm/vmscan.c                         |  10 +-
 net/core/sock.c                     |  78 ++-------
 net/ipv4/tcp.c                      |   3 +-
 net/ipv4/tcp_ipv4.c                 |   9 +-
 net/ipv4/tcp_memcontrol.c           |  82 ++++-----
 net/ipv4/tcp_output.c               |   7 +-
 net/ipv6/tcp_ipv6.c                 |   3 -
 16 files changed, 391 insertions(+), 424 deletions(-)


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 00/14] mm: memcontrol: account socket memory in unified hierarchy v4-RESEND
@ 2015-12-08 15:30 ` Johannes Weiner
  0 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-08 15:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, netdev, cgroups, linux-kernel

Hi Andrew,

there was some build breakage in CONFIG_ combinations I hadn't tested
in the last revision, so here is a fixed-up resend with minimal CC
list. The only difference to the previous version is a section in
memcontrol.h, but it accumulates throughout the series and would have
been a pain to resolve on your end. So here goes. This also includes
the review tags that Dave and Vlad had sent out in the meantime.

Difference to the original v4:

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 9a19590..189f04d 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -702,14 +702,14 @@ static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb,
 
 #endif	/* CONFIG_CGROUP_WRITEBACK */
 
-#ifdef CONFIG_INET
 struct sock;
-extern struct static_key_false memcg_sockets_enabled_key;
-#define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key)
 void sock_update_memcg(struct sock *sk);
 void sock_release_memcg(struct sock *sk);
 bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
 void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
+#if defined(CONFIG_MEMCG) && defined(CONFIG_INET)
+extern struct static_key_false memcg_sockets_enabled_key;
+#define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key)
 static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
 {
 #ifdef CONFIG_MEMCG_KMEM
@@ -724,7 +724,11 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
 }
 #else
 #define mem_cgroup_sockets_enabled 0
-#endif /* CONFIG_INET */
+static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
+{
+	return false;
+}
+#endif
 
 #ifdef CONFIG_MEMCG_KMEM
 extern struct static_key_false memcg_kmem_enabled_key;
diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h
index a77b142..3347cc3 100644
--- a/include/linux/vmpressure.h
+++ b/include/linux/vmpressure.h
@@ -43,7 +43,7 @@ extern int vmpressure_register_event(struct mem_cgroup *memcg,
 extern void vmpressure_unregister_event(struct mem_cgroup *memcg,
 					struct eventfd_ctx *eventfd);
 #else
-static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
+static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
 			      unsigned long scanned, unsigned long reclaimed) {}
 static inline void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg,
 				   int prio) {}


 Documentation/kernel-parameters.txt |   4 +
 include/linux/memcontrol.h          |  75 ++++++---
 include/linux/vmpressure.h          |   7 +-
 include/net/sock.h                  | 149 ++---------------
 include/net/tcp.h                   |   5 +-
 include/net/tcp_memcontrol.h        |   1 -
 mm/backing-dev.c                    |   2 +-
 mm/memcontrol.c                     | 302 ++++++++++++++++++++++------------
 mm/vmpressure.c                     |  78 ++++++---
 mm/vmscan.c                         |  10 +-
 net/core/sock.c                     |  78 ++-------
 net/ipv4/tcp.c                      |   3 +-
 net/ipv4/tcp_ipv4.c                 |   9 +-
 net/ipv4/tcp_memcontrol.c           |  82 ++++-----
 net/ipv4/tcp_output.c               |   7 +-
 net/ipv6/tcp_ipv6.c                 |   3 -
 16 files changed, 391 insertions(+), 424 deletions(-)

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 01/14] mm: memcontrol: export root_mem_cgroup
  2015-12-08 15:30 ` Johannes Weiner
@ 2015-12-08 15:30   ` Johannes Weiner
  -1 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-08 15:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, netdev, cgroups, linux-kernel

A later patch will need this symbol in files other than memcontrol.c,
so export it now and replace mem_cgroup_root_css at the same time.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: David S. Miller <davem@davemloft.net>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
---
 include/linux/memcontrol.h | 3 ++-
 mm/backing-dev.c           | 2 +-
 mm/memcontrol.c            | 5 ++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 9d5472b..320b690 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -265,7 +265,8 @@ struct mem_cgroup {
 	struct mem_cgroup_per_node *nodeinfo[0];
 	/* WARNING: nodeinfo must be the last member here */
 };
-extern struct cgroup_subsys_state *mem_cgroup_root_css;
+
+extern struct mem_cgroup *root_mem_cgroup;
 
 /**
  * mem_cgroup_events - count memory events against a cgroup
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 9160853..fdc6f4d 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -707,7 +707,7 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
 
 	ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
 	if (!ret) {
-		bdi->wb.memcg_css = mem_cgroup_root_css;
+		bdi->wb.memcg_css = &root_mem_cgroup->css;
 		bdi->wb.blkcg_css = blkcg_root_css;
 	}
 	return ret;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 79a29d5..f6ea649 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -76,9 +76,9 @@
 struct cgroup_subsys memory_cgrp_subsys __read_mostly;
 EXPORT_SYMBOL(memory_cgrp_subsys);
 
+struct mem_cgroup *root_mem_cgroup __read_mostly;
+
 #define MEM_CGROUP_RECLAIM_RETRIES	5
-static struct mem_cgroup *root_mem_cgroup __read_mostly;
-struct cgroup_subsys_state *mem_cgroup_root_css __read_mostly;
 
 /* Whether the swap controller is active */
 #ifdef CONFIG_MEMCG_SWAP
@@ -4217,7 +4217,6 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 	/* root ? */
 	if (parent_css == NULL) {
 		root_mem_cgroup = memcg;
-		mem_cgroup_root_css = &memcg->css;
 		page_counter_init(&memcg->memory, NULL);
 		memcg->high = PAGE_COUNTER_MAX;
 		memcg->soft_limit = PAGE_COUNTER_MAX;
-- 
2.6.3


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 01/14] mm: memcontrol: export root_mem_cgroup
@ 2015-12-08 15:30   ` Johannes Weiner
  0 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-08 15:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, netdev, cgroups, linux-kernel

A later patch will need this symbol in files other than memcontrol.c,
so export it now and replace mem_cgroup_root_css at the same time.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: David S. Miller <davem@davemloft.net>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
---
 include/linux/memcontrol.h | 3 ++-
 mm/backing-dev.c           | 2 +-
 mm/memcontrol.c            | 5 ++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 9d5472b..320b690 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -265,7 +265,8 @@ struct mem_cgroup {
 	struct mem_cgroup_per_node *nodeinfo[0];
 	/* WARNING: nodeinfo must be the last member here */
 };
-extern struct cgroup_subsys_state *mem_cgroup_root_css;
+
+extern struct mem_cgroup *root_mem_cgroup;
 
 /**
  * mem_cgroup_events - count memory events against a cgroup
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 9160853..fdc6f4d 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -707,7 +707,7 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
 
 	ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
 	if (!ret) {
-		bdi->wb.memcg_css = mem_cgroup_root_css;
+		bdi->wb.memcg_css = &root_mem_cgroup->css;
 		bdi->wb.blkcg_css = blkcg_root_css;
 	}
 	return ret;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 79a29d5..f6ea649 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -76,9 +76,9 @@
 struct cgroup_subsys memory_cgrp_subsys __read_mostly;
 EXPORT_SYMBOL(memory_cgrp_subsys);
 
+struct mem_cgroup *root_mem_cgroup __read_mostly;
+
 #define MEM_CGROUP_RECLAIM_RETRIES	5
-static struct mem_cgroup *root_mem_cgroup __read_mostly;
-struct cgroup_subsys_state *mem_cgroup_root_css __read_mostly;
 
 /* Whether the swap controller is active */
 #ifdef CONFIG_MEMCG_SWAP
@@ -4217,7 +4217,6 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 	/* root ? */
 	if (parent_css == NULL) {
 		root_mem_cgroup = memcg;
-		mem_cgroup_root_css = &memcg->css;
 		page_counter_init(&memcg->memory, NULL);
 		memcg->high = PAGE_COUNTER_MAX;
 		memcg->soft_limit = PAGE_COUNTER_MAX;
-- 
2.6.3

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 02/14] net: tcp_memcontrol: properly detect ancestor socket pressure
  2015-12-08 15:30 ` Johannes Weiner
@ 2015-12-08 15:30   ` Johannes Weiner
  -1 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-08 15:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, netdev, cgroups, linux-kernel

When charging socket memory, the code currently checks only the local
page counter for excess to determine whether the memcg is under socket
pressure. But even if the local counter is fine, one of the ancestors
could have breached its limit, which should also force this child to
enter socket pressure. This currently doesn't happen.

Fix this by using page_counter_try_charge() first. If that fails, it
means that either the local counter or one of the ancestors is in
excess of its limit, and the child should enter socket pressure.

Fixes: 3e32cb2e0a12 ("mm: memcontrol: lockless page counters")
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: David S. Miller <davem@davemloft.net>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
---
 include/net/sock.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 7f89e4b..8133c71 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1190,11 +1190,13 @@ static inline void memcg_memory_allocated_add(struct cg_proto *prot,
 					      unsigned long amt,
 					      int *parent_status)
 {
-	page_counter_charge(&prot->memory_allocated, amt);
+	struct page_counter *counter;
+
+	if (page_counter_try_charge(&prot->memory_allocated, amt, &counter))
+		return;
 
-	if (page_counter_read(&prot->memory_allocated) >
-	    prot->memory_allocated.limit)
-		*parent_status = OVER_LIMIT;
+	page_counter_charge(&prot->memory_allocated, amt);
+	*parent_status = OVER_LIMIT;
 }
 
 static inline void memcg_memory_allocated_sub(struct cg_proto *prot,
-- 
2.6.3


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 02/14] net: tcp_memcontrol: properly detect ancestor socket pressure
@ 2015-12-08 15:30   ` Johannes Weiner
  0 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-08 15:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, netdev, cgroups, linux-kernel

When charging socket memory, the code currently checks only the local
page counter for excess to determine whether the memcg is under socket
pressure. But even if the local counter is fine, one of the ancestors
could have breached its limit, which should also force this child to
enter socket pressure. This currently doesn't happen.

Fix this by using page_counter_try_charge() first. If that fails, it
means that either the local counter or one of the ancestors is in
excess of its limit, and the child should enter socket pressure.

Fixes: 3e32cb2e0a12 ("mm: memcontrol: lockless page counters")
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: David S. Miller <davem@davemloft.net>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
---
 include/net/sock.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 7f89e4b..8133c71 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1190,11 +1190,13 @@ static inline void memcg_memory_allocated_add(struct cg_proto *prot,
 					      unsigned long amt,
 					      int *parent_status)
 {
-	page_counter_charge(&prot->memory_allocated, amt);
+	struct page_counter *counter;
+
+	if (page_counter_try_charge(&prot->memory_allocated, amt, &counter))
+		return;
 
-	if (page_counter_read(&prot->memory_allocated) >
-	    prot->memory_allocated.limit)
-		*parent_status = OVER_LIMIT;
+	page_counter_charge(&prot->memory_allocated, amt);
+	*parent_status = OVER_LIMIT;
 }
 
 static inline void memcg_memory_allocated_sub(struct cg_proto *prot,
-- 
2.6.3

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 03/14] net: tcp_memcontrol: remove bogus hierarchy pressure propagation
  2015-12-08 15:30 ` Johannes Weiner
@ 2015-12-08 15:30   ` Johannes Weiner
  -1 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-08 15:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, netdev, cgroups, linux-kernel

Currently, when a cgroup breaches its socket memory limit, it enters
memory pressure mode for itself and its *ancestors*. This throttles
transmission in unrelated sibling and cousin subtrees that have
nothing to do with the breached limit.

On the contrary, breaching a limit should make that group and its
*children* enter memory pressure mode. But this happens already,
albeit lazily: if an ancestor limit is breached, siblings will enter
memory pressure on their own once the next packet arrives for them.

So no additional hierarchy code is needed. Remove the bogus stuff.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: David S. Miller <davem@davemloft.net>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
---
 include/net/sock.h | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 8133c71..e27a8bb 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1152,14 +1152,8 @@ static inline void sk_leave_memory_pressure(struct sock *sk)
 	if (*memory_pressure)
 		*memory_pressure = 0;
 
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
-		struct cg_proto *cg_proto = sk->sk_cgrp;
-		struct proto *prot = sk->sk_prot;
-
-		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
-			cg_proto->memory_pressure = 0;
-	}
-
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		sk->sk_cgrp->memory_pressure = 0;
 }
 
 static inline void sk_enter_memory_pressure(struct sock *sk)
@@ -1167,13 +1161,8 @@ static inline void sk_enter_memory_pressure(struct sock *sk)
 	if (!sk->sk_prot->enter_memory_pressure)
 		return;
 
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
-		struct cg_proto *cg_proto = sk->sk_cgrp;
-		struct proto *prot = sk->sk_prot;
-
-		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
-			cg_proto->memory_pressure = 1;
-	}
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		sk->sk_cgrp->memory_pressure = 1;
 
 	sk->sk_prot->enter_memory_pressure(sk);
 }
-- 
2.6.3


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 03/14] net: tcp_memcontrol: remove bogus hierarchy pressure propagation
@ 2015-12-08 15:30   ` Johannes Weiner
  0 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-08 15:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, netdev, cgroups, linux-kernel

Currently, when a cgroup breaches its socket memory limit, it enters
memory pressure mode for itself and its *ancestors*. This throttles
transmission in unrelated sibling and cousin subtrees that have
nothing to do with the breached limit.

On the contrary, breaching a limit should make that group and its
*children* enter memory pressure mode. But this happens already,
albeit lazily: if an ancestor limit is breached, siblings will enter
memory pressure on their own once the next packet arrives for them.

So no additional hierarchy code is needed. Remove the bogus stuff.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: David S. Miller <davem@davemloft.net>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
---
 include/net/sock.h | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 8133c71..e27a8bb 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1152,14 +1152,8 @@ static inline void sk_leave_memory_pressure(struct sock *sk)
 	if (*memory_pressure)
 		*memory_pressure = 0;
 
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
-		struct cg_proto *cg_proto = sk->sk_cgrp;
-		struct proto *prot = sk->sk_prot;
-
-		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
-			cg_proto->memory_pressure = 0;
-	}
-
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		sk->sk_cgrp->memory_pressure = 0;
 }
 
 static inline void sk_enter_memory_pressure(struct sock *sk)
@@ -1167,13 +1161,8 @@ static inline void sk_enter_memory_pressure(struct sock *sk)
 	if (!sk->sk_prot->enter_memory_pressure)
 		return;
 
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
-		struct cg_proto *cg_proto = sk->sk_cgrp;
-		struct proto *prot = sk->sk_prot;
-
-		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
-			cg_proto->memory_pressure = 1;
-	}
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		sk->sk_cgrp->memory_pressure = 1;
 
 	sk->sk_prot->enter_memory_pressure(sk);
 }
-- 
2.6.3

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 04/14] net: tcp_memcontrol: protect all tcp_memcontrol calls by jump-label
  2015-12-08 15:30 ` Johannes Weiner
@ 2015-12-08 15:30   ` Johannes Weiner
  -1 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-08 15:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, netdev, cgroups, linux-kernel

Move the jump-label from sock_update_memcg() and sock_release_memcg()
to the callsite, and so eliminate those function calls when socket
accounting is not enabled.

This also eliminates the need for dummy functions because the calls
will be optimized away if the Kconfig options are not enabled.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: David S. Miller <davem@davemloft.net>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
---
 include/linux/memcontrol.h |  9 --------
 mm/memcontrol.c            | 56 +++++++++++++++++++++-------------------------
 net/core/sock.c            |  9 ++------
 net/ipv4/tcp.c             |  3 ++-
 net/ipv4/tcp_ipv4.c        |  4 +++-
 5 files changed, 32 insertions(+), 49 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 320b690..aed64b6 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -698,17 +698,8 @@ static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb,
 #endif	/* CONFIG_CGROUP_WRITEBACK */
 
 struct sock;
-#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
 void sock_update_memcg(struct sock *sk);
 void sock_release_memcg(struct sock *sk);
-#else
-static inline void sock_update_memcg(struct sock *sk)
-{
-}
-static inline void sock_release_memcg(struct sock *sk)
-{
-}
-#endif /* CONFIG_INET && CONFIG_MEMCG_KMEM */
 
 #ifdef CONFIG_MEMCG_KMEM
 extern struct static_key memcg_kmem_enabled_key;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f6ea649..0b78f82 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -293,46 +293,40 @@ static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
 
 void sock_update_memcg(struct sock *sk)
 {
-	if (mem_cgroup_sockets_enabled) {
-		struct mem_cgroup *memcg;
-		struct cg_proto *cg_proto;
+	struct mem_cgroup *memcg;
+	struct cg_proto *cg_proto;
 
-		BUG_ON(!sk->sk_prot->proto_cgroup);
+	BUG_ON(!sk->sk_prot->proto_cgroup);
 
-		/* Socket cloning can throw us here with sk_cgrp already
-		 * filled. It won't however, necessarily happen from
-		 * process context. So the test for root memcg given
-		 * the current task's memcg won't help us in this case.
-		 *
-		 * Respecting the original socket's memcg is a better
-		 * decision in this case.
-		 */
-		if (sk->sk_cgrp) {
-			BUG_ON(mem_cgroup_is_root(sk->sk_cgrp->memcg));
-			css_get(&sk->sk_cgrp->memcg->css);
-			return;
-		}
+	/* Socket cloning can throw us here with sk_cgrp already
+	 * filled. It won't however, necessarily happen from
+	 * process context. So the test for root memcg given
+	 * the current task's memcg won't help us in this case.
+	 *
+	 * Respecting the original socket's memcg is a better
+	 * decision in this case.
+	 */
+	if (sk->sk_cgrp) {
+		BUG_ON(mem_cgroup_is_root(sk->sk_cgrp->memcg));
+		css_get(&sk->sk_cgrp->memcg->css);
+		return;
+	}
 
-		rcu_read_lock();
-		memcg = mem_cgroup_from_task(current);
-		cg_proto = sk->sk_prot->proto_cgroup(memcg);
-		if (cg_proto && cg_proto->active &&
-		    css_tryget_online(&memcg->css)) {
-			sk->sk_cgrp = cg_proto;
-		}
-		rcu_read_unlock();
+	rcu_read_lock();
+	memcg = mem_cgroup_from_task(current);
+	cg_proto = sk->sk_prot->proto_cgroup(memcg);
+	if (cg_proto && cg_proto->active &&
+	    css_tryget_online(&memcg->css)) {
+		sk->sk_cgrp = cg_proto;
 	}
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL(sock_update_memcg);
 
 void sock_release_memcg(struct sock *sk)
 {
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
-		struct mem_cgroup *memcg;
-		WARN_ON(!sk->sk_cgrp->memcg);
-		memcg = sk->sk_cgrp->memcg;
-		css_put(&sk->sk_cgrp->memcg->css);
-	}
+	WARN_ON(!sk->sk_cgrp->memcg);
+	css_put(&sk->sk_cgrp->memcg->css);
 }
 
 struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg)
diff --git a/net/core/sock.c b/net/core/sock.c
index 1e4dd54..04e54bc 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1488,12 +1488,6 @@ void sk_free(struct sock *sk)
 }
 EXPORT_SYMBOL(sk_free);
 
-static void sk_update_clone(const struct sock *sk, struct sock *newsk)
-{
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
-		sock_update_memcg(newsk);
-}
-
 /**
  *	sk_clone_lock - clone a socket, and lock its clone
  *	@sk: the socket to clone
@@ -1589,7 +1583,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 		sk_set_socket(newsk, NULL);
 		newsk->sk_wq = NULL;
 
-		sk_update_clone(sk, newsk);
+		if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+			sock_update_memcg(newsk);
 
 		if (newsk->sk_prot->sockets_allocated)
 			sk_sockets_allocated_inc(newsk);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c172877..f166c28 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -422,7 +422,8 @@ void tcp_init_sock(struct sock *sk)
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 
 	local_bh_disable();
-	sock_update_memcg(sk);
+	if (mem_cgroup_sockets_enabled)
+		sock_update_memcg(sk);
 	sk_sockets_allocated_inc(sk);
 	local_bh_enable();
 }
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index db00343..4027e02 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1813,7 +1813,9 @@ void tcp_v4_destroy_sock(struct sock *sk)
 	tcp_saved_syn_free(tp);
 
 	sk_sockets_allocated_dec(sk);
-	sock_release_memcg(sk);
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		sock_release_memcg(sk);
 }
 EXPORT_SYMBOL(tcp_v4_destroy_sock);
 
-- 
2.6.3


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 04/14] net: tcp_memcontrol: protect all tcp_memcontrol calls by jump-label
@ 2015-12-08 15:30   ` Johannes Weiner
  0 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-08 15:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, netdev, cgroups, linux-kernel

Move the jump-label from sock_update_memcg() and sock_release_memcg()
to the callsite, and so eliminate those function calls when socket
accounting is not enabled.

This also eliminates the need for dummy functions because the calls
will be optimized away if the Kconfig options are not enabled.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: David S. Miller <davem@davemloft.net>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
---
 include/linux/memcontrol.h |  9 --------
 mm/memcontrol.c            | 56 +++++++++++++++++++++-------------------------
 net/core/sock.c            |  9 ++------
 net/ipv4/tcp.c             |  3 ++-
 net/ipv4/tcp_ipv4.c        |  4 +++-
 5 files changed, 32 insertions(+), 49 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 320b690..aed64b6 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -698,17 +698,8 @@ static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb,
 #endif	/* CONFIG_CGROUP_WRITEBACK */
 
 struct sock;
-#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
 void sock_update_memcg(struct sock *sk);
 void sock_release_memcg(struct sock *sk);
-#else
-static inline void sock_update_memcg(struct sock *sk)
-{
-}
-static inline void sock_release_memcg(struct sock *sk)
-{
-}
-#endif /* CONFIG_INET && CONFIG_MEMCG_KMEM */
 
 #ifdef CONFIG_MEMCG_KMEM
 extern struct static_key memcg_kmem_enabled_key;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f6ea649..0b78f82 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -293,46 +293,40 @@ static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
 
 void sock_update_memcg(struct sock *sk)
 {
-	if (mem_cgroup_sockets_enabled) {
-		struct mem_cgroup *memcg;
-		struct cg_proto *cg_proto;
+	struct mem_cgroup *memcg;
+	struct cg_proto *cg_proto;
 
-		BUG_ON(!sk->sk_prot->proto_cgroup);
+	BUG_ON(!sk->sk_prot->proto_cgroup);
 
-		/* Socket cloning can throw us here with sk_cgrp already
-		 * filled. It won't however, necessarily happen from
-		 * process context. So the test for root memcg given
-		 * the current task's memcg won't help us in this case.
-		 *
-		 * Respecting the original socket's memcg is a better
-		 * decision in this case.
-		 */
-		if (sk->sk_cgrp) {
-			BUG_ON(mem_cgroup_is_root(sk->sk_cgrp->memcg));
-			css_get(&sk->sk_cgrp->memcg->css);
-			return;
-		}
+	/* Socket cloning can throw us here with sk_cgrp already
+	 * filled. It won't however, necessarily happen from
+	 * process context. So the test for root memcg given
+	 * the current task's memcg won't help us in this case.
+	 *
+	 * Respecting the original socket's memcg is a better
+	 * decision in this case.
+	 */
+	if (sk->sk_cgrp) {
+		BUG_ON(mem_cgroup_is_root(sk->sk_cgrp->memcg));
+		css_get(&sk->sk_cgrp->memcg->css);
+		return;
+	}
 
-		rcu_read_lock();
-		memcg = mem_cgroup_from_task(current);
-		cg_proto = sk->sk_prot->proto_cgroup(memcg);
-		if (cg_proto && cg_proto->active &&
-		    css_tryget_online(&memcg->css)) {
-			sk->sk_cgrp = cg_proto;
-		}
-		rcu_read_unlock();
+	rcu_read_lock();
+	memcg = mem_cgroup_from_task(current);
+	cg_proto = sk->sk_prot->proto_cgroup(memcg);
+	if (cg_proto && cg_proto->active &&
+	    css_tryget_online(&memcg->css)) {
+		sk->sk_cgrp = cg_proto;
 	}
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL(sock_update_memcg);
 
 void sock_release_memcg(struct sock *sk)
 {
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
-		struct mem_cgroup *memcg;
-		WARN_ON(!sk->sk_cgrp->memcg);
-		memcg = sk->sk_cgrp->memcg;
-		css_put(&sk->sk_cgrp->memcg->css);
-	}
+	WARN_ON(!sk->sk_cgrp->memcg);
+	css_put(&sk->sk_cgrp->memcg->css);
 }
 
 struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg)
diff --git a/net/core/sock.c b/net/core/sock.c
index 1e4dd54..04e54bc 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1488,12 +1488,6 @@ void sk_free(struct sock *sk)
 }
 EXPORT_SYMBOL(sk_free);
 
-static void sk_update_clone(const struct sock *sk, struct sock *newsk)
-{
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
-		sock_update_memcg(newsk);
-}
-
 /**
  *	sk_clone_lock - clone a socket, and lock its clone
  *	@sk: the socket to clone
@@ -1589,7 +1583,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 		sk_set_socket(newsk, NULL);
 		newsk->sk_wq = NULL;
 
-		sk_update_clone(sk, newsk);
+		if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+			sock_update_memcg(newsk);
 
 		if (newsk->sk_prot->sockets_allocated)
 			sk_sockets_allocated_inc(newsk);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c172877..f166c28 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -422,7 +422,8 @@ void tcp_init_sock(struct sock *sk)
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 
 	local_bh_disable();
-	sock_update_memcg(sk);
+	if (mem_cgroup_sockets_enabled)
+		sock_update_memcg(sk);
 	sk_sockets_allocated_inc(sk);
 	local_bh_enable();
 }
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index db00343..4027e02 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1813,7 +1813,9 @@ void tcp_v4_destroy_sock(struct sock *sk)
 	tcp_saved_syn_free(tp);
 
 	sk_sockets_allocated_dec(sk);
-	sock_release_memcg(sk);
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		sock_release_memcg(sk);
 }
 EXPORT_SYMBOL(tcp_v4_destroy_sock);
 
-- 
2.6.3

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 05/14] net: tcp_memcontrol: remove dead per-memcg count of allocated sockets
  2015-12-08 15:30 ` Johannes Weiner
@ 2015-12-08 15:30   ` Johannes Weiner
  -1 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-08 15:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, netdev, cgroups, linux-kernel

The number of allocated sockets is used for calculations in the soft
limit phase, where packets are accepted but the socket is under memory
pressure. Since there is no soft limit phase in tcp_memcontrol, and
memory pressure is only entered when packets are already dropped, this
is actually dead code. Remove it.

As this is the last user of parent_cg_proto(), remove that too.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: David S. Miller <davem@davemloft.net>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
---
 include/linux/memcontrol.h |  1 -
 include/net/sock.h         | 39 +++------------------------------------
 net/ipv4/tcp_memcontrol.c  |  3 ---
 3 files changed, 3 insertions(+), 40 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index aed64b6..1df8e89 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -87,7 +87,6 @@ enum mem_cgroup_events_target {
 
 struct cg_proto {
 	struct page_counter	memory_allocated;	/* Current allocated memory. */
-	struct percpu_counter	sockets_allocated;	/* Current number of sockets. */
 	int			memory_pressure;
 	bool			active;
 	long			sysctl_mem[3];
diff --git a/include/net/sock.h b/include/net/sock.h
index e27a8bb..7afbdab 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1095,19 +1095,9 @@ static inline void sk_refcnt_debug_release(const struct sock *sk)
 
 #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_NET)
 extern struct static_key memcg_socket_limit_enabled;
-static inline struct cg_proto *parent_cg_proto(struct proto *proto,
-					       struct cg_proto *cg_proto)
-{
-	return proto->proto_cgroup(parent_mem_cgroup(cg_proto->memcg));
-}
 #define mem_cgroup_sockets_enabled static_key_false(&memcg_socket_limit_enabled)
 #else
 #define mem_cgroup_sockets_enabled 0
-static inline struct cg_proto *parent_cg_proto(struct proto *proto,
-					       struct cg_proto *cg_proto)
-{
-	return NULL;
-}
 #endif
 
 static inline bool sk_stream_memory_free(const struct sock *sk)
@@ -1233,41 +1223,18 @@ sk_memory_allocated_sub(struct sock *sk, int amt)
 
 static inline void sk_sockets_allocated_dec(struct sock *sk)
 {
-	struct proto *prot = sk->sk_prot;
-
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
-		struct cg_proto *cg_proto = sk->sk_cgrp;
-
-		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
-			percpu_counter_dec(&cg_proto->sockets_allocated);
-	}
-
-	percpu_counter_dec(prot->sockets_allocated);
+	percpu_counter_dec(sk->sk_prot->sockets_allocated);
 }
 
 static inline void sk_sockets_allocated_inc(struct sock *sk)
 {
-	struct proto *prot = sk->sk_prot;
-
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
-		struct cg_proto *cg_proto = sk->sk_cgrp;
-
-		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
-			percpu_counter_inc(&cg_proto->sockets_allocated);
-	}
-
-	percpu_counter_inc(prot->sockets_allocated);
+	percpu_counter_inc(sk->sk_prot->sockets_allocated);
 }
 
 static inline int
 sk_sockets_allocated_read_positive(struct sock *sk)
 {
-	struct proto *prot = sk->sk_prot;
-
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
-		return percpu_counter_read_positive(&sk->sk_cgrp->sockets_allocated);
-
-	return percpu_counter_read_positive(prot->sockets_allocated);
+	return percpu_counter_read_positive(sk->sk_prot->sockets_allocated);
 }
 
 static inline int
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index d07579a..6759e0d 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -32,7 +32,6 @@ int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
 		counter_parent = &parent_cg->memory_allocated;
 
 	page_counter_init(&cg_proto->memory_allocated, counter_parent);
-	percpu_counter_init(&cg_proto->sockets_allocated, 0, GFP_KERNEL);
 
 	return 0;
 }
@@ -46,8 +45,6 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg)
 	if (!cg_proto)
 		return;
 
-	percpu_counter_destroy(&cg_proto->sockets_allocated);
-
 	if (cg_proto->active)
 		static_key_slow_dec(&memcg_socket_limit_enabled);
 
-- 
2.6.3


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 05/14] net: tcp_memcontrol: remove dead per-memcg count of allocated sockets
@ 2015-12-08 15:30   ` Johannes Weiner
  0 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-08 15:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, netdev, cgroups, linux-kernel

The number of allocated sockets is used for calculations in the soft
limit phase, where packets are accepted but the socket is under memory
pressure. Since there is no soft limit phase in tcp_memcontrol, and
memory pressure is only entered when packets are already dropped, this
is actually dead code. Remove it.

As this is the last user of parent_cg_proto(), remove that too.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: David S. Miller <davem@davemloft.net>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
---
 include/linux/memcontrol.h |  1 -
 include/net/sock.h         | 39 +++------------------------------------
 net/ipv4/tcp_memcontrol.c  |  3 ---
 3 files changed, 3 insertions(+), 40 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index aed64b6..1df8e89 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -87,7 +87,6 @@ enum mem_cgroup_events_target {
 
 struct cg_proto {
 	struct page_counter	memory_allocated;	/* Current allocated memory. */
-	struct percpu_counter	sockets_allocated;	/* Current number of sockets. */
 	int			memory_pressure;
 	bool			active;
 	long			sysctl_mem[3];
diff --git a/include/net/sock.h b/include/net/sock.h
index e27a8bb..7afbdab 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1095,19 +1095,9 @@ static inline void sk_refcnt_debug_release(const struct sock *sk)
 
 #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_NET)
 extern struct static_key memcg_socket_limit_enabled;
-static inline struct cg_proto *parent_cg_proto(struct proto *proto,
-					       struct cg_proto *cg_proto)
-{
-	return proto->proto_cgroup(parent_mem_cgroup(cg_proto->memcg));
-}
 #define mem_cgroup_sockets_enabled static_key_false(&memcg_socket_limit_enabled)
 #else
 #define mem_cgroup_sockets_enabled 0
-static inline struct cg_proto *parent_cg_proto(struct proto *proto,
-					       struct cg_proto *cg_proto)
-{
-	return NULL;
-}
 #endif
 
 static inline bool sk_stream_memory_free(const struct sock *sk)
@@ -1233,41 +1223,18 @@ sk_memory_allocated_sub(struct sock *sk, int amt)
 
 static inline void sk_sockets_allocated_dec(struct sock *sk)
 {
-	struct proto *prot = sk->sk_prot;
-
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
-		struct cg_proto *cg_proto = sk->sk_cgrp;
-
-		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
-			percpu_counter_dec(&cg_proto->sockets_allocated);
-	}
-
-	percpu_counter_dec(prot->sockets_allocated);
+	percpu_counter_dec(sk->sk_prot->sockets_allocated);
 }
 
 static inline void sk_sockets_allocated_inc(struct sock *sk)
 {
-	struct proto *prot = sk->sk_prot;
-
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
-		struct cg_proto *cg_proto = sk->sk_cgrp;
-
-		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
-			percpu_counter_inc(&cg_proto->sockets_allocated);
-	}
-
-	percpu_counter_inc(prot->sockets_allocated);
+	percpu_counter_inc(sk->sk_prot->sockets_allocated);
 }
 
 static inline int
 sk_sockets_allocated_read_positive(struct sock *sk)
 {
-	struct proto *prot = sk->sk_prot;
-
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
-		return percpu_counter_read_positive(&sk->sk_cgrp->sockets_allocated);
-
-	return percpu_counter_read_positive(prot->sockets_allocated);
+	return percpu_counter_read_positive(sk->sk_prot->sockets_allocated);
 }
 
 static inline int
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index d07579a..6759e0d 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -32,7 +32,6 @@ int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
 		counter_parent = &parent_cg->memory_allocated;
 
 	page_counter_init(&cg_proto->memory_allocated, counter_parent);
-	percpu_counter_init(&cg_proto->sockets_allocated, 0, GFP_KERNEL);
 
 	return 0;
 }
@@ -46,8 +45,6 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg)
 	if (!cg_proto)
 		return;
 
-	percpu_counter_destroy(&cg_proto->sockets_allocated);
-
 	if (cg_proto->active)
 		static_key_slow_dec(&memcg_socket_limit_enabled);
 
-- 
2.6.3

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 06/14] net: tcp_memcontrol: simplify the per-memcg limit access
  2015-12-08 15:30 ` Johannes Weiner
@ 2015-12-08 15:30   ` Johannes Weiner
  -1 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-08 15:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, netdev, cgroups, linux-kernel

tcp_memcontrol replicates the global sysctl_mem limit array per
cgroup, but it only ever sets these entries to the value of the
memory_allocated page_counter limit. Use the latter directly.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Acked-by: David S. Miller <davem@davemloft.net>
---
 include/linux/memcontrol.h | 1 -
 include/net/sock.h         | 8 +++++---
 net/ipv4/tcp_memcontrol.c  | 8 --------
 3 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 1df8e89..be72aea 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -89,7 +89,6 @@ struct cg_proto {
 	struct page_counter	memory_allocated;	/* Current allocated memory. */
 	int			memory_pressure;
 	bool			active;
-	long			sysctl_mem[3];
 	/*
 	 * memcg field is used to find which memcg we belong directly
 	 * Each memcg struct can hold more than one cg_proto, so container_of
diff --git a/include/net/sock.h b/include/net/sock.h
index 7afbdab..0b333c2 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1159,10 +1159,12 @@ static inline void sk_enter_memory_pressure(struct sock *sk)
 
 static inline long sk_prot_mem_limits(const struct sock *sk, int index)
 {
-	long *prot = sk->sk_prot->sysctl_mem;
+	long limit = sk->sk_prot->sysctl_mem[index];
+
 	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
-		prot = sk->sk_cgrp->sysctl_mem;
-	return prot[index];
+		limit = min_t(long, limit, sk->sk_cgrp->memory_allocated.limit);
+
+	return limit;
 }
 
 static inline void memcg_memory_allocated_add(struct cg_proto *prot,
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index 6759e0d..ef4268d 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -21,9 +21,6 @@ int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
 	if (!cg_proto)
 		return 0;
 
-	cg_proto->sysctl_mem[0] = sysctl_tcp_mem[0];
-	cg_proto->sysctl_mem[1] = sysctl_tcp_mem[1];
-	cg_proto->sysctl_mem[2] = sysctl_tcp_mem[2];
 	cg_proto->memory_pressure = 0;
 	cg_proto->memcg = memcg;
 
@@ -54,7 +51,6 @@ EXPORT_SYMBOL(tcp_destroy_cgroup);
 static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages)
 {
 	struct cg_proto *cg_proto;
-	int i;
 	int ret;
 
 	cg_proto = tcp_prot.proto_cgroup(memcg);
@@ -65,10 +61,6 @@ static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages)
 	if (ret)
 		return ret;
 
-	for (i = 0; i < 3; i++)
-		cg_proto->sysctl_mem[i] = min_t(long, nr_pages,
-						sysctl_tcp_mem[i]);
-
 	if (!cg_proto->active) {
 		/*
 		 * The active flag needs to be written after the static_key
-- 
2.6.3


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 06/14] net: tcp_memcontrol: simplify the per-memcg limit access
@ 2015-12-08 15:30   ` Johannes Weiner
  0 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-08 15:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, netdev, cgroups, linux-kernel

tcp_memcontrol replicates the global sysctl_mem limit array per
cgroup, but it only ever sets these entries to the value of the
memory_allocated page_counter limit. Use the latter directly.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Acked-by: David S. Miller <davem@davemloft.net>
---
 include/linux/memcontrol.h | 1 -
 include/net/sock.h         | 8 +++++---
 net/ipv4/tcp_memcontrol.c  | 8 --------
 3 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 1df8e89..be72aea 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -89,7 +89,6 @@ struct cg_proto {
 	struct page_counter	memory_allocated;	/* Current allocated memory. */
 	int			memory_pressure;
 	bool			active;
-	long			sysctl_mem[3];
 	/*
 	 * memcg field is used to find which memcg we belong directly
 	 * Each memcg struct can hold more than one cg_proto, so container_of
diff --git a/include/net/sock.h b/include/net/sock.h
index 7afbdab..0b333c2 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1159,10 +1159,12 @@ static inline void sk_enter_memory_pressure(struct sock *sk)
 
 static inline long sk_prot_mem_limits(const struct sock *sk, int index)
 {
-	long *prot = sk->sk_prot->sysctl_mem;
+	long limit = sk->sk_prot->sysctl_mem[index];
+
 	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
-		prot = sk->sk_cgrp->sysctl_mem;
-	return prot[index];
+		limit = min_t(long, limit, sk->sk_cgrp->memory_allocated.limit);
+
+	return limit;
 }
 
 static inline void memcg_memory_allocated_add(struct cg_proto *prot,
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index 6759e0d..ef4268d 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -21,9 +21,6 @@ int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
 	if (!cg_proto)
 		return 0;
 
-	cg_proto->sysctl_mem[0] = sysctl_tcp_mem[0];
-	cg_proto->sysctl_mem[1] = sysctl_tcp_mem[1];
-	cg_proto->sysctl_mem[2] = sysctl_tcp_mem[2];
 	cg_proto->memory_pressure = 0;
 	cg_proto->memcg = memcg;
 
@@ -54,7 +51,6 @@ EXPORT_SYMBOL(tcp_destroy_cgroup);
 static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages)
 {
 	struct cg_proto *cg_proto;
-	int i;
 	int ret;
 
 	cg_proto = tcp_prot.proto_cgroup(memcg);
@@ -65,10 +61,6 @@ static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages)
 	if (ret)
 		return ret;
 
-	for (i = 0; i < 3; i++)
-		cg_proto->sysctl_mem[i] = min_t(long, nr_pages,
-						sysctl_tcp_mem[i]);
-
 	if (!cg_proto->active) {
 		/*
 		 * The active flag needs to be written after the static_key
-- 
2.6.3

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 07/14] net: tcp_memcontrol: sanitize tcp memory accounting callbacks
  2015-12-08 15:30 ` Johannes Weiner
@ 2015-12-08 15:30   ` Johannes Weiner
  -1 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-08 15:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, netdev, cgroups, linux-kernel

There won't be a tcp control soft limit, so integrating the memcg code
into the global skmem limiting scheme complicates things
unnecessarily. Replace this with simple and clear charge and uncharge
calls--hidden behind a jump label--to account skb memory.

Note that this is not purely aesthetic: as a result of shoehorning the
per-memcg code into the same memory accounting functions that handle
the global level, the old code would compare the per-memcg consumption
against the smaller of the per-memcg limit and the global limit. This
allowed the total consumption of multiple sockets to exceed the global
limit, as long as the individual sockets stayed within bounds. After
this change, the code will always compare the per-memcg consumption to
the per-memcg limit, and the global consumption to the global limit,
and thus close this loophole.

Without a soft limit, the per-memcg memory pressure state in sockets
is generally questionable. However, we did it until now, so we
continue to enter it when the hard limit is hit, and packets are
dropped, to let other sockets in the cgroup know that they shouldn't
grow their transmit windows, either. However, keep it simple in the
new callback model and leave memory pressure lazily when the next
packet is accepted (as opposed to doing it synchronously when packets
are processed). When packets are dropped, network performance will
already be in the toilet, so that should be a reasonable trade-off.

As described above, consumption is now checked on the per-memcg level
and the global level separately. Likewise, memory pressure states are
maintained on both the per-memcg level and the global level, and a
socket is considered under pressure when either level asserts as much.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Acked-by: David S. Miller <davem@davemloft.net>
---
 include/linux/memcontrol.h | 19 +++++++++-----
 include/net/sock.h         | 64 ++++++----------------------------------------
 include/net/tcp.h          |  5 ++--
 mm/memcontrol.c            | 32 +++++++++++++++++++++++
 net/core/sock.c            | 26 +++++++++++--------
 net/ipv4/tcp_output.c      |  7 +++--
 6 files changed, 77 insertions(+), 76 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index be72aea..ef3f584 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -664,12 +664,6 @@ void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
 }
 #endif /* CONFIG_MEMCG */
 
-enum {
-	UNDER_LIMIT,
-	SOFT_LIMIT,
-	OVER_LIMIT,
-};
-
 #ifdef CONFIG_CGROUP_WRITEBACK
 
 struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg);
@@ -698,6 +692,19 @@ static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb,
 struct sock;
 void sock_update_memcg(struct sock *sk);
 void sock_release_memcg(struct sock *sk);
+bool mem_cgroup_charge_skmem(struct cg_proto *proto, unsigned int nr_pages);
+void mem_cgroup_uncharge_skmem(struct cg_proto *proto, unsigned int nr_pages);
+#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
+static inline bool mem_cgroup_under_socket_pressure(struct cg_proto *proto)
+{
+	return proto->memory_pressure;
+}
+#else
+static inline bool mem_cgroup_under_socket_pressure(struct cg_proto *proto)
+{
+	return false;
+}
+#endif
 
 #ifdef CONFIG_MEMCG_KMEM
 extern struct static_key memcg_kmem_enabled_key;
diff --git a/include/net/sock.h b/include/net/sock.h
index 0b333c2..888aa3f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1126,8 +1126,9 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
 	if (!sk->sk_prot->memory_pressure)
 		return false;
 
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
-		return !!sk->sk_cgrp->memory_pressure;
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp &&
+	    mem_cgroup_under_socket_pressure(sk->sk_cgrp))
+		return true;
 
 	return !!*sk->sk_prot->memory_pressure;
 }
@@ -1141,9 +1142,6 @@ static inline void sk_leave_memory_pressure(struct sock *sk)
 
 	if (*memory_pressure)
 		*memory_pressure = 0;
-
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
-		sk->sk_cgrp->memory_pressure = 0;
 }
 
 static inline void sk_enter_memory_pressure(struct sock *sk)
@@ -1151,76 +1149,30 @@ static inline void sk_enter_memory_pressure(struct sock *sk)
 	if (!sk->sk_prot->enter_memory_pressure)
 		return;
 
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
-		sk->sk_cgrp->memory_pressure = 1;
-
 	sk->sk_prot->enter_memory_pressure(sk);
 }
 
 static inline long sk_prot_mem_limits(const struct sock *sk, int index)
 {
-	long limit = sk->sk_prot->sysctl_mem[index];
-
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
-		limit = min_t(long, limit, sk->sk_cgrp->memory_allocated.limit);
-
-	return limit;
-}
-
-static inline void memcg_memory_allocated_add(struct cg_proto *prot,
-					      unsigned long amt,
-					      int *parent_status)
-{
-	struct page_counter *counter;
-
-	if (page_counter_try_charge(&prot->memory_allocated, amt, &counter))
-		return;
-
-	page_counter_charge(&prot->memory_allocated, amt);
-	*parent_status = OVER_LIMIT;
-}
-
-static inline void memcg_memory_allocated_sub(struct cg_proto *prot,
-					      unsigned long amt)
-{
-	page_counter_uncharge(&prot->memory_allocated, amt);
+	return sk->sk_prot->sysctl_mem[index];
 }
 
 static inline long
 sk_memory_allocated(const struct sock *sk)
 {
-	struct proto *prot = sk->sk_prot;
-
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
-		return page_counter_read(&sk->sk_cgrp->memory_allocated);
-
-	return atomic_long_read(prot->memory_allocated);
+	return atomic_long_read(sk->sk_prot->memory_allocated);
 }
 
 static inline long
-sk_memory_allocated_add(struct sock *sk, int amt, int *parent_status)
+sk_memory_allocated_add(struct sock *sk, int amt)
 {
-	struct proto *prot = sk->sk_prot;
-
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
-		memcg_memory_allocated_add(sk->sk_cgrp, amt, parent_status);
-		/* update the root cgroup regardless */
-		atomic_long_add_return(amt, prot->memory_allocated);
-		return page_counter_read(&sk->sk_cgrp->memory_allocated);
-	}
-
-	return atomic_long_add_return(amt, prot->memory_allocated);
+	return atomic_long_add_return(amt, sk->sk_prot->memory_allocated);
 }
 
 static inline void
 sk_memory_allocated_sub(struct sock *sk, int amt)
 {
-	struct proto *prot = sk->sk_prot;
-
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
-		memcg_memory_allocated_sub(sk->sk_cgrp, amt);
-
-	atomic_long_sub(amt, prot->memory_allocated);
+	atomic_long_sub(amt, sk->sk_prot->memory_allocated);
 }
 
 static inline void sk_sockets_allocated_dec(struct sock *sk)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index f80e74c..04517d6 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -292,8 +292,9 @@ extern int tcp_memory_pressure;
 /* optimized version of sk_under_memory_pressure() for TCP sockets */
 static inline bool tcp_under_memory_pressure(const struct sock *sk)
 {
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
-		return !!sk->sk_cgrp->memory_pressure;
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp &&
+	    mem_cgroup_under_socket_pressure(sk->sk_cgrp))
+		return true;
 
 	return tcp_memory_pressure;
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 0b78f82..4b586ea 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -338,6 +338,38 @@ struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg)
 }
 EXPORT_SYMBOL(tcp_proto_cgroup);
 
+/**
+ * mem_cgroup_charge_skmem - charge socket memory
+ * @proto: proto to charge
+ * @nr_pages: number of pages to charge
+ *
+ * Charges @nr_pages to @proto. Returns %true if the charge fit within
+ * @proto's configured limit, %false if the charge had to be forced.
+ */
+bool mem_cgroup_charge_skmem(struct cg_proto *proto, unsigned int nr_pages)
+{
+	struct page_counter *counter;
+
+	if (page_counter_try_charge(&proto->memory_allocated,
+				    nr_pages, &counter)) {
+		proto->memory_pressure = 0;
+		return true;
+	}
+	page_counter_charge(&proto->memory_allocated, nr_pages);
+	proto->memory_pressure = 1;
+	return false;
+}
+
+/**
+ * mem_cgroup_uncharge_skmem - uncharge socket memory
+ * @proto: proto to uncharge
+ * @nr_pages: number of pages to uncharge
+ */
+void mem_cgroup_uncharge_skmem(struct cg_proto *proto, unsigned int nr_pages)
+{
+	page_counter_uncharge(&proto->memory_allocated, nr_pages);
+}
+
 #endif
 
 #ifdef CONFIG_MEMCG_KMEM
diff --git a/net/core/sock.c b/net/core/sock.c
index 04e54bc..5b1b96f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2066,27 +2066,27 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
 	struct proto *prot = sk->sk_prot;
 	int amt = sk_mem_pages(size);
 	long allocated;
-	int parent_status = UNDER_LIMIT;
 
 	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
 
-	allocated = sk_memory_allocated_add(sk, amt, &parent_status);
+	allocated = sk_memory_allocated_add(sk, amt);
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp &&
+	    !mem_cgroup_charge_skmem(sk->sk_cgrp, amt))
+		goto suppress_allocation;
 
 	/* Under limit. */
-	if (parent_status == UNDER_LIMIT &&
-			allocated <= sk_prot_mem_limits(sk, 0)) {
+	if (allocated <= sk_prot_mem_limits(sk, 0)) {
 		sk_leave_memory_pressure(sk);
 		return 1;
 	}
 
-	/* Under pressure. (we or our parents) */
-	if ((parent_status > SOFT_LIMIT) ||
-			allocated > sk_prot_mem_limits(sk, 1))
+	/* Under pressure. */
+	if (allocated > sk_prot_mem_limits(sk, 1))
 		sk_enter_memory_pressure(sk);
 
-	/* Over hard limit (we or our parents) */
-	if ((parent_status == OVER_LIMIT) ||
-			(allocated > sk_prot_mem_limits(sk, 2)))
+	/* Over hard limit. */
+	if (allocated > sk_prot_mem_limits(sk, 2))
 		goto suppress_allocation;
 
 	/* guarantee minimum buffer size under pressure */
@@ -2135,6 +2135,9 @@ suppress_allocation:
 
 	sk_memory_allocated_sub(sk, amt);
 
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		mem_cgroup_uncharge_skmem(sk->sk_cgrp, amt);
+
 	return 0;
 }
 EXPORT_SYMBOL(__sk_mem_schedule);
@@ -2150,6 +2153,9 @@ void __sk_mem_reclaim(struct sock *sk, int amount)
 	sk_memory_allocated_sub(sk, amount);
 	sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
 
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		mem_cgroup_uncharge_skmem(sk->sk_cgrp, amount);
+
 	if (sk_under_memory_pressure(sk) &&
 	    (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
 		sk_leave_memory_pressure(sk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index cb7ca56..7aa168a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2813,13 +2813,16 @@ begin_fwd:
  */
 void sk_forced_mem_schedule(struct sock *sk, int size)
 {
-	int amt, status;
+	int amt;
 
 	if (size <= sk->sk_forward_alloc)
 		return;
 	amt = sk_mem_pages(size);
 	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
-	sk_memory_allocated_add(sk, amt, &status);
+	sk_memory_allocated_add(sk, amt);
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		mem_cgroup_charge_skmem(sk->sk_cgrp, amt);
 }
 
 /* Send a FIN. The caller locks the socket for us.
-- 
2.6.3


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 07/14] net: tcp_memcontrol: sanitize tcp memory accounting callbacks
@ 2015-12-08 15:30   ` Johannes Weiner
  0 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-08 15:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, netdev, cgroups, linux-kernel

There won't be a tcp control soft limit, so integrating the memcg code
into the global skmem limiting scheme complicates things
unnecessarily. Replace this with simple and clear charge and uncharge
calls--hidden behind a jump label--to account skb memory.

Note that this is not purely aesthetic: as a result of shoehorning the
per-memcg code into the same memory accounting functions that handle
the global level, the old code would compare the per-memcg consumption
against the smaller of the per-memcg limit and the global limit. This
allowed the total consumption of multiple sockets to exceed the global
limit, as long as the individual sockets stayed within bounds. After
this change, the code will always compare the per-memcg consumption to
the per-memcg limit, and the global consumption to the global limit,
and thus close this loophole.

Without a soft limit, the per-memcg memory pressure state in sockets
is generally questionable. However, we did it until now, so we
continue to enter it when the hard limit is hit, and packets are
dropped, to let other sockets in the cgroup know that they shouldn't
grow their transmit windows, either. However, keep it simple in the
new callback model and leave memory pressure lazily when the next
packet is accepted (as opposed to doing it synchronously when packets
are processed). When packets are dropped, network performance will
already be in the toilet, so that should be a reasonable trade-off.

As described above, consumption is now checked on the per-memcg level
and the global level separately. Likewise, memory pressure states are
maintained on both the per-memcg level and the global level, and a
socket is considered under pressure when either level asserts as much.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Acked-by: David S. Miller <davem@davemloft.net>
---
 include/linux/memcontrol.h | 19 +++++++++-----
 include/net/sock.h         | 64 ++++++----------------------------------------
 include/net/tcp.h          |  5 ++--
 mm/memcontrol.c            | 32 +++++++++++++++++++++++
 net/core/sock.c            | 26 +++++++++++--------
 net/ipv4/tcp_output.c      |  7 +++--
 6 files changed, 77 insertions(+), 76 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index be72aea..ef3f584 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -664,12 +664,6 @@ void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
 }
 #endif /* CONFIG_MEMCG */
 
-enum {
-	UNDER_LIMIT,
-	SOFT_LIMIT,
-	OVER_LIMIT,
-};
-
 #ifdef CONFIG_CGROUP_WRITEBACK
 
 struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg);
@@ -698,6 +692,19 @@ static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb,
 struct sock;
 void sock_update_memcg(struct sock *sk);
 void sock_release_memcg(struct sock *sk);
+bool mem_cgroup_charge_skmem(struct cg_proto *proto, unsigned int nr_pages);
+void mem_cgroup_uncharge_skmem(struct cg_proto *proto, unsigned int nr_pages);
+#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
+static inline bool mem_cgroup_under_socket_pressure(struct cg_proto *proto)
+{
+	return proto->memory_pressure;
+}
+#else
+static inline bool mem_cgroup_under_pressure(struct cg_proto *proto)
+{
+	return false;
+}
+#endif
 
 #ifdef CONFIG_MEMCG_KMEM
 extern struct static_key memcg_kmem_enabled_key;
diff --git a/include/net/sock.h b/include/net/sock.h
index 0b333c2..888aa3f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1126,8 +1126,9 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
 	if (!sk->sk_prot->memory_pressure)
 		return false;
 
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
-		return !!sk->sk_cgrp->memory_pressure;
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp &&
+	    mem_cgroup_under_socket_pressure(sk->sk_cgrp))
+		return true;
 
 	return !!*sk->sk_prot->memory_pressure;
 }
@@ -1141,9 +1142,6 @@ static inline void sk_leave_memory_pressure(struct sock *sk)
 
 	if (*memory_pressure)
 		*memory_pressure = 0;
-
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
-		sk->sk_cgrp->memory_pressure = 0;
 }
 
 static inline void sk_enter_memory_pressure(struct sock *sk)
@@ -1151,76 +1149,30 @@ static inline void sk_enter_memory_pressure(struct sock *sk)
 	if (!sk->sk_prot->enter_memory_pressure)
 		return;
 
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
-		sk->sk_cgrp->memory_pressure = 1;
-
 	sk->sk_prot->enter_memory_pressure(sk);
 }
 
 static inline long sk_prot_mem_limits(const struct sock *sk, int index)
 {
-	long limit = sk->sk_prot->sysctl_mem[index];
-
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
-		limit = min_t(long, limit, sk->sk_cgrp->memory_allocated.limit);
-
-	return limit;
-}
-
-static inline void memcg_memory_allocated_add(struct cg_proto *prot,
-					      unsigned long amt,
-					      int *parent_status)
-{
-	struct page_counter *counter;
-
-	if (page_counter_try_charge(&prot->memory_allocated, amt, &counter))
-		return;
-
-	page_counter_charge(&prot->memory_allocated, amt);
-	*parent_status = OVER_LIMIT;
-}
-
-static inline void memcg_memory_allocated_sub(struct cg_proto *prot,
-					      unsigned long amt)
-{
-	page_counter_uncharge(&prot->memory_allocated, amt);
+	return sk->sk_prot->sysctl_mem[index];
 }
 
 static inline long
 sk_memory_allocated(const struct sock *sk)
 {
-	struct proto *prot = sk->sk_prot;
-
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
-		return page_counter_read(&sk->sk_cgrp->memory_allocated);
-
-	return atomic_long_read(prot->memory_allocated);
+	return atomic_long_read(sk->sk_prot->memory_allocated);
 }
 
 static inline long
-sk_memory_allocated_add(struct sock *sk, int amt, int *parent_status)
+sk_memory_allocated_add(struct sock *sk, int amt)
 {
-	struct proto *prot = sk->sk_prot;
-
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
-		memcg_memory_allocated_add(sk->sk_cgrp, amt, parent_status);
-		/* update the root cgroup regardless */
-		atomic_long_add_return(amt, prot->memory_allocated);
-		return page_counter_read(&sk->sk_cgrp->memory_allocated);
-	}
-
-	return atomic_long_add_return(amt, prot->memory_allocated);
+	return atomic_long_add_return(amt, sk->sk_prot->memory_allocated);
 }
 
 static inline void
 sk_memory_allocated_sub(struct sock *sk, int amt)
 {
-	struct proto *prot = sk->sk_prot;
-
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
-		memcg_memory_allocated_sub(sk->sk_cgrp, amt);
-
-	atomic_long_sub(amt, prot->memory_allocated);
+	atomic_long_sub(amt, sk->sk_prot->memory_allocated);
 }
 
 static inline void sk_sockets_allocated_dec(struct sock *sk)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index f80e74c..04517d6 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -292,8 +292,9 @@ extern int tcp_memory_pressure;
 /* optimized version of sk_under_memory_pressure() for TCP sockets */
 static inline bool tcp_under_memory_pressure(const struct sock *sk)
 {
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
-		return !!sk->sk_cgrp->memory_pressure;
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp &&
+	    mem_cgroup_under_socket_pressure(sk->sk_cgrp))
+		return true;
 
 	return tcp_memory_pressure;
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 0b78f82..4b586ea 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -338,6 +338,38 @@ struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg)
 }
 EXPORT_SYMBOL(tcp_proto_cgroup);
 
+/**
+ * mem_cgroup_charge_skmem - charge socket memory
+ * @proto: proto to charge
+ * @nr_pages: number of pages to charge
+ *
+ * Charges @nr_pages to @proto. Returns %true if the charge fit within
+ * @proto's configured limit, %false if the charge had to be forced.
+ */
+bool mem_cgroup_charge_skmem(struct cg_proto *proto, unsigned int nr_pages)
+{
+	struct page_counter *counter;
+
+	if (page_counter_try_charge(&proto->memory_allocated,
+				    nr_pages, &counter)) {
+		proto->memory_pressure = 0;
+		return true;
+	}
+	page_counter_charge(&proto->memory_allocated, nr_pages);
+	proto->memory_pressure = 1;
+	return false;
+}
+
+/**
+ * mem_cgroup_uncharge_skmem - uncharge socket memory
+ * @proto - proto to uncharge
+ * @nr_pages - number of pages to uncharge
+ */
+void mem_cgroup_uncharge_skmem(struct cg_proto *proto, unsigned int nr_pages)
+{
+	page_counter_uncharge(&proto->memory_allocated, nr_pages);
+}
+
 #endif
 
 #ifdef CONFIG_MEMCG_KMEM
diff --git a/net/core/sock.c b/net/core/sock.c
index 04e54bc..5b1b96f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2066,27 +2066,27 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
 	struct proto *prot = sk->sk_prot;
 	int amt = sk_mem_pages(size);
 	long allocated;
-	int parent_status = UNDER_LIMIT;
 
 	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
 
-	allocated = sk_memory_allocated_add(sk, amt, &parent_status);
+	allocated = sk_memory_allocated_add(sk, amt);
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp &&
+	    !mem_cgroup_charge_skmem(sk->sk_cgrp, amt))
+		goto suppress_allocation;
 
 	/* Under limit. */
-	if (parent_status == UNDER_LIMIT &&
-			allocated <= sk_prot_mem_limits(sk, 0)) {
+	if (allocated <= sk_prot_mem_limits(sk, 0)) {
 		sk_leave_memory_pressure(sk);
 		return 1;
 	}
 
-	/* Under pressure. (we or our parents) */
-	if ((parent_status > SOFT_LIMIT) ||
-			allocated > sk_prot_mem_limits(sk, 1))
+	/* Under pressure. */
+	if (allocated > sk_prot_mem_limits(sk, 1))
 		sk_enter_memory_pressure(sk);
 
-	/* Over hard limit (we or our parents) */
-	if ((parent_status == OVER_LIMIT) ||
-			(allocated > sk_prot_mem_limits(sk, 2)))
+	/* Over hard limit. */
+	if (allocated > sk_prot_mem_limits(sk, 2))
 		goto suppress_allocation;
 
 	/* guarantee minimum buffer size under pressure */
@@ -2135,6 +2135,9 @@ suppress_allocation:
 
 	sk_memory_allocated_sub(sk, amt);
 
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		mem_cgroup_uncharge_skmem(sk->sk_cgrp, amt);
+
 	return 0;
 }
 EXPORT_SYMBOL(__sk_mem_schedule);
@@ -2150,6 +2153,9 @@ void __sk_mem_reclaim(struct sock *sk, int amount)
 	sk_memory_allocated_sub(sk, amount);
 	sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
 
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		mem_cgroup_uncharge_skmem(sk->sk_cgrp, amount);
+
 	if (sk_under_memory_pressure(sk) &&
 	    (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
 		sk_leave_memory_pressure(sk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index cb7ca56..7aa168a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2813,13 +2813,16 @@ begin_fwd:
  */
 void sk_forced_mem_schedule(struct sock *sk, int size)
 {
-	int amt, status;
+	int amt;
 
 	if (size <= sk->sk_forward_alloc)
 		return;
 	amt = sk_mem_pages(size);
 	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
-	sk_memory_allocated_add(sk, amt, &status);
+	sk_memory_allocated_add(sk, amt);
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		mem_cgroup_charge_skmem(sk->sk_cgrp, amt);
 }
 
 /* Send a FIN. The caller locks the socket for us.
-- 
2.6.3

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 08/14] net: tcp_memcontrol: simplify linkage between socket and page counter
  2015-12-08 15:30 ` Johannes Weiner
@ 2015-12-08 15:30   ` Johannes Weiner
  -1 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-08 15:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, netdev, cgroups, linux-kernel

There won't be any separate counters for socket memory consumed by
protocols other than TCP in the future. Remove the indirection and
link sockets directly to their owning memory cgroup.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Acked-by: David S. Miller <davem@davemloft.net>
---
 include/linux/memcontrol.h   | 20 ++++---------
 include/net/sock.h           | 36 +++---------------------
 include/net/tcp.h            |  4 +--
 include/net/tcp_memcontrol.h |  1 -
 mm/memcontrol.c              | 57 +++++++++++++++----------------------
 net/core/sock.c              | 52 +++++-----------------------------
 net/ipv4/tcp_ipv4.c          |  7 +----
 net/ipv4/tcp_memcontrol.c    | 67 +++++++++++++++++---------------------------
 net/ipv4/tcp_output.c        |  4 +--
 net/ipv6/tcp_ipv6.c          |  3 --
 10 files changed, 69 insertions(+), 182 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index ef3f584..daf6dbe 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -89,16 +89,6 @@ struct cg_proto {
 	struct page_counter	memory_allocated;	/* Current allocated memory. */
 	int			memory_pressure;
 	bool			active;
-	/*
-	 * memcg field is used to find which memcg we belong directly
-	 * Each memcg struct can hold more than one cg_proto, so container_of
-	 * won't really cut.
-	 *
-	 * The elegant solution would be having an inverse function to
-	 * proto_cgroup in struct proto, but that means polluting the structure
-	 * for everybody, instead of just for memcg users.
-	 */
-	struct mem_cgroup	*memcg;
 };
 
 #ifdef CONFIG_MEMCG
@@ -692,15 +682,15 @@ static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb,
 struct sock;
 void sock_update_memcg(struct sock *sk);
 void sock_release_memcg(struct sock *sk);
-bool mem_cgroup_charge_skmem(struct cg_proto *proto, unsigned int nr_pages);
-void mem_cgroup_uncharge_skmem(struct cg_proto *proto, unsigned int nr_pages);
+bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
+void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
 #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
-static inline bool mem_cgroup_under_socket_pressure(struct cg_proto *proto)
+static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
 {
-	return proto->memory_pressure;
+	return memcg->tcp_mem.memory_pressure;
 }
 #else
-static inline bool mem_cgroup_under_pressure(struct cg_proto *proto)
+static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
 {
 	return false;
 }
diff --git a/include/net/sock.h b/include/net/sock.h
index 888aa3f..1a94b85 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -69,22 +69,6 @@
 #include <net/tcp_states.h>
 #include <linux/net_tstamp.h>
 
-struct cgroup;
-struct cgroup_subsys;
-#ifdef CONFIG_NET
-int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss);
-void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg);
-#else
-static inline
-int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
-{
-	return 0;
-}
-static inline
-void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg)
-{
-}
-#endif
 /*
  * This structure really needs to be cleaned up.
  * Most of it is for TCP, and not used by any of
@@ -310,7 +294,7 @@ struct cg_proto;
   *	@sk_security: used by security modules
   *	@sk_mark: generic packet mark
   *	@sk_classid: this socket's cgroup classid
-  *	@sk_cgrp: this socket's cgroup-specific proto data
+  *	@sk_memcg: this socket's memory cgroup association
   *	@sk_write_pending: a write to stream socket waits to start
   *	@sk_state_change: callback to indicate change in the state of the sock
   *	@sk_data_ready: callback to indicate there is data to be processed
@@ -447,7 +431,7 @@ struct sock {
 #ifdef CONFIG_CGROUP_NET_CLASSID
 	u32			sk_classid;
 #endif
-	struct cg_proto		*sk_cgrp;
+	struct mem_cgroup	*sk_memcg;
 	void			(*sk_state_change)(struct sock *sk);
 	void			(*sk_data_ready)(struct sock *sk);
 	void			(*sk_write_space)(struct sock *sk);
@@ -1051,18 +1035,6 @@ struct proto {
 #ifdef SOCK_REFCNT_DEBUG
 	atomic_t		socks;
 #endif
-#ifdef CONFIG_MEMCG_KMEM
-	/*
-	 * cgroup specific init/deinit functions. Called once for all
-	 * protocols that implement it, from cgroups populate function.
-	 * This function has to setup any files the protocol want to
-	 * appear in the kmem cgroup filesystem.
-	 */
-	int			(*init_cgroup)(struct mem_cgroup *memcg,
-					       struct cgroup_subsys *ss);
-	void			(*destroy_cgroup)(struct mem_cgroup *memcg);
-	struct cg_proto		*(*proto_cgroup)(struct mem_cgroup *memcg);
-#endif
 };
 
 int proto_register(struct proto *prot, int alloc_slab);
@@ -1126,8 +1098,8 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
 	if (!sk->sk_prot->memory_pressure)
 		return false;
 
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp &&
-	    mem_cgroup_under_socket_pressure(sk->sk_cgrp))
+	if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
+	    mem_cgroup_under_socket_pressure(sk->sk_memcg))
 		return true;
 
 	return !!*sk->sk_prot->memory_pressure;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 04517d6..c008535 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -292,8 +292,8 @@ extern int tcp_memory_pressure;
 /* optimized version of sk_under_memory_pressure() for TCP sockets */
 static inline bool tcp_under_memory_pressure(const struct sock *sk)
 {
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp &&
-	    mem_cgroup_under_socket_pressure(sk->sk_cgrp))
+	if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
+	    mem_cgroup_under_socket_pressure(sk->sk_memcg))
 		return true;
 
 	return tcp_memory_pressure;
diff --git a/include/net/tcp_memcontrol.h b/include/net/tcp_memcontrol.h
index 05b94d9..3a17b16 100644
--- a/include/net/tcp_memcontrol.h
+++ b/include/net/tcp_memcontrol.h
@@ -1,7 +1,6 @@
 #ifndef _TCP_MEMCG_H
 #define _TCP_MEMCG_H
 
-struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg);
 int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss);
 void tcp_destroy_cgroup(struct mem_cgroup *memcg);
 #endif /* _TCP_MEMCG_H */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4b586ea..68d67fc 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -294,9 +294,6 @@ static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
 void sock_update_memcg(struct sock *sk)
 {
 	struct mem_cgroup *memcg;
-	struct cg_proto *cg_proto;
-
-	BUG_ON(!sk->sk_prot->proto_cgroup);
 
 	/* Socket cloning can throw us here with sk_cgrp already
 	 * filled. It won't however, necessarily happen from
@@ -306,68 +303,58 @@ void sock_update_memcg(struct sock *sk)
 	 * Respecting the original socket's memcg is a better
 	 * decision in this case.
 	 */
-	if (sk->sk_cgrp) {
-		BUG_ON(mem_cgroup_is_root(sk->sk_cgrp->memcg));
-		css_get(&sk->sk_cgrp->memcg->css);
+	if (sk->sk_memcg) {
+		BUG_ON(mem_cgroup_is_root(sk->sk_memcg));
+		css_get(&sk->sk_memcg->css);
 		return;
 	}
 
 	rcu_read_lock();
 	memcg = mem_cgroup_from_task(current);
-	cg_proto = sk->sk_prot->proto_cgroup(memcg);
-	if (cg_proto && cg_proto->active &&
-	    css_tryget_online(&memcg->css)) {
-		sk->sk_cgrp = cg_proto;
-	}
+	if (memcg != root_mem_cgroup &&
+	    memcg->tcp_mem.active &&
+	    css_tryget_online(&memcg->css))
+		sk->sk_memcg = memcg;
 	rcu_read_unlock();
 }
 EXPORT_SYMBOL(sock_update_memcg);
 
 void sock_release_memcg(struct sock *sk)
 {
-	WARN_ON(!sk->sk_cgrp->memcg);
-	css_put(&sk->sk_cgrp->memcg->css);
-}
-
-struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg)
-{
-	if (!memcg || mem_cgroup_is_root(memcg))
-		return NULL;
-
-	return &memcg->tcp_mem;
+	WARN_ON(!sk->sk_memcg);
+	css_put(&sk->sk_memcg->css);
 }
-EXPORT_SYMBOL(tcp_proto_cgroup);
 
 /**
  * mem_cgroup_charge_skmem - charge socket memory
- * @proto: proto to charge
+ * @memcg: memcg to charge
  * @nr_pages: number of pages to charge
  *
- * Charges @nr_pages to @proto. Returns %true if the charge fit within
- * @proto's configured limit, %false if the charge had to be forced.
+ * Charges @nr_pages to @memcg. Returns %true if the charge fit within
+ * @memcg's configured limit, %false if the charge had to be forced.
  */
-bool mem_cgroup_charge_skmem(struct cg_proto *proto, unsigned int nr_pages)
+bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
 	struct page_counter *counter;
 
-	if (page_counter_try_charge(&proto->memory_allocated,
+	if (page_counter_try_charge(&memcg->tcp_mem.memory_allocated,
 				    nr_pages, &counter)) {
-		proto->memory_pressure = 0;
+		memcg->tcp_mem.memory_pressure = 0;
 		return true;
 	}
-	page_counter_charge(&proto->memory_allocated, nr_pages);
-	proto->memory_pressure = 1;
+	page_counter_charge(&memcg->tcp_mem.memory_allocated, nr_pages);
+	memcg->tcp_mem.memory_pressure = 1;
 	return false;
 }
 
 /**
  * mem_cgroup_uncharge_skmem - uncharge socket memory
- * @proto - proto to uncharge
+ * @memcg - memcg to uncharge
  * @nr_pages - number of pages to uncharge
  */
-void mem_cgroup_uncharge_skmem(struct cg_proto *proto, unsigned int nr_pages)
+void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
-	page_counter_uncharge(&proto->memory_allocated, nr_pages);
+	page_counter_uncharge(&memcg->tcp_mem.memory_allocated, nr_pages);
 }
 
 #endif
@@ -3629,7 +3616,7 @@ static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
 	if (ret)
 		return ret;
 
-	return mem_cgroup_sockets_init(memcg, ss);
+	return tcp_init_cgroup(memcg, ss);
 }
 
 static void memcg_deactivate_kmem(struct mem_cgroup *memcg)
@@ -3685,7 +3672,7 @@ static void memcg_destroy_kmem(struct mem_cgroup *memcg)
 		static_key_slow_dec(&memcg_kmem_enabled_key);
 		WARN_ON(page_counter_read(&memcg->kmem));
 	}
-	mem_cgroup_sockets_destroy(memcg);
+	tcp_destroy_cgroup(memcg);
 }
 #else
 static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
diff --git a/net/core/sock.c b/net/core/sock.c
index 5b1b96f..6486b0d 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -194,44 +194,6 @@ bool sk_net_capable(const struct sock *sk, int cap)
 }
 EXPORT_SYMBOL(sk_net_capable);
 
-
-#ifdef CONFIG_MEMCG_KMEM
-int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
-{
-	struct proto *proto;
-	int ret = 0;
-
-	mutex_lock(&proto_list_mutex);
-	list_for_each_entry(proto, &proto_list, node) {
-		if (proto->init_cgroup) {
-			ret = proto->init_cgroup(memcg, ss);
-			if (ret)
-				goto out;
-		}
-	}
-
-	mutex_unlock(&proto_list_mutex);
-	return ret;
-out:
-	list_for_each_entry_continue_reverse(proto, &proto_list, node)
-		if (proto->destroy_cgroup)
-			proto->destroy_cgroup(memcg);
-	mutex_unlock(&proto_list_mutex);
-	return ret;
-}
-
-void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg)
-{
-	struct proto *proto;
-
-	mutex_lock(&proto_list_mutex);
-	list_for_each_entry_reverse(proto, &proto_list, node)
-		if (proto->destroy_cgroup)
-			proto->destroy_cgroup(memcg);
-	mutex_unlock(&proto_list_mutex);
-}
-#endif
-
 /*
  * Each address family might have different locking rules, so we have
  * one slock key per address family:
@@ -1583,7 +1545,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 		sk_set_socket(newsk, NULL);
 		newsk->sk_wq = NULL;
 
-		if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		if (mem_cgroup_sockets_enabled && sk->sk_memcg)
 			sock_update_memcg(newsk);
 
 		if (newsk->sk_prot->sockets_allocated)
@@ -2071,8 +2033,8 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
 
 	allocated = sk_memory_allocated_add(sk, amt);
 
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp &&
-	    !mem_cgroup_charge_skmem(sk->sk_cgrp, amt))
+	if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
+	    !mem_cgroup_charge_skmem(sk->sk_memcg, amt))
 		goto suppress_allocation;
 
 	/* Under limit. */
@@ -2135,8 +2097,8 @@ suppress_allocation:
 
 	sk_memory_allocated_sub(sk, amt);
 
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
-		mem_cgroup_uncharge_skmem(sk->sk_cgrp, amt);
+	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
+		mem_cgroup_uncharge_skmem(sk->sk_memcg, amt);
 
 	return 0;
 }
@@ -2153,8 +2115,8 @@ void __sk_mem_reclaim(struct sock *sk, int amount)
 	sk_memory_allocated_sub(sk, amount);
 	sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
 
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
-		mem_cgroup_uncharge_skmem(sk->sk_cgrp, amount);
+	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
+		mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
 
 	if (sk_under_memory_pressure(sk) &&
 	    (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4027e02..34c2678 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1814,7 +1814,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
 
 	sk_sockets_allocated_dec(sk);
 
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
 		sock_release_memcg(sk);
 }
 EXPORT_SYMBOL(tcp_v4_destroy_sock);
@@ -2339,11 +2339,6 @@ struct proto tcp_prot = {
 	.compat_setsockopt	= compat_tcp_setsockopt,
 	.compat_getsockopt	= compat_tcp_getsockopt,
 #endif
-#ifdef CONFIG_MEMCG_KMEM
-	.init_cgroup		= tcp_init_cgroup,
-	.destroy_cgroup		= tcp_destroy_cgroup,
-	.proto_cgroup		= tcp_proto_cgroup,
-#endif
 };
 EXPORT_SYMBOL(tcp_prot);
 
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index ef4268d..e507825 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -8,60 +8,47 @@
 
 int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
 {
+	struct mem_cgroup *parent = parent_mem_cgroup(memcg);
+	struct page_counter *counter_parent = NULL;
 	/*
 	 * The root cgroup does not use page_counters, but rather,
 	 * rely on the data already collected by the network
 	 * subsystem
 	 */
-	struct mem_cgroup *parent = parent_mem_cgroup(memcg);
-	struct page_counter *counter_parent = NULL;
-	struct cg_proto *cg_proto, *parent_cg;
-
-	cg_proto = tcp_prot.proto_cgroup(memcg);
-	if (!cg_proto)
+	if (memcg == root_mem_cgroup)
 		return 0;
 
-	cg_proto->memory_pressure = 0;
-	cg_proto->memcg = memcg;
+	memcg->tcp_mem.memory_pressure = 0;
 
-	parent_cg = tcp_prot.proto_cgroup(parent);
-	if (parent_cg)
-		counter_parent = &parent_cg->memory_allocated;
+	if (parent)
+		counter_parent = &parent->tcp_mem.memory_allocated;
 
-	page_counter_init(&cg_proto->memory_allocated, counter_parent);
+	page_counter_init(&memcg->tcp_mem.memory_allocated, counter_parent);
 
 	return 0;
 }
-EXPORT_SYMBOL(tcp_init_cgroup);
 
 void tcp_destroy_cgroup(struct mem_cgroup *memcg)
 {
-	struct cg_proto *cg_proto;
-
-	cg_proto = tcp_prot.proto_cgroup(memcg);
-	if (!cg_proto)
+	if (memcg == root_mem_cgroup)
 		return;
 
-	if (cg_proto->active)
+	if (memcg->tcp_mem.active)
 		static_key_slow_dec(&memcg_socket_limit_enabled);
-
 }
-EXPORT_SYMBOL(tcp_destroy_cgroup);
 
 static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages)
 {
-	struct cg_proto *cg_proto;
 	int ret;
 
-	cg_proto = tcp_prot.proto_cgroup(memcg);
-	if (!cg_proto)
+	if (memcg == root_mem_cgroup)
 		return -EINVAL;
 
-	ret = page_counter_limit(&cg_proto->memory_allocated, nr_pages);
+	ret = page_counter_limit(&memcg->tcp_mem.memory_allocated, nr_pages);
 	if (ret)
 		return ret;
 
-	if (!cg_proto->active) {
+	if (!memcg->tcp_mem.active) {
 		/*
 		 * The active flag needs to be written after the static_key
 		 * update. This is what guarantees that the socket activation
@@ -79,7 +66,7 @@ static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages)
 		 * patched in yet.
 		 */
 		static_key_slow_inc(&memcg_socket_limit_enabled);
-		cg_proto->active = true;
+		memcg->tcp_mem.active = true;
 	}
 
 	return 0;
@@ -123,32 +110,32 @@ static ssize_t tcp_cgroup_write(struct kernfs_open_file *of,
 static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
-	struct cg_proto *cg_proto = tcp_prot.proto_cgroup(memcg);
 	u64 val;
 
 	switch (cft->private) {
 	case RES_LIMIT:
-		if (!cg_proto)
-			return PAGE_COUNTER_MAX;
-		val = cg_proto->memory_allocated.limit;
+		if (memcg == root_mem_cgroup)
+			val = PAGE_COUNTER_MAX;
+		else
+			val = memcg->tcp_mem.memory_allocated.limit;
 		val *= PAGE_SIZE;
 		break;
 	case RES_USAGE:
-		if (!cg_proto)
+		if (memcg == root_mem_cgroup)
 			val = atomic_long_read(&tcp_memory_allocated);
 		else
-			val = page_counter_read(&cg_proto->memory_allocated);
+			val = page_counter_read(&memcg->tcp_mem.memory_allocated);
 		val *= PAGE_SIZE;
 		break;
 	case RES_FAILCNT:
-		if (!cg_proto)
+		if (memcg == root_mem_cgroup)
 			return 0;
-		val = cg_proto->memory_allocated.failcnt;
+		val = memcg->tcp_mem.memory_allocated.failcnt;
 		break;
 	case RES_MAX_USAGE:
-		if (!cg_proto)
+		if (memcg == root_mem_cgroup)
 			return 0;
-		val = cg_proto->memory_allocated.watermark;
+		val = memcg->tcp_mem.memory_allocated.watermark;
 		val *= PAGE_SIZE;
 		break;
 	default:
@@ -161,19 +148,17 @@ static ssize_t tcp_cgroup_reset(struct kernfs_open_file *of,
 				char *buf, size_t nbytes, loff_t off)
 {
 	struct mem_cgroup *memcg;
-	struct cg_proto *cg_proto;
 
 	memcg = mem_cgroup_from_css(of_css(of));
-	cg_proto = tcp_prot.proto_cgroup(memcg);
-	if (!cg_proto)
+	if (memcg == root_mem_cgroup)
 		return nbytes;
 
 	switch (of_cft(of)->private) {
 	case RES_MAX_USAGE:
-		page_counter_reset_watermark(&cg_proto->memory_allocated);
+		page_counter_reset_watermark(&memcg->tcp_mem.memory_allocated);
 		break;
 	case RES_FAILCNT:
-		cg_proto->memory_allocated.failcnt = 0;
+		memcg->tcp_mem.memory_allocated.failcnt = 0;
 		break;
 	}
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7aa168a..7b83a65 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2821,8 +2821,8 @@ void sk_forced_mem_schedule(struct sock *sk, int size)
 	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
 	sk_memory_allocated_add(sk, amt);
 
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
-		mem_cgroup_charge_skmem(sk->sk_cgrp, amt);
+	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
+		mem_cgroup_charge_skmem(sk->sk_memcg, amt);
 }
 
 /* Send a FIN. The caller locks the socket for us.
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index c5429a6..1bfb682 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1880,9 +1880,6 @@ struct proto tcpv6_prot = {
 	.compat_setsockopt	= compat_tcp_setsockopt,
 	.compat_getsockopt	= compat_tcp_getsockopt,
 #endif
-#ifdef CONFIG_MEMCG_KMEM
-	.proto_cgroup		= tcp_proto_cgroup,
-#endif
 	.clear_sk		= tcp_v6_clear_sk,
 };
 
-- 
2.6.3


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 08/14] net: tcp_memcontrol: simplify linkage between socket and page counter
@ 2015-12-08 15:30   ` Johannes Weiner
  0 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-08 15:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, netdev, cgroups, linux-kernel

There won't be any separate counters for socket memory consumed by
protocols other than TCP in the future. Remove the indirection and
link sockets directly to their owning memory cgroup.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Acked-by: David S. Miller <davem@davemloft.net>
---
 include/linux/memcontrol.h   | 20 ++++---------
 include/net/sock.h           | 36 +++---------------------
 include/net/tcp.h            |  4 +--
 include/net/tcp_memcontrol.h |  1 -
 mm/memcontrol.c              | 57 +++++++++++++++----------------------
 net/core/sock.c              | 52 +++++-----------------------------
 net/ipv4/tcp_ipv4.c          |  7 +----
 net/ipv4/tcp_memcontrol.c    | 67 +++++++++++++++++---------------------------
 net/ipv4/tcp_output.c        |  4 +--
 net/ipv6/tcp_ipv6.c          |  3 --
 10 files changed, 69 insertions(+), 182 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index ef3f584..daf6dbe 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -89,16 +89,6 @@ struct cg_proto {
 	struct page_counter	memory_allocated;	/* Current allocated memory. */
 	int			memory_pressure;
 	bool			active;
-	/*
-	 * memcg field is used to find which memcg we belong directly
-	 * Each memcg struct can hold more than one cg_proto, so container_of
-	 * won't really cut.
-	 *
-	 * The elegant solution would be having an inverse function to
-	 * proto_cgroup in struct proto, but that means polluting the structure
-	 * for everybody, instead of just for memcg users.
-	 */
-	struct mem_cgroup	*memcg;
 };
 
 #ifdef CONFIG_MEMCG
@@ -692,15 +682,15 @@ static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb,
 struct sock;
 void sock_update_memcg(struct sock *sk);
 void sock_release_memcg(struct sock *sk);
-bool mem_cgroup_charge_skmem(struct cg_proto *proto, unsigned int nr_pages);
-void mem_cgroup_uncharge_skmem(struct cg_proto *proto, unsigned int nr_pages);
+bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
+void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
 #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
-static inline bool mem_cgroup_under_socket_pressure(struct cg_proto *proto)
+static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
 {
-	return proto->memory_pressure;
+	return memcg->tcp_mem.memory_pressure;
 }
 #else
-static inline bool mem_cgroup_under_pressure(struct cg_proto *proto)
+static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
 {
 	return false;
 }
diff --git a/include/net/sock.h b/include/net/sock.h
index 888aa3f..1a94b85 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -69,22 +69,6 @@
 #include <net/tcp_states.h>
 #include <linux/net_tstamp.h>
 
-struct cgroup;
-struct cgroup_subsys;
-#ifdef CONFIG_NET
-int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss);
-void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg);
-#else
-static inline
-int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
-{
-	return 0;
-}
-static inline
-void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg)
-{
-}
-#endif
 /*
  * This structure really needs to be cleaned up.
  * Most of it is for TCP, and not used by any of
@@ -310,7 +294,7 @@ struct cg_proto;
   *	@sk_security: used by security modules
   *	@sk_mark: generic packet mark
   *	@sk_classid: this socket's cgroup classid
-  *	@sk_cgrp: this socket's cgroup-specific proto data
+  *	@sk_memcg: this socket's memory cgroup association
   *	@sk_write_pending: a write to stream socket waits to start
   *	@sk_state_change: callback to indicate change in the state of the sock
   *	@sk_data_ready: callback to indicate there is data to be processed
@@ -447,7 +431,7 @@ struct sock {
 #ifdef CONFIG_CGROUP_NET_CLASSID
 	u32			sk_classid;
 #endif
-	struct cg_proto		*sk_cgrp;
+	struct mem_cgroup	*sk_memcg;
 	void			(*sk_state_change)(struct sock *sk);
 	void			(*sk_data_ready)(struct sock *sk);
 	void			(*sk_write_space)(struct sock *sk);
@@ -1051,18 +1035,6 @@ struct proto {
 #ifdef SOCK_REFCNT_DEBUG
 	atomic_t		socks;
 #endif
-#ifdef CONFIG_MEMCG_KMEM
-	/*
-	 * cgroup specific init/deinit functions. Called once for all
-	 * protocols that implement it, from cgroups populate function.
-	 * This function has to setup any files the protocol want to
-	 * appear in the kmem cgroup filesystem.
-	 */
-	int			(*init_cgroup)(struct mem_cgroup *memcg,
-					       struct cgroup_subsys *ss);
-	void			(*destroy_cgroup)(struct mem_cgroup *memcg);
-	struct cg_proto		*(*proto_cgroup)(struct mem_cgroup *memcg);
-#endif
 };
 
 int proto_register(struct proto *prot, int alloc_slab);
@@ -1126,8 +1098,8 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
 	if (!sk->sk_prot->memory_pressure)
 		return false;
 
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp &&
-	    mem_cgroup_under_socket_pressure(sk->sk_cgrp))
+	if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
+	    mem_cgroup_under_socket_pressure(sk->sk_memcg))
 		return true;
 
 	return !!*sk->sk_prot->memory_pressure;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 04517d6..c008535 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -292,8 +292,8 @@ extern int tcp_memory_pressure;
 /* optimized version of sk_under_memory_pressure() for TCP sockets */
 static inline bool tcp_under_memory_pressure(const struct sock *sk)
 {
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp &&
-	    mem_cgroup_under_socket_pressure(sk->sk_cgrp))
+	if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
+	    mem_cgroup_under_socket_pressure(sk->sk_memcg))
 		return true;
 
 	return tcp_memory_pressure;
diff --git a/include/net/tcp_memcontrol.h b/include/net/tcp_memcontrol.h
index 05b94d9..3a17b16 100644
--- a/include/net/tcp_memcontrol.h
+++ b/include/net/tcp_memcontrol.h
@@ -1,7 +1,6 @@
 #ifndef _TCP_MEMCG_H
 #define _TCP_MEMCG_H
 
-struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg);
 int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss);
 void tcp_destroy_cgroup(struct mem_cgroup *memcg);
 #endif /* _TCP_MEMCG_H */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4b586ea..68d67fc 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -294,9 +294,6 @@ static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
 void sock_update_memcg(struct sock *sk)
 {
 	struct mem_cgroup *memcg;
-	struct cg_proto *cg_proto;
-
-	BUG_ON(!sk->sk_prot->proto_cgroup);
 
 	/* Socket cloning can throw us here with sk_cgrp already
 	 * filled. It won't however, necessarily happen from
@@ -306,68 +303,58 @@ void sock_update_memcg(struct sock *sk)
 	 * Respecting the original socket's memcg is a better
 	 * decision in this case.
 	 */
-	if (sk->sk_cgrp) {
-		BUG_ON(mem_cgroup_is_root(sk->sk_cgrp->memcg));
-		css_get(&sk->sk_cgrp->memcg->css);
+	if (sk->sk_memcg) {
+		BUG_ON(mem_cgroup_is_root(sk->sk_memcg));
+		css_get(&sk->sk_memcg->css);
 		return;
 	}
 
 	rcu_read_lock();
 	memcg = mem_cgroup_from_task(current);
-	cg_proto = sk->sk_prot->proto_cgroup(memcg);
-	if (cg_proto && cg_proto->active &&
-	    css_tryget_online(&memcg->css)) {
-		sk->sk_cgrp = cg_proto;
-	}
+	if (memcg != root_mem_cgroup &&
+	    memcg->tcp_mem.active &&
+	    css_tryget_online(&memcg->css))
+		sk->sk_memcg = memcg;
 	rcu_read_unlock();
 }
 EXPORT_SYMBOL(sock_update_memcg);
 
 void sock_release_memcg(struct sock *sk)
 {
-	WARN_ON(!sk->sk_cgrp->memcg);
-	css_put(&sk->sk_cgrp->memcg->css);
-}
-
-struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg)
-{
-	if (!memcg || mem_cgroup_is_root(memcg))
-		return NULL;
-
-	return &memcg->tcp_mem;
+	WARN_ON(!sk->sk_memcg);
+	css_put(&sk->sk_memcg->css);
 }
-EXPORT_SYMBOL(tcp_proto_cgroup);
 
 /**
  * mem_cgroup_charge_skmem - charge socket memory
- * @proto: proto to charge
+ * @memcg: memcg to charge
  * @nr_pages: number of pages to charge
  *
- * Charges @nr_pages to @proto. Returns %true if the charge fit within
- * @proto's configured limit, %false if the charge had to be forced.
+ * Charges @nr_pages to @memcg. Returns %true if the charge fit within
+ * @memcg's configured limit, %false if the charge had to be forced.
  */
-bool mem_cgroup_charge_skmem(struct cg_proto *proto, unsigned int nr_pages)
+bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
 	struct page_counter *counter;
 
-	if (page_counter_try_charge(&proto->memory_allocated,
+	if (page_counter_try_charge(&memcg->tcp_mem.memory_allocated,
 				    nr_pages, &counter)) {
-		proto->memory_pressure = 0;
+		memcg->tcp_mem.memory_pressure = 0;
 		return true;
 	}
-	page_counter_charge(&proto->memory_allocated, nr_pages);
-	proto->memory_pressure = 1;
+	page_counter_charge(&memcg->tcp_mem.memory_allocated, nr_pages);
+	memcg->tcp_mem.memory_pressure = 1;
 	return false;
 }
 
 /**
  * mem_cgroup_uncharge_skmem - uncharge socket memory
- * @proto - proto to uncharge
+ * @memcg: memcg to uncharge
  * @nr_pages - number of pages to uncharge
  */
-void mem_cgroup_uncharge_skmem(struct cg_proto *proto, unsigned int nr_pages)
+void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
-	page_counter_uncharge(&proto->memory_allocated, nr_pages);
+	page_counter_uncharge(&memcg->tcp_mem.memory_allocated, nr_pages);
 }
 
 #endif
@@ -3629,7 +3616,7 @@ static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
 	if (ret)
 		return ret;
 
-	return mem_cgroup_sockets_init(memcg, ss);
+	return tcp_init_cgroup(memcg, ss);
 }
 
 static void memcg_deactivate_kmem(struct mem_cgroup *memcg)
@@ -3685,7 +3672,7 @@ static void memcg_destroy_kmem(struct mem_cgroup *memcg)
 		static_key_slow_dec(&memcg_kmem_enabled_key);
 		WARN_ON(page_counter_read(&memcg->kmem));
 	}
-	mem_cgroup_sockets_destroy(memcg);
+	tcp_destroy_cgroup(memcg);
 }
 #else
 static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
diff --git a/net/core/sock.c b/net/core/sock.c
index 5b1b96f..6486b0d 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -194,44 +194,6 @@ bool sk_net_capable(const struct sock *sk, int cap)
 }
 EXPORT_SYMBOL(sk_net_capable);
 
-
-#ifdef CONFIG_MEMCG_KMEM
-int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
-{
-	struct proto *proto;
-	int ret = 0;
-
-	mutex_lock(&proto_list_mutex);
-	list_for_each_entry(proto, &proto_list, node) {
-		if (proto->init_cgroup) {
-			ret = proto->init_cgroup(memcg, ss);
-			if (ret)
-				goto out;
-		}
-	}
-
-	mutex_unlock(&proto_list_mutex);
-	return ret;
-out:
-	list_for_each_entry_continue_reverse(proto, &proto_list, node)
-		if (proto->destroy_cgroup)
-			proto->destroy_cgroup(memcg);
-	mutex_unlock(&proto_list_mutex);
-	return ret;
-}
-
-void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg)
-{
-	struct proto *proto;
-
-	mutex_lock(&proto_list_mutex);
-	list_for_each_entry_reverse(proto, &proto_list, node)
-		if (proto->destroy_cgroup)
-			proto->destroy_cgroup(memcg);
-	mutex_unlock(&proto_list_mutex);
-}
-#endif
-
 /*
  * Each address family might have different locking rules, so we have
  * one slock key per address family:
@@ -1583,7 +1545,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 		sk_set_socket(newsk, NULL);
 		newsk->sk_wq = NULL;
 
-		if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		if (mem_cgroup_sockets_enabled && sk->sk_memcg)
 			sock_update_memcg(newsk);
 
 		if (newsk->sk_prot->sockets_allocated)
@@ -2071,8 +2033,8 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
 
 	allocated = sk_memory_allocated_add(sk, amt);
 
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp &&
-	    !mem_cgroup_charge_skmem(sk->sk_cgrp, amt))
+	if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
+	    !mem_cgroup_charge_skmem(sk->sk_memcg, amt))
 		goto suppress_allocation;
 
 	/* Under limit. */
@@ -2135,8 +2097,8 @@ suppress_allocation:
 
 	sk_memory_allocated_sub(sk, amt);
 
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
-		mem_cgroup_uncharge_skmem(sk->sk_cgrp, amt);
+	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
+		mem_cgroup_uncharge_skmem(sk->sk_memcg, amt);
 
 	return 0;
 }
@@ -2153,8 +2115,8 @@ void __sk_mem_reclaim(struct sock *sk, int amount)
 	sk_memory_allocated_sub(sk, amount);
 	sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
 
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
-		mem_cgroup_uncharge_skmem(sk->sk_cgrp, amount);
+	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
+		mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
 
 	if (sk_under_memory_pressure(sk) &&
 	    (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4027e02..34c2678 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1814,7 +1814,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
 
 	sk_sockets_allocated_dec(sk);
 
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
 		sock_release_memcg(sk);
 }
 EXPORT_SYMBOL(tcp_v4_destroy_sock);
@@ -2339,11 +2339,6 @@ struct proto tcp_prot = {
 	.compat_setsockopt	= compat_tcp_setsockopt,
 	.compat_getsockopt	= compat_tcp_getsockopt,
 #endif
-#ifdef CONFIG_MEMCG_KMEM
-	.init_cgroup		= tcp_init_cgroup,
-	.destroy_cgroup		= tcp_destroy_cgroup,
-	.proto_cgroup		= tcp_proto_cgroup,
-#endif
 };
 EXPORT_SYMBOL(tcp_prot);
 
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index ef4268d..e507825 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -8,60 +8,47 @@
 
 int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
 {
+	struct mem_cgroup *parent = parent_mem_cgroup(memcg);
+	struct page_counter *counter_parent = NULL;
 	/*
 	 * The root cgroup does not use page_counters, but rather,
 	 * rely on the data already collected by the network
 	 * subsystem
 	 */
-	struct mem_cgroup *parent = parent_mem_cgroup(memcg);
-	struct page_counter *counter_parent = NULL;
-	struct cg_proto *cg_proto, *parent_cg;
-
-	cg_proto = tcp_prot.proto_cgroup(memcg);
-	if (!cg_proto)
+	if (memcg == root_mem_cgroup)
 		return 0;
 
-	cg_proto->memory_pressure = 0;
-	cg_proto->memcg = memcg;
+	memcg->tcp_mem.memory_pressure = 0;
 
-	parent_cg = tcp_prot.proto_cgroup(parent);
-	if (parent_cg)
-		counter_parent = &parent_cg->memory_allocated;
+	if (parent)
+		counter_parent = &parent->tcp_mem.memory_allocated;
 
-	page_counter_init(&cg_proto->memory_allocated, counter_parent);
+	page_counter_init(&memcg->tcp_mem.memory_allocated, counter_parent);
 
 	return 0;
 }
-EXPORT_SYMBOL(tcp_init_cgroup);
 
 void tcp_destroy_cgroup(struct mem_cgroup *memcg)
 {
-	struct cg_proto *cg_proto;
-
-	cg_proto = tcp_prot.proto_cgroup(memcg);
-	if (!cg_proto)
+	if (memcg == root_mem_cgroup)
 		return;
 
-	if (cg_proto->active)
+	if (memcg->tcp_mem.active)
 		static_key_slow_dec(&memcg_socket_limit_enabled);
-
 }
-EXPORT_SYMBOL(tcp_destroy_cgroup);
 
 static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages)
 {
-	struct cg_proto *cg_proto;
 	int ret;
 
-	cg_proto = tcp_prot.proto_cgroup(memcg);
-	if (!cg_proto)
+	if (memcg == root_mem_cgroup)
 		return -EINVAL;
 
-	ret = page_counter_limit(&cg_proto->memory_allocated, nr_pages);
+	ret = page_counter_limit(&memcg->tcp_mem.memory_allocated, nr_pages);
 	if (ret)
 		return ret;
 
-	if (!cg_proto->active) {
+	if (!memcg->tcp_mem.active) {
 		/*
 		 * The active flag needs to be written after the static_key
 		 * update. This is what guarantees that the socket activation
@@ -79,7 +66,7 @@ static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages)
 		 * patched in yet.
 		 */
 		static_key_slow_inc(&memcg_socket_limit_enabled);
-		cg_proto->active = true;
+		memcg->tcp_mem.active = true;
 	}
 
 	return 0;
@@ -123,32 +110,32 @@ static ssize_t tcp_cgroup_write(struct kernfs_open_file *of,
 static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
-	struct cg_proto *cg_proto = tcp_prot.proto_cgroup(memcg);
 	u64 val;
 
 	switch (cft->private) {
 	case RES_LIMIT:
-		if (!cg_proto)
-			return PAGE_COUNTER_MAX;
-		val = cg_proto->memory_allocated.limit;
+		if (memcg == root_mem_cgroup)
+			val = PAGE_COUNTER_MAX;
+		else
+			val = memcg->tcp_mem.memory_allocated.limit;
 		val *= PAGE_SIZE;
 		break;
 	case RES_USAGE:
-		if (!cg_proto)
+		if (memcg == root_mem_cgroup)
 			val = atomic_long_read(&tcp_memory_allocated);
 		else
-			val = page_counter_read(&cg_proto->memory_allocated);
+			val = page_counter_read(&memcg->tcp_mem.memory_allocated);
 		val *= PAGE_SIZE;
 		break;
 	case RES_FAILCNT:
-		if (!cg_proto)
+		if (memcg == root_mem_cgroup)
 			return 0;
-		val = cg_proto->memory_allocated.failcnt;
+		val = memcg->tcp_mem.memory_allocated.failcnt;
 		break;
 	case RES_MAX_USAGE:
-		if (!cg_proto)
+		if (memcg == root_mem_cgroup)
 			return 0;
-		val = cg_proto->memory_allocated.watermark;
+		val = memcg->tcp_mem.memory_allocated.watermark;
 		val *= PAGE_SIZE;
 		break;
 	default:
@@ -161,19 +148,17 @@ static ssize_t tcp_cgroup_reset(struct kernfs_open_file *of,
 				char *buf, size_t nbytes, loff_t off)
 {
 	struct mem_cgroup *memcg;
-	struct cg_proto *cg_proto;
 
 	memcg = mem_cgroup_from_css(of_css(of));
-	cg_proto = tcp_prot.proto_cgroup(memcg);
-	if (!cg_proto)
+	if (memcg == root_mem_cgroup)
 		return nbytes;
 
 	switch (of_cft(of)->private) {
 	case RES_MAX_USAGE:
-		page_counter_reset_watermark(&cg_proto->memory_allocated);
+		page_counter_reset_watermark(&memcg->tcp_mem.memory_allocated);
 		break;
 	case RES_FAILCNT:
-		cg_proto->memory_allocated.failcnt = 0;
+		memcg->tcp_mem.memory_allocated.failcnt = 0;
 		break;
 	}
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7aa168a..7b83a65 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2821,8 +2821,8 @@ void sk_forced_mem_schedule(struct sock *sk, int size)
 	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
 	sk_memory_allocated_add(sk, amt);
 
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
-		mem_cgroup_charge_skmem(sk->sk_cgrp, amt);
+	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
+		mem_cgroup_charge_skmem(sk->sk_memcg, amt);
 }
 
 /* Send a FIN. The caller locks the socket for us.
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index c5429a6..1bfb682 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1880,9 +1880,6 @@ struct proto tcpv6_prot = {
 	.compat_setsockopt	= compat_tcp_setsockopt,
 	.compat_getsockopt	= compat_tcp_getsockopt,
 #endif
-#ifdef CONFIG_MEMCG_KMEM
-	.proto_cgroup		= tcp_proto_cgroup,
-#endif
 	.clear_sk		= tcp_v6_clear_sk,
 };
 
-- 
2.6.3

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 09/14] mm: memcontrol: generalize the socket accounting jump label
  2015-12-08 15:30 ` Johannes Weiner
@ 2015-12-08 15:30   ` Johannes Weiner
  -1 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-08 15:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, netdev, cgroups, linux-kernel

The unified hierarchy memory controller is going to use this jump
label as well to control the networking callbacks. Move it to the
memory controller code and give it a more generic name.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Acked-by: David S. Miller <davem@davemloft.net>
---
 include/linux/memcontrol.h | 3 +++
 include/net/sock.h         | 7 -------
 mm/memcontrol.c            | 3 +++
 net/core/sock.c            | 5 -----
 net/ipv4/tcp_memcontrol.c  | 4 ++--
 5 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index daf6dbe..654c2fb 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -685,11 +685,14 @@ void sock_release_memcg(struct sock *sk);
 bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
 void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
 #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
+extern struct static_key memcg_sockets_enabled_key;
+#define mem_cgroup_sockets_enabled static_key_false(&memcg_sockets_enabled_key)
 static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
 {
 	return memcg->tcp_mem.memory_pressure;
 }
 #else
+#define mem_cgroup_sockets_enabled 0
 static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
 {
 	return false;
diff --git a/include/net/sock.h b/include/net/sock.h
index 1a94b85..fcc9442 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1065,13 +1065,6 @@ static inline void sk_refcnt_debug_release(const struct sock *sk)
 #define sk_refcnt_debug_release(sk) do { } while (0)
 #endif /* SOCK_REFCNT_DEBUG */
 
-#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_NET)
-extern struct static_key memcg_socket_limit_enabled;
-#define mem_cgroup_sockets_enabled static_key_false(&memcg_socket_limit_enabled)
-#else
-#define mem_cgroup_sockets_enabled 0
-#endif
-
 static inline bool sk_stream_memory_free(const struct sock *sk)
 {
 	if (sk->sk_wmem_queued >= sk->sk_sndbuf)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 68d67fc..0602bee 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -291,6 +291,9 @@ static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
 /* Writing them here to avoid exposing memcg's inner layout */
 #if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
 
+struct static_key memcg_sockets_enabled_key;
+EXPORT_SYMBOL(memcg_sockets_enabled_key);
+
 void sock_update_memcg(struct sock *sk)
 {
 	struct mem_cgroup *memcg;
diff --git a/net/core/sock.c b/net/core/sock.c
index 6486b0d..c5435b5 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -201,11 +201,6 @@ EXPORT_SYMBOL(sk_net_capable);
 static struct lock_class_key af_family_keys[AF_MAX];
 static struct lock_class_key af_family_slock_keys[AF_MAX];
 
-#if defined(CONFIG_MEMCG_KMEM)
-struct static_key memcg_socket_limit_enabled;
-EXPORT_SYMBOL(memcg_socket_limit_enabled);
-#endif
-
 /*
  * Make lock validator output more readable. (we pre-construct these
  * strings build-time, so that runtime initialization of socket
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index e507825..9a22e2d 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -34,7 +34,7 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg)
 		return;
 
 	if (memcg->tcp_mem.active)
-		static_key_slow_dec(&memcg_socket_limit_enabled);
+		static_key_slow_dec(&memcg_sockets_enabled_key);
 }
 
 static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages)
@@ -65,7 +65,7 @@ static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages)
 		 * because when this value change, the code to process it is not
 		 * patched in yet.
 		 */
-		static_key_slow_inc(&memcg_socket_limit_enabled);
+		static_key_slow_inc(&memcg_sockets_enabled_key);
 		memcg->tcp_mem.active = true;
 	}
 
-- 
2.6.3


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 09/14] mm: memcontrol: generalize the socket accounting jump label
@ 2015-12-08 15:30   ` Johannes Weiner
  0 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-08 15:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, netdev, cgroups, linux-kernel

The unified hierarchy memory controller is going to use this jump
label as well to control the networking callbacks. Move it to the
memory controller code and give it a more generic name.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Acked-by: David S. Miller <davem@davemloft.net>
---
 include/linux/memcontrol.h | 3 +++
 include/net/sock.h         | 7 -------
 mm/memcontrol.c            | 3 +++
 net/core/sock.c            | 5 -----
 net/ipv4/tcp_memcontrol.c  | 4 ++--
 5 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index daf6dbe..654c2fb 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -685,11 +685,14 @@ void sock_release_memcg(struct sock *sk);
 bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
 void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
 #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
+extern struct static_key memcg_sockets_enabled_key;
+#define mem_cgroup_sockets_enabled static_key_false(&memcg_sockets_enabled_key)
 static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
 {
 	return memcg->tcp_mem.memory_pressure;
 }
 #else
+#define mem_cgroup_sockets_enabled 0
 static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
 {
 	return false;
diff --git a/include/net/sock.h b/include/net/sock.h
index 1a94b85..fcc9442 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1065,13 +1065,6 @@ static inline void sk_refcnt_debug_release(const struct sock *sk)
 #define sk_refcnt_debug_release(sk) do { } while (0)
 #endif /* SOCK_REFCNT_DEBUG */
 
-#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_NET)
-extern struct static_key memcg_socket_limit_enabled;
-#define mem_cgroup_sockets_enabled static_key_false(&memcg_socket_limit_enabled)
-#else
-#define mem_cgroup_sockets_enabled 0
-#endif
-
 static inline bool sk_stream_memory_free(const struct sock *sk)
 {
 	if (sk->sk_wmem_queued >= sk->sk_sndbuf)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 68d67fc..0602bee 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -291,6 +291,9 @@ static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
 /* Writing them here to avoid exposing memcg's inner layout */
 #if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
 
+struct static_key memcg_sockets_enabled_key;
+EXPORT_SYMBOL(memcg_sockets_enabled_key);
+
 void sock_update_memcg(struct sock *sk)
 {
 	struct mem_cgroup *memcg;
diff --git a/net/core/sock.c b/net/core/sock.c
index 6486b0d..c5435b5 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -201,11 +201,6 @@ EXPORT_SYMBOL(sk_net_capable);
 static struct lock_class_key af_family_keys[AF_MAX];
 static struct lock_class_key af_family_slock_keys[AF_MAX];
 
-#if defined(CONFIG_MEMCG_KMEM)
-struct static_key memcg_socket_limit_enabled;
-EXPORT_SYMBOL(memcg_socket_limit_enabled);
-#endif
-
 /*
  * Make lock validator output more readable. (we pre-construct these
  * strings build-time, so that runtime initialization of socket
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index e507825..9a22e2d 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -34,7 +34,7 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg)
 		return;
 
 	if (memcg->tcp_mem.active)
-		static_key_slow_dec(&memcg_socket_limit_enabled);
+		static_key_slow_dec(&memcg_sockets_enabled_key);
 }
 
 static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages)
@@ -65,7 +65,7 @@ static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages)
 		 * because when this value change, the code to process it is not
 		 * patched in yet.
 		 */
-		static_key_slow_inc(&memcg_socket_limit_enabled);
+		static_key_slow_inc(&memcg_sockets_enabled_key);
 		memcg->tcp_mem.active = true;
 	}
 
-- 
2.6.3

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 10/14] mm: memcontrol: do not account memory+swap on unified hierarchy
  2015-12-08 15:30 ` Johannes Weiner
@ 2015-12-08 15:30   ` Johannes Weiner
  -1 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-08 15:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, netdev, cgroups, linux-kernel

The unified hierarchy memory controller doesn't expose the memory+swap
counter to userspace, but its accounting is hardcoded in all charge
paths right now, including the per-cpu charge cache ("the stock").

To avoid adding yet more pointless memory+swap accounting with the
socket memory support in unified hierarchy, disable the counter
altogether when in unified hierarchy mode.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Acked-by: David S. Miller <davem@davemloft.net>
---
 mm/memcontrol.c | 44 +++++++++++++++++++++++++-------------------
 1 file changed, 25 insertions(+), 19 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 0602bee..6b8c0f7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -87,6 +87,12 @@ int do_swap_account __read_mostly;
 #define do_swap_account		0
 #endif
 
+/* Whether legacy memory+swap accounting is active */
+static bool do_memsw_account(void)
+{
+	return !cgroup_subsys_on_dfl(memory_cgrp_subsys) && do_swap_account;
+}
+
 static const char * const mem_cgroup_stat_names[] = {
 	"cache",
 	"rss",
@@ -1177,7 +1183,7 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
 	if (count < limit)
 		margin = limit - count;
 
-	if (do_swap_account) {
+	if (do_memsw_account()) {
 		count = page_counter_read(&memcg->memsw);
 		limit = READ_ONCE(memcg->memsw.limit);
 		if (count <= limit)
@@ -1280,7 +1286,7 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
 		pr_cont(":");
 
 		for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
-			if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
+			if (i == MEM_CGROUP_STAT_SWAP && !do_memsw_account())
 				continue;
 			pr_cont(" %s:%luKB", mem_cgroup_stat_names[i],
 				K(mem_cgroup_read_stat(iter, i)));
@@ -1903,7 +1909,7 @@ static void drain_stock(struct memcg_stock_pcp *stock)
 
 	if (stock->nr_pages) {
 		page_counter_uncharge(&old->memory, stock->nr_pages);
-		if (do_swap_account)
+		if (do_memsw_account())
 			page_counter_uncharge(&old->memsw, stock->nr_pages);
 		css_put_many(&old->css, stock->nr_pages);
 		stock->nr_pages = 0;
@@ -2033,11 +2039,11 @@ retry:
 	if (consume_stock(memcg, nr_pages))
 		return 0;
 
-	if (!do_swap_account ||
+	if (!do_memsw_account() ||
 	    page_counter_try_charge(&memcg->memsw, batch, &counter)) {
 		if (page_counter_try_charge(&memcg->memory, batch, &counter))
 			goto done_restock;
-		if (do_swap_account)
+		if (do_memsw_account())
 			page_counter_uncharge(&memcg->memsw, batch);
 		mem_over_limit = mem_cgroup_from_counter(counter, memory);
 	} else {
@@ -2124,7 +2130,7 @@ force:
 	 * temporarily by force charging it.
 	 */
 	page_counter_charge(&memcg->memory, nr_pages);
-	if (do_swap_account)
+	if (do_memsw_account())
 		page_counter_charge(&memcg->memsw, nr_pages);
 	css_get_many(&memcg->css, nr_pages);
 
@@ -2161,7 +2167,7 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
 		return;
 
 	page_counter_uncharge(&memcg->memory, nr_pages);
-	if (do_swap_account)
+	if (do_memsw_account())
 		page_counter_uncharge(&memcg->memsw, nr_pages);
 
 	css_put_many(&memcg->css, nr_pages);
@@ -2447,7 +2453,7 @@ void __memcg_kmem_uncharge(struct page *page, int order)
 
 	page_counter_uncharge(&memcg->kmem, nr_pages);
 	page_counter_uncharge(&memcg->memory, nr_pages);
-	if (do_swap_account)
+	if (do_memsw_account())
 		page_counter_uncharge(&memcg->memsw, nr_pages);
 
 	page->mem_cgroup = NULL;
@@ -3160,7 +3166,7 @@ static int memcg_stat_show(struct seq_file *m, void *v)
 	BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS);
 
 	for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
-		if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
+		if (i == MEM_CGROUP_STAT_SWAP && !do_memsw_account())
 			continue;
 		seq_printf(m, "%s %lu\n", mem_cgroup_stat_names[i],
 			   mem_cgroup_read_stat(memcg, i) * PAGE_SIZE);
@@ -3182,14 +3188,14 @@ static int memcg_stat_show(struct seq_file *m, void *v)
 	}
 	seq_printf(m, "hierarchical_memory_limit %llu\n",
 		   (u64)memory * PAGE_SIZE);
-	if (do_swap_account)
+	if (do_memsw_account())
 		seq_printf(m, "hierarchical_memsw_limit %llu\n",
 			   (u64)memsw * PAGE_SIZE);
 
 	for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
 		unsigned long long val = 0;
 
-		if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
+		if (i == MEM_CGROUP_STAT_SWAP && !do_memsw_account())
 			continue;
 		for_each_mem_cgroup_tree(mi, memcg)
 			val += mem_cgroup_read_stat(mi, i) * PAGE_SIZE;
@@ -3320,7 +3326,7 @@ static void mem_cgroup_threshold(struct mem_cgroup *memcg)
 {
 	while (memcg) {
 		__mem_cgroup_threshold(memcg, false);
-		if (do_swap_account)
+		if (do_memsw_account())
 			__mem_cgroup_threshold(memcg, true);
 
 		memcg = parent_mem_cgroup(memcg);
@@ -4466,7 +4472,7 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
 	 * we call find_get_page() with swapper_space directly.
 	 */
 	page = find_get_page(swap_address_space(ent), ent.val);
-	if (do_swap_account)
+	if (do_memsw_account())
 		entry->val = ent.val;
 
 	return page;
@@ -4501,7 +4507,7 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
 		page = find_get_entry(mapping, pgoff);
 		if (radix_tree_exceptional_entry(page)) {
 			swp_entry_t swp = radix_to_swp_entry(page);
-			if (do_swap_account)
+			if (do_memsw_account())
 				*entry = swp;
 			page = find_get_page(swap_address_space(swp), swp.val);
 		}
@@ -5276,7 +5282,7 @@ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
 		if (page->mem_cgroup)
 			goto out;
 
-		if (do_swap_account) {
+		if (do_memsw_account()) {
 			swp_entry_t ent = { .val = page_private(page), };
 			unsigned short id = lookup_swap_cgroup_id(ent);
 
@@ -5340,7 +5346,7 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
 	memcg_check_events(memcg, page);
 	local_irq_enable();
 
-	if (do_swap_account && PageSwapCache(page)) {
+	if (do_memsw_account() && PageSwapCache(page)) {
 		swp_entry_t entry = { .val = page_private(page) };
 		/*
 		 * The swap entry might not get freed for a long time,
@@ -5385,7 +5391,7 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
 
 	if (!mem_cgroup_is_root(memcg)) {
 		page_counter_uncharge(&memcg->memory, nr_pages);
-		if (do_swap_account)
+		if (do_memsw_account())
 			page_counter_uncharge(&memcg->memsw, nr_pages);
 		memcg_oom_recover(memcg);
 	}
@@ -5593,7 +5599,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
 	VM_BUG_ON_PAGE(PageLRU(page), page);
 	VM_BUG_ON_PAGE(page_count(page), page);
 
-	if (!do_swap_account)
+	if (!do_memsw_account())
 		return;
 
 	memcg = page->mem_cgroup;
@@ -5633,7 +5639,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t entry)
 	struct mem_cgroup *memcg;
 	unsigned short id;
 
-	if (!do_swap_account)
+	if (!do_memsw_account())
 		return;
 
 	id = swap_cgroup_record(entry, 0);
-- 
2.6.3


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 10/14] mm: memcontrol: do not account memory+swap on unified hierarchy
@ 2015-12-08 15:30   ` Johannes Weiner
  0 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-08 15:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, netdev, cgroups, linux-kernel

The unified hierarchy memory controller doesn't expose the memory+swap
counter to userspace, but its accounting is hardcoded in all charge
paths right now, including the per-cpu charge cache ("the stock").

To avoid adding yet more pointless memory+swap accounting with the
socket memory support in unified hierarchy, disable the counter
altogether when in unified hierarchy mode.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Acked-by: David S. Miller <davem@davemloft.net>
---
 mm/memcontrol.c | 44 +++++++++++++++++++++++++-------------------
 1 file changed, 25 insertions(+), 19 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 0602bee..6b8c0f7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -87,6 +87,12 @@ int do_swap_account __read_mostly;
 #define do_swap_account		0
 #endif
 
+/* Whether legacy memory+swap accounting is active */
+static bool do_memsw_account(void)
+{
+	return !cgroup_subsys_on_dfl(memory_cgrp_subsys) && do_swap_account;
+}
+
 static const char * const mem_cgroup_stat_names[] = {
 	"cache",
 	"rss",
@@ -1177,7 +1183,7 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
 	if (count < limit)
 		margin = limit - count;
 
-	if (do_swap_account) {
+	if (do_memsw_account()) {
 		count = page_counter_read(&memcg->memsw);
 		limit = READ_ONCE(memcg->memsw.limit);
 		if (count <= limit)
@@ -1280,7 +1286,7 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
 		pr_cont(":");
 
 		for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
-			if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
+			if (i == MEM_CGROUP_STAT_SWAP && !do_memsw_account())
 				continue;
 			pr_cont(" %s:%luKB", mem_cgroup_stat_names[i],
 				K(mem_cgroup_read_stat(iter, i)));
@@ -1903,7 +1909,7 @@ static void drain_stock(struct memcg_stock_pcp *stock)
 
 	if (stock->nr_pages) {
 		page_counter_uncharge(&old->memory, stock->nr_pages);
-		if (do_swap_account)
+		if (do_memsw_account())
 			page_counter_uncharge(&old->memsw, stock->nr_pages);
 		css_put_many(&old->css, stock->nr_pages);
 		stock->nr_pages = 0;
@@ -2033,11 +2039,11 @@ retry:
 	if (consume_stock(memcg, nr_pages))
 		return 0;
 
-	if (!do_swap_account ||
+	if (!do_memsw_account() ||
 	    page_counter_try_charge(&memcg->memsw, batch, &counter)) {
 		if (page_counter_try_charge(&memcg->memory, batch, &counter))
 			goto done_restock;
-		if (do_swap_account)
+		if (do_memsw_account())
 			page_counter_uncharge(&memcg->memsw, batch);
 		mem_over_limit = mem_cgroup_from_counter(counter, memory);
 	} else {
@@ -2124,7 +2130,7 @@ force:
 	 * temporarily by force charging it.
 	 */
 	page_counter_charge(&memcg->memory, nr_pages);
-	if (do_swap_account)
+	if (do_memsw_account())
 		page_counter_charge(&memcg->memsw, nr_pages);
 	css_get_many(&memcg->css, nr_pages);
 
@@ -2161,7 +2167,7 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
 		return;
 
 	page_counter_uncharge(&memcg->memory, nr_pages);
-	if (do_swap_account)
+	if (do_memsw_account())
 		page_counter_uncharge(&memcg->memsw, nr_pages);
 
 	css_put_many(&memcg->css, nr_pages);
@@ -2447,7 +2453,7 @@ void __memcg_kmem_uncharge(struct page *page, int order)
 
 	page_counter_uncharge(&memcg->kmem, nr_pages);
 	page_counter_uncharge(&memcg->memory, nr_pages);
-	if (do_swap_account)
+	if (do_memsw_account())
 		page_counter_uncharge(&memcg->memsw, nr_pages);
 
 	page->mem_cgroup = NULL;
@@ -3160,7 +3166,7 @@ static int memcg_stat_show(struct seq_file *m, void *v)
 	BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS);
 
 	for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
-		if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
+		if (i == MEM_CGROUP_STAT_SWAP && !do_memsw_account())
 			continue;
 		seq_printf(m, "%s %lu\n", mem_cgroup_stat_names[i],
 			   mem_cgroup_read_stat(memcg, i) * PAGE_SIZE);
@@ -3182,14 +3188,14 @@ static int memcg_stat_show(struct seq_file *m, void *v)
 	}
 	seq_printf(m, "hierarchical_memory_limit %llu\n",
 		   (u64)memory * PAGE_SIZE);
-	if (do_swap_account)
+	if (do_memsw_account())
 		seq_printf(m, "hierarchical_memsw_limit %llu\n",
 			   (u64)memsw * PAGE_SIZE);
 
 	for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
 		unsigned long long val = 0;
 
-		if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
+		if (i == MEM_CGROUP_STAT_SWAP && !do_memsw_account())
 			continue;
 		for_each_mem_cgroup_tree(mi, memcg)
 			val += mem_cgroup_read_stat(mi, i) * PAGE_SIZE;
@@ -3320,7 +3326,7 @@ static void mem_cgroup_threshold(struct mem_cgroup *memcg)
 {
 	while (memcg) {
 		__mem_cgroup_threshold(memcg, false);
-		if (do_swap_account)
+		if (do_memsw_account())
 			__mem_cgroup_threshold(memcg, true);
 
 		memcg = parent_mem_cgroup(memcg);
@@ -4466,7 +4472,7 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
 	 * we call find_get_page() with swapper_space directly.
 	 */
 	page = find_get_page(swap_address_space(ent), ent.val);
-	if (do_swap_account)
+	if (do_memsw_account())
 		entry->val = ent.val;
 
 	return page;
@@ -4501,7 +4507,7 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
 		page = find_get_entry(mapping, pgoff);
 		if (radix_tree_exceptional_entry(page)) {
 			swp_entry_t swp = radix_to_swp_entry(page);
-			if (do_swap_account)
+			if (do_memsw_account())
 				*entry = swp;
 			page = find_get_page(swap_address_space(swp), swp.val);
 		}
@@ -5276,7 +5282,7 @@ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
 		if (page->mem_cgroup)
 			goto out;
 
-		if (do_swap_account) {
+		if (do_memsw_account()) {
 			swp_entry_t ent = { .val = page_private(page), };
 			unsigned short id = lookup_swap_cgroup_id(ent);
 
@@ -5340,7 +5346,7 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
 	memcg_check_events(memcg, page);
 	local_irq_enable();
 
-	if (do_swap_account && PageSwapCache(page)) {
+	if (do_memsw_account() && PageSwapCache(page)) {
 		swp_entry_t entry = { .val = page_private(page) };
 		/*
 		 * The swap entry might not get freed for a long time,
@@ -5385,7 +5391,7 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
 
 	if (!mem_cgroup_is_root(memcg)) {
 		page_counter_uncharge(&memcg->memory, nr_pages);
-		if (do_swap_account)
+		if (do_memsw_account())
 			page_counter_uncharge(&memcg->memsw, nr_pages);
 		memcg_oom_recover(memcg);
 	}
@@ -5593,7 +5599,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
 	VM_BUG_ON_PAGE(PageLRU(page), page);
 	VM_BUG_ON_PAGE(page_count(page), page);
 
-	if (!do_swap_account)
+	if (!do_memsw_account())
 		return;
 
 	memcg = page->mem_cgroup;
@@ -5633,7 +5639,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t entry)
 	struct mem_cgroup *memcg;
 	unsigned short id;
 
-	if (!do_swap_account)
+	if (!do_memsw_account())
 		return;
 
 	id = swap_cgroup_record(entry, 0);
-- 
2.6.3

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 11/14] mm: memcontrol: move socket code for unified hierarchy accounting
  2015-12-08 15:30 ` Johannes Weiner
@ 2015-12-08 15:30   ` Johannes Weiner
  -1 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-08 15:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, netdev, cgroups, linux-kernel

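The unified hierarchy memory controller will account socket
memory. Move the socket infrastructure functions further down in
mm/memcontrol.c, below the charge paths they will need to call.
This is pure code movement with no functional change.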

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Acked-by: David S. Miller <davem@davemloft.net>
---
 mm/memcontrol.c | 148 ++++++++++++++++++++++++++++----------------------------
 1 file changed, 74 insertions(+), 74 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6b8c0f7..ed030b5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -294,80 +294,6 @@ static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
 	return mem_cgroup_from_css(css);
 }
 
-/* Writing them here to avoid exposing memcg's inner layout */
-#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
-
-struct static_key memcg_sockets_enabled_key;
-EXPORT_SYMBOL(memcg_sockets_enabled_key);
-
-void sock_update_memcg(struct sock *sk)
-{
-	struct mem_cgroup *memcg;
-
-	/* Socket cloning can throw us here with sk_cgrp already
-	 * filled. It won't however, necessarily happen from
-	 * process context. So the test for root memcg given
-	 * the current task's memcg won't help us in this case.
-	 *
-	 * Respecting the original socket's memcg is a better
-	 * decision in this case.
-	 */
-	if (sk->sk_memcg) {
-		BUG_ON(mem_cgroup_is_root(sk->sk_memcg));
-		css_get(&sk->sk_memcg->css);
-		return;
-	}
-
-	rcu_read_lock();
-	memcg = mem_cgroup_from_task(current);
-	if (memcg != root_mem_cgroup &&
-	    memcg->tcp_mem.active &&
-	    css_tryget_online(&memcg->css))
-		sk->sk_memcg = memcg;
-	rcu_read_unlock();
-}
-EXPORT_SYMBOL(sock_update_memcg);
-
-void sock_release_memcg(struct sock *sk)
-{
-	WARN_ON(!sk->sk_memcg);
-	css_put(&sk->sk_memcg->css);
-}
-
-/**
- * mem_cgroup_charge_skmem - charge socket memory
- * @memcg: memcg to charge
- * @nr_pages: number of pages to charge
- *
- * Charges @nr_pages to @memcg. Returns %true if the charge fit within
- * @memcg's configured limit, %false if the charge had to be forced.
- */
-bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
-{
-	struct page_counter *counter;
-
-	if (page_counter_try_charge(&memcg->tcp_mem.memory_allocated,
-				    nr_pages, &counter)) {
-		memcg->tcp_mem.memory_pressure = 0;
-		return true;
-	}
-	page_counter_charge(&memcg->tcp_mem.memory_allocated, nr_pages);
-	memcg->tcp_mem.memory_pressure = 1;
-	return false;
-}
-
-/**
- * mem_cgroup_uncharge_skmem - uncharge socket memory
- * @memcg - memcg to uncharge
- * @nr_pages - number of pages to uncharge
- */
-void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
-{
-	page_counter_uncharge(&memcg->tcp_mem.memory_allocated, nr_pages);
-}
-
-#endif
-
 #ifdef CONFIG_MEMCG_KMEM
 /*
  * This will be the memcg's index in each cache's ->memcg_params.memcg_caches.
@@ -5544,6 +5470,80 @@ void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage)
 	commit_charge(newpage, memcg, true);
 }
 
+/* Writing them here to avoid exposing memcg's inner layout */
+#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
+
+struct static_key memcg_sockets_enabled_key;
+EXPORT_SYMBOL(memcg_sockets_enabled_key);
+
+void sock_update_memcg(struct sock *sk)
+{
+	struct mem_cgroup *memcg;
+
+	/* Socket cloning can throw us here with sk_cgrp already
+	 * filled. It won't however, necessarily happen from
+	 * process context. So the test for root memcg given
+	 * the current task's memcg won't help us in this case.
+	 *
+	 * Respecting the original socket's memcg is a better
+	 * decision in this case.
+	 */
+	if (sk->sk_memcg) {
+		BUG_ON(mem_cgroup_is_root(sk->sk_memcg));
+		css_get(&sk->sk_memcg->css);
+		return;
+	}
+
+	rcu_read_lock();
+	memcg = mem_cgroup_from_task(current);
+	if (memcg != root_mem_cgroup &&
+	    memcg->tcp_mem.active &&
+	    css_tryget_online(&memcg->css))
+		sk->sk_memcg = memcg;
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(sock_update_memcg);
+
+void sock_release_memcg(struct sock *sk)
+{
+	WARN_ON(!sk->sk_memcg);
+	css_put(&sk->sk_memcg->css);
+}
+
+/**
+ * mem_cgroup_charge_skmem - charge socket memory
+ * @memcg: memcg to charge
+ * @nr_pages: number of pages to charge
+ *
+ * Charges @nr_pages to @memcg. Returns %true if the charge fit within
+ * @memcg's configured limit, %false if the charge had to be forced.
+ */
+bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
+{
+	struct page_counter *counter;
+
+	if (page_counter_try_charge(&memcg->tcp_mem.memory_allocated,
+				    nr_pages, &counter)) {
+		memcg->tcp_mem.memory_pressure = 0;
+		return true;
+	}
+	page_counter_charge(&memcg->tcp_mem.memory_allocated, nr_pages);
+	memcg->tcp_mem.memory_pressure = 1;
+	return false;
+}
+
+/**
+ * mem_cgroup_uncharge_skmem - uncharge socket memory
+ * @memcg - memcg to uncharge
+ * @nr_pages - number of pages to uncharge
+ */
+void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
+{
+	page_counter_uncharge(&memcg->tcp_mem.memory_allocated, nr_pages);
+}
+
+#endif
+
 /*
  * subsys_initcall() for memory controller.
  *
-- 
2.6.3


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 11/14] mm: memcontrol: move socket code for unified hierarchy accounting
@ 2015-12-08 15:30   ` Johannes Weiner
  0 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-08 15:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, netdev, cgroups, linux-kernel

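The unified hierarchy memory controller will account socket
memory. Move the socket infrastructure functions further down in
mm/memcontrol.c, below the charge paths they will need to call.
This is pure code movement with no functional change.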

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Acked-by: David S. Miller <davem@davemloft.net>
---
 mm/memcontrol.c | 148 ++++++++++++++++++++++++++++----------------------------
 1 file changed, 74 insertions(+), 74 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6b8c0f7..ed030b5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -294,80 +294,6 @@ static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
 	return mem_cgroup_from_css(css);
 }
 
-/* Writing them here to avoid exposing memcg's inner layout */
-#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
-
-struct static_key memcg_sockets_enabled_key;
-EXPORT_SYMBOL(memcg_sockets_enabled_key);
-
-void sock_update_memcg(struct sock *sk)
-{
-	struct mem_cgroup *memcg;
-
-	/* Socket cloning can throw us here with sk_cgrp already
-	 * filled. It won't however, necessarily happen from
-	 * process context. So the test for root memcg given
-	 * the current task's memcg won't help us in this case.
-	 *
-	 * Respecting the original socket's memcg is a better
-	 * decision in this case.
-	 */
-	if (sk->sk_memcg) {
-		BUG_ON(mem_cgroup_is_root(sk->sk_memcg));
-		css_get(&sk->sk_memcg->css);
-		return;
-	}
-
-	rcu_read_lock();
-	memcg = mem_cgroup_from_task(current);
-	if (memcg != root_mem_cgroup &&
-	    memcg->tcp_mem.active &&
-	    css_tryget_online(&memcg->css))
-		sk->sk_memcg = memcg;
-	rcu_read_unlock();
-}
-EXPORT_SYMBOL(sock_update_memcg);
-
-void sock_release_memcg(struct sock *sk)
-{
-	WARN_ON(!sk->sk_memcg);
-	css_put(&sk->sk_memcg->css);
-}
-
-/**
- * mem_cgroup_charge_skmem - charge socket memory
- * @memcg: memcg to charge
- * @nr_pages: number of pages to charge
- *
- * Charges @nr_pages to @memcg. Returns %true if the charge fit within
- * @memcg's configured limit, %false if the charge had to be forced.
- */
-bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
-{
-	struct page_counter *counter;
-
-	if (page_counter_try_charge(&memcg->tcp_mem.memory_allocated,
-				    nr_pages, &counter)) {
-		memcg->tcp_mem.memory_pressure = 0;
-		return true;
-	}
-	page_counter_charge(&memcg->tcp_mem.memory_allocated, nr_pages);
-	memcg->tcp_mem.memory_pressure = 1;
-	return false;
-}
-
-/**
- * mem_cgroup_uncharge_skmem - uncharge socket memory
- * @memcg - memcg to uncharge
- * @nr_pages - number of pages to uncharge
- */
-void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
-{
-	page_counter_uncharge(&memcg->tcp_mem.memory_allocated, nr_pages);
-}
-
-#endif
-
 #ifdef CONFIG_MEMCG_KMEM
 /*
  * This will be the memcg's index in each cache's ->memcg_params.memcg_caches.
@@ -5544,6 +5470,80 @@ void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage)
 	commit_charge(newpage, memcg, true);
 }
 
+/* Writing them here to avoid exposing memcg's inner layout */
+#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
+
+struct static_key memcg_sockets_enabled_key;
+EXPORT_SYMBOL(memcg_sockets_enabled_key);
+
+void sock_update_memcg(struct sock *sk)
+{
+	struct mem_cgroup *memcg;
+
+	/* Socket cloning can throw us here with sk_cgrp already
+	 * filled. It won't however, necessarily happen from
+	 * process context. So the test for root memcg given
+	 * the current task's memcg won't help us in this case.
+	 *
+	 * Respecting the original socket's memcg is a better
+	 * decision in this case.
+	 */
+	if (sk->sk_memcg) {
+		BUG_ON(mem_cgroup_is_root(sk->sk_memcg));
+		css_get(&sk->sk_memcg->css);
+		return;
+	}
+
+	rcu_read_lock();
+	memcg = mem_cgroup_from_task(current);
+	if (memcg != root_mem_cgroup &&
+	    memcg->tcp_mem.active &&
+	    css_tryget_online(&memcg->css))
+		sk->sk_memcg = memcg;
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(sock_update_memcg);
+
+void sock_release_memcg(struct sock *sk)
+{
+	WARN_ON(!sk->sk_memcg);
+	css_put(&sk->sk_memcg->css);
+}
+
+/**
+ * mem_cgroup_charge_skmem - charge socket memory
+ * @memcg: memcg to charge
+ * @nr_pages: number of pages to charge
+ *
+ * Charges @nr_pages to @memcg. Returns %true if the charge fit within
+ * @memcg's configured limit, %false if the charge had to be forced.
+ */
+bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
+{
+	struct page_counter *counter;
+
+	if (page_counter_try_charge(&memcg->tcp_mem.memory_allocated,
+				    nr_pages, &counter)) {
+		memcg->tcp_mem.memory_pressure = 0;
+		return true;
+	}
+	page_counter_charge(&memcg->tcp_mem.memory_allocated, nr_pages);
+	memcg->tcp_mem.memory_pressure = 1;
+	return false;
+}
+
+/**
+ * mem_cgroup_uncharge_skmem - uncharge socket memory
+ * @memcg - memcg to uncharge
+ * @nr_pages - number of pages to uncharge
+ */
+void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
+{
+	page_counter_uncharge(&memcg->tcp_mem.memory_allocated, nr_pages);
+}
+
+#endif
+
 /*
  * subsys_initcall() for memory controller.
  *
-- 
2.6.3

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 12/14] mm: memcontrol: account socket memory in unified hierarchy memory controller
  2015-12-08 15:30 ` Johannes Weiner
@ 2015-12-08 15:30   ` Johannes Weiner
  -1 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-08 15:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, netdev, cgroups, linux-kernel

Socket memory can be a significant share of overall memory consumed by
common workloads. In order to provide reasonable resource isolation in
the unified hierarchy, this type of memory needs to be included in the
tracking/accounting of a cgroup under active memory resource control.

Overhead is only incurred when a non-root control group is created AND
the memory controller is instructed to track and account the memory
footprint of that group. cgroup.memory=nosocket can be specified on
the boot command line to override any runtime configuration and
forcibly exclude socket memory from active memory resource control.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: David S. Miller <davem@davemloft.net>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
---
 Documentation/kernel-parameters.txt |   4 ++
 include/linux/memcontrol.h          |   9 ++-
 mm/memcontrol.c                     | 122 +++++++++++++++++++++++++++++-------
 3 files changed, 110 insertions(+), 25 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 742f69d..7868f1b 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -599,6 +599,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			cut the overhead, others just disable the usage. So
 			only cgroup_disable=memory is actually worthy}
 
+	cgroup.memory=	[KNL] Pass options to the cgroup memory controller.
+			Format: <string>
+			nosocket -- Disable socket memory accounting.
+
 	checkreqprot	[SELINUX] Set initial checkreqprot flag value.
 			Format: { "0" | "1" }
 			See security/selinux/Kconfig help text.
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 654c2fb..863ae8d 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -170,6 +170,9 @@ struct mem_cgroup {
 	unsigned long low;
 	unsigned long high;
 
+	/* Range enforcement for interrupt charges */
+	struct work_struct high_work;
+
 	unsigned long soft_limit;
 
 	/* vmpressure notifications */
@@ -684,12 +687,16 @@ void sock_update_memcg(struct sock *sk);
 void sock_release_memcg(struct sock *sk);
 bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
 void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
-#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
+#if defined(CONFIG_MEMCG) && defined(CONFIG_INET)
 extern struct static_key memcg_sockets_enabled_key;
 #define mem_cgroup_sockets_enabled static_key_false(&memcg_sockets_enabled_key)
 static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
 {
+#ifdef CONFIG_MEMCG_KMEM
 	return memcg->tcp_mem.memory_pressure;
+#else
+	return false;
+#endif
 }
 #else
 #define mem_cgroup_sockets_enabled 0
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ed030b5..59555b0 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -80,6 +80,9 @@ struct mem_cgroup *root_mem_cgroup __read_mostly;
 
 #define MEM_CGROUP_RECLAIM_RETRIES	5
 
+/* Socket memory accounting disabled? */
+static bool cgroup_memory_nosocket;
+
 /* Whether the swap controller is active */
 #ifdef CONFIG_MEMCG_SWAP
 int do_swap_account __read_mostly;
@@ -1923,6 +1926,26 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
 	return NOTIFY_OK;
 }
 
+static void reclaim_high(struct mem_cgroup *memcg,
+			 unsigned int nr_pages,
+			 gfp_t gfp_mask)
+{
+	do {
+		if (page_counter_read(&memcg->memory) <= memcg->high)
+			continue;
+		mem_cgroup_events(memcg, MEMCG_HIGH, 1);
+		try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true);
+	} while ((memcg = parent_mem_cgroup(memcg)));
+}
+
+static void high_work_func(struct work_struct *work)
+{
+	struct mem_cgroup *memcg;
+
+	memcg = container_of(work, struct mem_cgroup, high_work);
+	reclaim_high(memcg, CHARGE_BATCH, GFP_KERNEL);
+}
+
 /*
  * Scheduled by try_charge() to be executed from the userland return path
  * and reclaims memory over the high limit.
@@ -1930,20 +1953,13 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
 void mem_cgroup_handle_over_high(void)
 {
 	unsigned int nr_pages = current->memcg_nr_pages_over_high;
-	struct mem_cgroup *memcg, *pos;
+	struct mem_cgroup *memcg;
 
 	if (likely(!nr_pages))
 		return;
 
-	pos = memcg = get_mem_cgroup_from_mm(current->mm);
-
-	do {
-		if (page_counter_read(&pos->memory) <= pos->high)
-			continue;
-		mem_cgroup_events(pos, MEMCG_HIGH, 1);
-		try_to_free_mem_cgroup_pages(pos, nr_pages, GFP_KERNEL, true);
-	} while ((pos = parent_mem_cgroup(pos)));
-
+	memcg = get_mem_cgroup_from_mm(current->mm);
+	reclaim_high(memcg, nr_pages, GFP_KERNEL);
 	css_put(&memcg->css);
 	current->memcg_nr_pages_over_high = 0;
 }
@@ -2078,6 +2094,11 @@ done_restock:
 	 */
 	do {
 		if (page_counter_read(&memcg->memory) > memcg->high) {
+			/* Don't bother a random interrupted task */
+			if (in_interrupt()) {
+				schedule_work(&memcg->high_work);
+				break;
+			}
 			current->memcg_nr_pages_over_high += batch;
 			set_notify_resume(current);
 			break;
@@ -4126,6 +4147,8 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 {
 	int node;
 
+	cancel_work_sync(&memcg->high_work);
+
 	mem_cgroup_remove_from_trees(memcg);
 
 	for_each_node(node)
@@ -4172,6 +4195,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 		page_counter_init(&memcg->kmem, NULL);
 	}
 
+	INIT_WORK(&memcg->high_work, high_work_func);
 	memcg->last_scanned_node = MAX_NUMNODES;
 	INIT_LIST_HEAD(&memcg->oom_notify);
 	memcg->move_charge_at_immigrate = 0;
@@ -4243,6 +4267,11 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	if (ret)
 		return ret;
 
+#ifdef CONFIG_INET
+	if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket)
+		static_key_slow_inc(&memcg_sockets_enabled_key);
+#endif
+
 	/*
 	 * Make sure the memcg is initialized: mem_cgroup_iter()
 	 * orders reading memcg->initialized against its callers
@@ -4282,6 +4311,10 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 
 	memcg_destroy_kmem(memcg);
+#ifdef CONFIG_INET
+	if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket)
+		static_key_slow_dec(&memcg_sockets_enabled_key);
+#endif
 	__mem_cgroup_free(memcg);
 }
 
@@ -5470,8 +5503,7 @@ void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage)
 	commit_charge(newpage, memcg, true);
 }
 
-/* Writing them here to avoid exposing memcg's inner layout */
-#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
+#ifdef CONFIG_INET
 
 struct static_key memcg_sockets_enabled_key;
 EXPORT_SYMBOL(memcg_sockets_enabled_key);
@@ -5496,10 +5528,15 @@ void sock_update_memcg(struct sock *sk)
 
 	rcu_read_lock();
 	memcg = mem_cgroup_from_task(current);
-	if (memcg != root_mem_cgroup &&
-	    memcg->tcp_mem.active &&
-	    css_tryget_online(&memcg->css))
+	if (memcg == root_mem_cgroup)
+		goto out;
+#ifdef CONFIG_MEMCG_KMEM
+	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && !memcg->tcp_mem.active)
+		goto out;
+#endif
+	if (css_tryget_online(&memcg->css))
 		sk->sk_memcg = memcg;
+out:
 	rcu_read_unlock();
 }
 EXPORT_SYMBOL(sock_update_memcg);
@@ -5520,15 +5557,30 @@ void sock_release_memcg(struct sock *sk)
  */
 bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
-	struct page_counter *counter;
+	gfp_t gfp_mask = GFP_KERNEL;
 
-	if (page_counter_try_charge(&memcg->tcp_mem.memory_allocated,
-				    nr_pages, &counter)) {
-		memcg->tcp_mem.memory_pressure = 0;
-		return true;
+#ifdef CONFIG_MEMCG_KMEM
+	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) {
+		struct page_counter *counter;
+
+		if (page_counter_try_charge(&memcg->tcp_mem.memory_allocated,
+					    nr_pages, &counter)) {
+			memcg->tcp_mem.memory_pressure = 0;
+			return true;
+		}
+		page_counter_charge(&memcg->tcp_mem.memory_allocated, nr_pages);
+		memcg->tcp_mem.memory_pressure = 1;
+		return false;
 	}
-	page_counter_charge(&memcg->tcp_mem.memory_allocated, nr_pages);
-	memcg->tcp_mem.memory_pressure = 1;
+#endif
+	/* Don't block in the packet receive path */
+	if (in_softirq())
+		gfp_mask = GFP_NOWAIT;
+
+	if (try_charge(memcg, gfp_mask, nr_pages) == 0)
+		return true;
+
+	try_charge(memcg, gfp_mask|__GFP_NOFAIL, nr_pages);
 	return false;
 }
 
@@ -5539,10 +5591,32 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
  */
 void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
-	page_counter_uncharge(&memcg->tcp_mem.memory_allocated, nr_pages);
+#ifdef CONFIG_MEMCG_KMEM
+	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) {
+		page_counter_uncharge(&memcg->tcp_mem.memory_allocated,
+				      nr_pages);
+		return;
+	}
+#endif
+	page_counter_uncharge(&memcg->memory, nr_pages);
+	css_put_many(&memcg->css, nr_pages);
 }
 
-#endif
+#endif /* CONFIG_INET */
+
+static int __init cgroup_memory(char *s)
+{
+	char *token;
+
+	while ((token = strsep(&s, ",")) != NULL) {
+		if (!*token)
+			continue;
+		if (!strcmp(token, "nosocket"))
+			cgroup_memory_nosocket = true;
+	}
+	return 0;
+}
+__setup("cgroup.memory=", cgroup_memory);
 
 /*
  * subsys_initcall() for memory controller.
-- 
2.6.3


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 12/14] mm: memcontrol: account socket memory in unified hierarchy memory controller
@ 2015-12-08 15:30   ` Johannes Weiner
  0 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-08 15:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, netdev, cgroups, linux-kernel

Socket memory can be a significant share of overall memory consumed by
common workloads. In order to provide reasonable resource isolation in
the unified hierarchy, this type of memory needs to be included in the
tracking/accounting of a cgroup under active memory resource control.

Overhead is only incurred when a non-root control group is created AND
the memory controller is instructed to track and account the memory
footprint of that group. cgroup.memory=nosocket can be specified on
the boot command line to override any runtime configuration and
forcibly exclude socket memory from active memory resource control.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: David S. Miller <davem@davemloft.net>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
---
 Documentation/kernel-parameters.txt |   4 ++
 include/linux/memcontrol.h          |   9 ++-
 mm/memcontrol.c                     | 122 +++++++++++++++++++++++++++++-------
 3 files changed, 110 insertions(+), 25 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 742f69d..7868f1b 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -599,6 +599,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			cut the overhead, others just disable the usage. So
 			only cgroup_disable=memory is actually worthy}
 
+	cgroup.memory=	[KNL] Pass options to the cgroup memory controller.
+			Format: <string>
+			nosocket -- Disable socket memory accounting.
+
 	checkreqprot	[SELINUX] Set initial checkreqprot flag value.
 			Format: { "0" | "1" }
 			See security/selinux/Kconfig help text.
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 654c2fb..863ae8d 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -170,6 +170,9 @@ struct mem_cgroup {
 	unsigned long low;
 	unsigned long high;
 
+	/* Range enforcement for interrupt charges */
+	struct work_struct high_work;
+
 	unsigned long soft_limit;
 
 	/* vmpressure notifications */
@@ -684,12 +687,16 @@ void sock_update_memcg(struct sock *sk);
 void sock_release_memcg(struct sock *sk);
 bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
 void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
-#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
+#if defined(CONFIG_MEMCG) && defined(CONFIG_INET)
 extern struct static_key memcg_sockets_enabled_key;
 #define mem_cgroup_sockets_enabled static_key_false(&memcg_sockets_enabled_key)
 static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
 {
+#ifdef CONFIG_MEMCG_KMEM
 	return memcg->tcp_mem.memory_pressure;
+#else
+	return false;
+#endif
 }
 #else
 #define mem_cgroup_sockets_enabled 0
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ed030b5..59555b0 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -80,6 +80,9 @@ struct mem_cgroup *root_mem_cgroup __read_mostly;
 
 #define MEM_CGROUP_RECLAIM_RETRIES	5
 
+/* Socket memory accounting disabled? */
+static bool cgroup_memory_nosocket;
+
 /* Whether the swap controller is active */
 #ifdef CONFIG_MEMCG_SWAP
 int do_swap_account __read_mostly;
@@ -1923,6 +1926,26 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
 	return NOTIFY_OK;
 }
 
+static void reclaim_high(struct mem_cgroup *memcg,
+			 unsigned int nr_pages,
+			 gfp_t gfp_mask)
+{
+	do {
+		if (page_counter_read(&memcg->memory) <= memcg->high)
+			continue;
+		mem_cgroup_events(memcg, MEMCG_HIGH, 1);
+		try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true);
+	} while ((memcg = parent_mem_cgroup(memcg)));
+}
+
+static void high_work_func(struct work_struct *work)
+{
+	struct mem_cgroup *memcg;
+
+	memcg = container_of(work, struct mem_cgroup, high_work);
+	reclaim_high(memcg, CHARGE_BATCH, GFP_KERNEL);
+}
+
 /*
  * Scheduled by try_charge() to be executed from the userland return path
  * and reclaims memory over the high limit.
@@ -1930,20 +1953,13 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
 void mem_cgroup_handle_over_high(void)
 {
 	unsigned int nr_pages = current->memcg_nr_pages_over_high;
-	struct mem_cgroup *memcg, *pos;
+	struct mem_cgroup *memcg;
 
 	if (likely(!nr_pages))
 		return;
 
-	pos = memcg = get_mem_cgroup_from_mm(current->mm);
-
-	do {
-		if (page_counter_read(&pos->memory) <= pos->high)
-			continue;
-		mem_cgroup_events(pos, MEMCG_HIGH, 1);
-		try_to_free_mem_cgroup_pages(pos, nr_pages, GFP_KERNEL, true);
-	} while ((pos = parent_mem_cgroup(pos)));
-
+	memcg = get_mem_cgroup_from_mm(current->mm);
+	reclaim_high(memcg, nr_pages, GFP_KERNEL);
 	css_put(&memcg->css);
 	current->memcg_nr_pages_over_high = 0;
 }
@@ -2078,6 +2094,11 @@ done_restock:
 	 */
 	do {
 		if (page_counter_read(&memcg->memory) > memcg->high) {
+			/* Don't bother a random interrupted task */
+			if (in_interrupt()) {
+				schedule_work(&memcg->high_work);
+				break;
+			}
 			current->memcg_nr_pages_over_high += batch;
 			set_notify_resume(current);
 			break;
@@ -4126,6 +4147,8 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 {
 	int node;
 
+	cancel_work_sync(&memcg->high_work);
+
 	mem_cgroup_remove_from_trees(memcg);
 
 	for_each_node(node)
@@ -4172,6 +4195,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 		page_counter_init(&memcg->kmem, NULL);
 	}
 
+	INIT_WORK(&memcg->high_work, high_work_func);
 	memcg->last_scanned_node = MAX_NUMNODES;
 	INIT_LIST_HEAD(&memcg->oom_notify);
 	memcg->move_charge_at_immigrate = 0;
@@ -4243,6 +4267,11 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	if (ret)
 		return ret;
 
+#ifdef CONFIG_INET
+	if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket)
+		static_key_slow_inc(&memcg_sockets_enabled_key);
+#endif
+
 	/*
 	 * Make sure the memcg is initialized: mem_cgroup_iter()
 	 * orders reading memcg->initialized against its callers
@@ -4282,6 +4311,10 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 
 	memcg_destroy_kmem(memcg);
+#ifdef CONFIG_INET
+	if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket)
+		static_key_slow_dec(&memcg_sockets_enabled_key);
+#endif
 	__mem_cgroup_free(memcg);
 }
 
@@ -5470,8 +5503,7 @@ void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage)
 	commit_charge(newpage, memcg, true);
 }
 
-/* Writing them here to avoid exposing memcg's inner layout */
-#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
+#ifdef CONFIG_INET
 
 struct static_key memcg_sockets_enabled_key;
 EXPORT_SYMBOL(memcg_sockets_enabled_key);
@@ -5496,10 +5528,15 @@ void sock_update_memcg(struct sock *sk)
 
 	rcu_read_lock();
 	memcg = mem_cgroup_from_task(current);
-	if (memcg != root_mem_cgroup &&
-	    memcg->tcp_mem.active &&
-	    css_tryget_online(&memcg->css))
+	if (memcg == root_mem_cgroup)
+		goto out;
+#ifdef CONFIG_MEMCG_KMEM
+	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && !memcg->tcp_mem.active)
+		goto out;
+#endif
+	if (css_tryget_online(&memcg->css))
 		sk->sk_memcg = memcg;
+out:
 	rcu_read_unlock();
 }
 EXPORT_SYMBOL(sock_update_memcg);
@@ -5520,15 +5557,30 @@ void sock_release_memcg(struct sock *sk)
  */
 bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
-	struct page_counter *counter;
+	gfp_t gfp_mask = GFP_KERNEL;
 
-	if (page_counter_try_charge(&memcg->tcp_mem.memory_allocated,
-				    nr_pages, &counter)) {
-		memcg->tcp_mem.memory_pressure = 0;
-		return true;
+#ifdef CONFIG_MEMCG_KMEM
+	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) {
+		struct page_counter *counter;
+
+		if (page_counter_try_charge(&memcg->tcp_mem.memory_allocated,
+					    nr_pages, &counter)) {
+			memcg->tcp_mem.memory_pressure = 0;
+			return true;
+		}
+		page_counter_charge(&memcg->tcp_mem.memory_allocated, nr_pages);
+		memcg->tcp_mem.memory_pressure = 1;
+		return false;
 	}
-	page_counter_charge(&memcg->tcp_mem.memory_allocated, nr_pages);
-	memcg->tcp_mem.memory_pressure = 1;
+#endif
+	/* Don't block in the packet receive path */
+	if (in_softirq())
+		gfp_mask = GFP_NOWAIT;
+
+	if (try_charge(memcg, gfp_mask, nr_pages) == 0)
+		return true;
+
+	try_charge(memcg, gfp_mask|__GFP_NOFAIL, nr_pages);
 	return false;
 }
 
@@ -5539,10 +5591,32 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
  */
 void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
-	page_counter_uncharge(&memcg->tcp_mem.memory_allocated, nr_pages);
+#ifdef CONFIG_MEMCG_KMEM
+	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) {
+		page_counter_uncharge(&memcg->tcp_mem.memory_allocated,
+				      nr_pages);
+		return;
+	}
+#endif
+	page_counter_uncharge(&memcg->memory, nr_pages);
+	css_put_many(&memcg->css, nr_pages);
 }
 
-#endif
+#endif /* CONFIG_INET */
+
+static int __init cgroup_memory(char *s)
+{
+	char *token;
+
+	while ((token = strsep(&s, ",")) != NULL) {
+		if (!*token)
+			continue;
+		if (!strcmp(token, "nosocket"))
+			cgroup_memory_nosocket = true;
+	}
+	return 1;	/* __setup() handlers return 1 once the option is consumed */
+}
+__setup("cgroup.memory=", cgroup_memory);
 
 /*
  * subsys_initcall() for memory controller.
-- 
2.6.3

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 13/14] mm: memcontrol: hook up vmpressure to socket pressure
  2015-12-08 15:30 ` Johannes Weiner
@ 2015-12-08 15:30   ` Johannes Weiner
  -1 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-08 15:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, netdev, cgroups, linux-kernel

Let the networking stack know when a memcg is under reclaim pressure
so that it can clamp its transmit windows accordingly.

Whenever the reclaim efficiency of a cgroup's LRU lists drops low
enough for a MEDIUM or HIGH vmpressure event to occur, assert a
pressure state in the socket and tcp memory code that tells it to curb
consumption growth from sockets associated with said control group.

Traditionally, vmpressure reports for the entire subtree of a memcg
under pressure, which drops useful information on the individual
groups reclaimed. However, it's too late to change the user interface,
so add a second reporting mode that reports on the level of reclaim
instead of at the level of pressure, and use that report for sockets.

vmpressure events are naturally edge triggered, so for hysteresis
assert socket pressure for a second to allow for subsequent vmpressure
events to occur before letting the socket code return to normal.

This will likely need fine-tuning for a wider variety of workloads, but
for now stick to the vmpressure presets and keep hysteresis simple.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: David S. Miller <davem@davemloft.net>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
---
 include/linux/memcontrol.h | 32 ++++++++++++++++---
 include/linux/vmpressure.h |  7 +++--
 mm/memcontrol.c            | 17 ++--------
 mm/vmpressure.c            | 78 +++++++++++++++++++++++++++++++++++-----------
 mm/vmscan.c                | 10 +++++-
 5 files changed, 104 insertions(+), 40 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 863ae8d..e4f6721 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -249,6 +249,10 @@ struct mem_cgroup {
 	struct wb_domain cgwb_domain;
 #endif
 
+#ifdef CONFIG_INET
+	unsigned long		socket_pressure;
+#endif
+
 	/* List of events which userspace want to receive */
 	struct list_head event_list;
 	spinlock_t event_list_lock;
@@ -292,18 +296,34 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *);
 
 bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg);
 struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
-struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
 
 static inline
 struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
 	return css ? container_of(css, struct mem_cgroup, css) : NULL;
 }
 
+#define mem_cgroup_from_counter(counter, member)	\
+	container_of(counter, struct mem_cgroup, member)
+
 struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
 				   struct mem_cgroup *,
 				   struct mem_cgroup_reclaim_cookie *);
 void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
 
+/**
+ * parent_mem_cgroup - find the accounting parent of a memcg
+ * @memcg: memcg whose parent to find
+ *
+ * Returns the parent memcg, or NULL if this is the root or the memory
+ * controller is in legacy no-hierarchy mode.
+ */
+static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
+{
+	if (!memcg->memory.parent)
+		return NULL;
+	return mem_cgroup_from_counter(memcg->memory.parent, memory);
+}
+
 static inline bool mem_cgroup_is_descendant(struct mem_cgroup *memcg,
 			      struct mem_cgroup *root)
 {
@@ -693,10 +713,14 @@ extern struct static_key memcg_sockets_enabled_key;
 static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
 {
 #ifdef CONFIG_MEMCG_KMEM
-	return memcg->tcp_mem.memory_pressure;
-#else
-	return false;
+	if (memcg->tcp_mem.memory_pressure)
+		return true;
 #endif
+	do {
+		if (time_before(jiffies, memcg->socket_pressure))
+			return true;
+	} while ((memcg = parent_mem_cgroup(memcg)));
+	return false;
 }
 #else
 #define mem_cgroup_sockets_enabled 0
diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h
index 3e45358..3347cc3 100644
--- a/include/linux/vmpressure.h
+++ b/include/linux/vmpressure.h
@@ -12,6 +12,9 @@
 struct vmpressure {
 	unsigned long scanned;
 	unsigned long reclaimed;
+
+	unsigned long tree_scanned;
+	unsigned long tree_reclaimed;
 	/* The lock is used to keep the scanned/reclaimed above in sync. */
 	struct spinlock sr_lock;
 
@@ -26,7 +29,7 @@ struct vmpressure {
 struct mem_cgroup;
 
 #ifdef CONFIG_MEMCG
-extern void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
+extern void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
 		       unsigned long scanned, unsigned long reclaimed);
 extern void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio);
 
@@ -40,7 +43,7 @@ extern int vmpressure_register_event(struct mem_cgroup *memcg,
 extern void vmpressure_unregister_event(struct mem_cgroup *memcg,
 					struct eventfd_ctx *eventfd);
 #else
-static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
+static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
 			      unsigned long scanned, unsigned long reclaimed) {}
 static inline void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg,
 				   int prio) {}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 59555b0..a0da91f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1091,9 +1091,6 @@ bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg)
 	return ret;
 }
 
-#define mem_cgroup_from_counter(counter, member)	\
-	container_of(counter, struct mem_cgroup, member)
-
 /**
  * mem_cgroup_margin - calculate chargeable space of a memory cgroup
  * @memcg: the memory cgroup
@@ -4159,17 +4156,6 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 	kfree(memcg);
 }
 
-/*
- * Returns the parent mem_cgroup in memcgroup hierarchy with hierarchy enabled.
- */
-struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
-{
-	if (!memcg->memory.parent)
-		return NULL;
-	return mem_cgroup_from_counter(memcg->memory.parent, memory);
-}
-EXPORT_SYMBOL(parent_mem_cgroup);
-
 static struct cgroup_subsys_state * __ref
 mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 {
@@ -4210,6 +4196,9 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 #ifdef CONFIG_CGROUP_WRITEBACK
 	INIT_LIST_HEAD(&memcg->cgwb_list);
 #endif
+#ifdef CONFIG_INET
+	memcg->socket_pressure = jiffies;
+#endif
 	return &memcg->css;
 
 free_out:
diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index 4c25e62..af262bb 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -137,14 +137,11 @@ struct vmpressure_event {
 };
 
 static bool vmpressure_event(struct vmpressure *vmpr,
-			     unsigned long scanned, unsigned long reclaimed)
+			     enum vmpressure_levels level)
 {
 	struct vmpressure_event *ev;
-	enum vmpressure_levels level;
 	bool signalled = false;
 
-	level = vmpressure_calc_level(scanned, reclaimed);
-
 	mutex_lock(&vmpr->events_lock);
 
 	list_for_each_entry(ev, &vmpr->events, node) {
@@ -164,6 +161,7 @@ static void vmpressure_work_fn(struct work_struct *work)
 	struct vmpressure *vmpr = work_to_vmpressure(work);
 	unsigned long scanned;
 	unsigned long reclaimed;
+	enum vmpressure_levels level;
 
 	spin_lock(&vmpr->sr_lock);
 	/*
@@ -174,19 +172,21 @@ static void vmpressure_work_fn(struct work_struct *work)
 	 * here. No need for any locks here since we don't care if
 	 * vmpr->reclaimed is in sync.
 	 */
-	scanned = vmpr->scanned;
+	scanned = vmpr->tree_scanned;
 	if (!scanned) {
 		spin_unlock(&vmpr->sr_lock);
 		return;
 	}
 
-	reclaimed = vmpr->reclaimed;
-	vmpr->scanned = 0;
-	vmpr->reclaimed = 0;
+	reclaimed = vmpr->tree_reclaimed;
+	vmpr->tree_scanned = 0;
+	vmpr->tree_reclaimed = 0;
 	spin_unlock(&vmpr->sr_lock);
 
+	level = vmpressure_calc_level(scanned, reclaimed);
+
 	do {
-		if (vmpressure_event(vmpr, scanned, reclaimed))
+		if (vmpressure_event(vmpr, level))
 			break;
 		/*
 		 * If not handled, propagate the event upward into the
@@ -199,6 +199,7 @@ static void vmpressure_work_fn(struct work_struct *work)
  * vmpressure() - Account memory pressure through scanned/reclaimed ratio
  * @gfp:	reclaimer's gfp mask
  * @memcg:	cgroup memory controller handle
+ * @tree:	legacy subtree mode
  * @scanned:	number of pages scanned
  * @reclaimed:	number of pages reclaimed
  *
@@ -206,9 +207,16 @@ static void vmpressure_work_fn(struct work_struct *work)
  * "instantaneous" memory pressure (scanned/reclaimed ratio). The raw
  * pressure index is then further refined and averaged over time.
  *
+ * If @tree is set, vmpressure is in traditional userspace reporting
+ * mode: @memcg is considered the pressure root and userspace is
+ * notified of the entire subtree's reclaim efficiency.
+ *
+ * If @tree is not set, reclaim efficiency is recorded for @memcg, and
+ * only in-kernel users are notified.
+ *
  * This function does not return any value.
  */
-void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
+void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
 		unsigned long scanned, unsigned long reclaimed)
 {
 	struct vmpressure *vmpr = memcg_to_vmpressure(memcg);
@@ -238,15 +246,47 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
 	if (!scanned)
 		return;
 
-	spin_lock(&vmpr->sr_lock);
-	vmpr->scanned += scanned;
-	vmpr->reclaimed += reclaimed;
-	scanned = vmpr->scanned;
-	spin_unlock(&vmpr->sr_lock);
+	if (tree) {
+		spin_lock(&vmpr->sr_lock);
+		vmpr->tree_scanned += scanned;
+		vmpr->tree_reclaimed += reclaimed;
+		scanned = vmpr->tree_scanned;
+		spin_unlock(&vmpr->sr_lock);
 
-	if (scanned < vmpressure_win)
-		return;
-	schedule_work(&vmpr->work);
+		if (scanned < vmpressure_win)
+			return;
+		schedule_work(&vmpr->work);
+	} else {
+		enum vmpressure_levels level;
+
+		/* For now, no users for root-level efficiency */
+		if (memcg == root_mem_cgroup)
+			return;
+
+		spin_lock(&vmpr->sr_lock);
+		scanned = vmpr->scanned += scanned;
+		reclaimed = vmpr->reclaimed += reclaimed;
+		if (scanned < vmpressure_win) {
+			spin_unlock(&vmpr->sr_lock);
+			return;
+		}
+		vmpr->scanned = vmpr->reclaimed = 0;
+		spin_unlock(&vmpr->sr_lock);
+
+		level = vmpressure_calc_level(scanned, reclaimed);
+
+		if (level > VMPRESSURE_LOW) {
+			/*
+			 * Let the socket buffer allocator know that
+			 * we are having trouble reclaiming LRU pages.
+			 *
+			 * For hysteresis keep the pressure state
+			 * asserted for a second in which subsequent
+			 * pressure events can occur.
+			 */
+			memcg->socket_pressure = jiffies + HZ;
+		}
+	}
 }
 
 /**
@@ -276,7 +316,7 @@ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio)
 	 * to the vmpressure() basically means that we signal 'critical'
 	 * level.
 	 */
-	vmpressure(gfp, memcg, vmpressure_win, 0);
+	vmpressure(gfp, memcg, true, vmpressure_win, 0);
 }
 
 /**
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 97ba9e1..50e54c0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2396,6 +2396,7 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
 		memcg = mem_cgroup_iter(root, NULL, &reclaim);
 		do {
 			unsigned long lru_pages;
+			unsigned long reclaimed;
 			unsigned long scanned;
 			struct lruvec *lruvec;
 			int swappiness;
@@ -2408,6 +2409,7 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
 
 			lruvec = mem_cgroup_zone_lruvec(zone, memcg);
 			swappiness = mem_cgroup_swappiness(memcg);
+			reclaimed = sc->nr_reclaimed;
 			scanned = sc->nr_scanned;
 
 			shrink_lruvec(lruvec, swappiness, sc, &lru_pages);
@@ -2418,6 +2420,11 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
 					    memcg, sc->nr_scanned - scanned,
 					    lru_pages);
 
+			/* Record the group's reclaim efficiency */
+			vmpressure(sc->gfp_mask, memcg, false,
+				   sc->nr_scanned - scanned,
+				   sc->nr_reclaimed - reclaimed);
+
 			/*
 			 * Direct reclaim and kswapd have to scan all memory
 			 * cgroups to fulfill the overall scan target for the
@@ -2449,7 +2456,8 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
 			reclaim_state->reclaimed_slab = 0;
 		}
 
-		vmpressure(sc->gfp_mask, sc->target_mem_cgroup,
+		/* Record the subtree's reclaim efficiency */
+		vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
 			   sc->nr_scanned - nr_scanned,
 			   sc->nr_reclaimed - nr_reclaimed);
 
-- 
2.6.3


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 13/14] mm: memcontrol: hook up vmpressure to socket pressure
@ 2015-12-08 15:30   ` Johannes Weiner
  0 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-08 15:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, netdev, cgroups, linux-kernel

Let the networking stack know when a memcg is under reclaim pressure
so that it can clamp its transmit windows accordingly.

Whenever the reclaim efficiency of a cgroup's LRU lists drops low
enough for a MEDIUM or HIGH vmpressure event to occur, assert a
pressure state in the socket and tcp memory code that tells it to curb
consumption growth from sockets associated with said control group.

Traditionally, vmpressure reports for the entire subtree of a memcg
under pressure, which drops useful information on the individual
groups reclaimed. However, it's too late to change the user interface,
so add a second reporting mode that reports on the level of reclaim
instead of at the level of pressure, and use that report for sockets.

vmpressure events are naturally edge triggered, so for hysteresis
assert socket pressure for a second to allow for subsequent vmpressure
events to occur before letting the socket code return to normal.

This will likely need fine-tuning for a wider variety of workloads, but
for now stick to the vmpressure presets and keep hysteresis simple.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: David S. Miller <davem@davemloft.net>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
---
 include/linux/memcontrol.h | 32 ++++++++++++++++---
 include/linux/vmpressure.h |  7 +++--
 mm/memcontrol.c            | 17 ++--------
 mm/vmpressure.c            | 78 +++++++++++++++++++++++++++++++++++-----------
 mm/vmscan.c                | 10 +++++-
 5 files changed, 104 insertions(+), 40 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 863ae8d..e4f6721 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -249,6 +249,10 @@ struct mem_cgroup {
 	struct wb_domain cgwb_domain;
 #endif
 
+#ifdef CONFIG_INET
+	unsigned long		socket_pressure;
+#endif
+
 	/* List of events which userspace want to receive */
 	struct list_head event_list;
 	spinlock_t event_list_lock;
@@ -292,18 +296,34 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *);
 
 bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg);
 struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
-struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
 
 static inline
 struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
 	return css ? container_of(css, struct mem_cgroup, css) : NULL;
 }
 
+#define mem_cgroup_from_counter(counter, member)	\
+	container_of(counter, struct mem_cgroup, member)
+
 struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
 				   struct mem_cgroup *,
 				   struct mem_cgroup_reclaim_cookie *);
 void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
 
+/**
+ * parent_mem_cgroup - find the accounting parent of a memcg
+ * @memcg: memcg whose parent to find
+ *
+ * Returns the parent memcg, or NULL if this is the root or the memory
+ * controller is in legacy no-hierarchy mode.
+ */
+static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
+{
+	if (!memcg->memory.parent)
+		return NULL;
+	return mem_cgroup_from_counter(memcg->memory.parent, memory);
+}
+
 static inline bool mem_cgroup_is_descendant(struct mem_cgroup *memcg,
 			      struct mem_cgroup *root)
 {
@@ -693,10 +713,14 @@ extern struct static_key memcg_sockets_enabled_key;
 static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
 {
 #ifdef CONFIG_MEMCG_KMEM
-	return memcg->tcp_mem.memory_pressure;
-#else
-	return false;
+	if (memcg->tcp_mem.memory_pressure)
+		return true;
 #endif
+	do {
+		if (time_before(jiffies, memcg->socket_pressure))
+			return true;
+	} while ((memcg = parent_mem_cgroup(memcg)));
+	return false;
 }
 #else
 #define mem_cgroup_sockets_enabled 0
diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h
index 3e45358..3347cc3 100644
--- a/include/linux/vmpressure.h
+++ b/include/linux/vmpressure.h
@@ -12,6 +12,9 @@
 struct vmpressure {
 	unsigned long scanned;
 	unsigned long reclaimed;
+
+	unsigned long tree_scanned;
+	unsigned long tree_reclaimed;
 	/* The lock is used to keep the scanned/reclaimed above in sync. */
 	struct spinlock sr_lock;
 
@@ -26,7 +29,7 @@ struct vmpressure {
 struct mem_cgroup;
 
 #ifdef CONFIG_MEMCG
-extern void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
+extern void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
 		       unsigned long scanned, unsigned long reclaimed);
 extern void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio);
 
@@ -40,7 +43,7 @@ extern int vmpressure_register_event(struct mem_cgroup *memcg,
 extern void vmpressure_unregister_event(struct mem_cgroup *memcg,
 					struct eventfd_ctx *eventfd);
 #else
-static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
+static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
 			      unsigned long scanned, unsigned long reclaimed) {}
 static inline void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg,
 				   int prio) {}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 59555b0..a0da91f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1091,9 +1091,6 @@ bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg)
 	return ret;
 }
 
-#define mem_cgroup_from_counter(counter, member)	\
-	container_of(counter, struct mem_cgroup, member)
-
 /**
  * mem_cgroup_margin - calculate chargeable space of a memory cgroup
  * @memcg: the memory cgroup
@@ -4159,17 +4156,6 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 	kfree(memcg);
 }
 
-/*
- * Returns the parent mem_cgroup in memcgroup hierarchy with hierarchy enabled.
- */
-struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
-{
-	if (!memcg->memory.parent)
-		return NULL;
-	return mem_cgroup_from_counter(memcg->memory.parent, memory);
-}
-EXPORT_SYMBOL(parent_mem_cgroup);
-
 static struct cgroup_subsys_state * __ref
 mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 {
@@ -4210,6 +4196,9 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 #ifdef CONFIG_CGROUP_WRITEBACK
 	INIT_LIST_HEAD(&memcg->cgwb_list);
 #endif
+#ifdef CONFIG_INET
+	memcg->socket_pressure = jiffies;
+#endif
 	return &memcg->css;
 
 free_out:
diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index 4c25e62..af262bb 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -137,14 +137,11 @@ struct vmpressure_event {
 };
 
 static bool vmpressure_event(struct vmpressure *vmpr,
-			     unsigned long scanned, unsigned long reclaimed)
+			     enum vmpressure_levels level)
 {
 	struct vmpressure_event *ev;
-	enum vmpressure_levels level;
 	bool signalled = false;
 
-	level = vmpressure_calc_level(scanned, reclaimed);
-
 	mutex_lock(&vmpr->events_lock);
 
 	list_for_each_entry(ev, &vmpr->events, node) {
@@ -164,6 +161,7 @@ static void vmpressure_work_fn(struct work_struct *work)
 	struct vmpressure *vmpr = work_to_vmpressure(work);
 	unsigned long scanned;
 	unsigned long reclaimed;
+	enum vmpressure_levels level;
 
 	spin_lock(&vmpr->sr_lock);
 	/*
@@ -174,19 +172,21 @@ static void vmpressure_work_fn(struct work_struct *work)
 	 * here. No need for any locks here since we don't care if
 	 * vmpr->reclaimed is in sync.
 	 */
-	scanned = vmpr->scanned;
+	scanned = vmpr->tree_scanned;
 	if (!scanned) {
 		spin_unlock(&vmpr->sr_lock);
 		return;
 	}
 
-	reclaimed = vmpr->reclaimed;
-	vmpr->scanned = 0;
-	vmpr->reclaimed = 0;
+	reclaimed = vmpr->tree_reclaimed;
+	vmpr->tree_scanned = 0;
+	vmpr->tree_reclaimed = 0;
 	spin_unlock(&vmpr->sr_lock);
 
+	level = vmpressure_calc_level(scanned, reclaimed);
+
 	do {
-		if (vmpressure_event(vmpr, scanned, reclaimed))
+		if (vmpressure_event(vmpr, level))
 			break;
 		/*
 		 * If not handled, propagate the event upward into the
@@ -199,6 +199,7 @@ static void vmpressure_work_fn(struct work_struct *work)
  * vmpressure() - Account memory pressure through scanned/reclaimed ratio
  * @gfp:	reclaimer's gfp mask
  * @memcg:	cgroup memory controller handle
+ * @tree:	legacy subtree mode
  * @scanned:	number of pages scanned
  * @reclaimed:	number of pages reclaimed
  *
@@ -206,9 +207,16 @@ static void vmpressure_work_fn(struct work_struct *work)
  * "instantaneous" memory pressure (scanned/reclaimed ratio). The raw
  * pressure index is then further refined and averaged over time.
  *
+ * If @tree is set, vmpressure is in traditional userspace reporting
+ * mode: @memcg is considered the pressure root and userspace is
+ * notified of the entire subtree's reclaim efficiency.
+ *
+ * If @tree is not set, reclaim efficiency is recorded for @memcg, and
+ * only in-kernel users are notified.
+ *
  * This function does not return any value.
  */
-void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
+void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
 		unsigned long scanned, unsigned long reclaimed)
 {
 	struct vmpressure *vmpr = memcg_to_vmpressure(memcg);
@@ -238,15 +246,47 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
 	if (!scanned)
 		return;
 
-	spin_lock(&vmpr->sr_lock);
-	vmpr->scanned += scanned;
-	vmpr->reclaimed += reclaimed;
-	scanned = vmpr->scanned;
-	spin_unlock(&vmpr->sr_lock);
+	if (tree) {
+		spin_lock(&vmpr->sr_lock);
+		vmpr->tree_scanned += scanned;
+		vmpr->tree_reclaimed += reclaimed;
+		scanned = vmpr->tree_scanned;
+		spin_unlock(&vmpr->sr_lock);
 
-	if (scanned < vmpressure_win)
-		return;
-	schedule_work(&vmpr->work);
+		if (scanned < vmpressure_win)
+			return;
+		schedule_work(&vmpr->work);
+	} else {
+		enum vmpressure_levels level;
+
+		/* For now, no users for root-level efficiency */
+		if (memcg == root_mem_cgroup)
+			return;
+
+		spin_lock(&vmpr->sr_lock);
+		scanned = vmpr->scanned += scanned;
+		reclaimed = vmpr->reclaimed += reclaimed;
+		if (scanned < vmpressure_win) {
+			spin_unlock(&vmpr->sr_lock);
+			return;
+		}
+		vmpr->scanned = vmpr->reclaimed = 0;
+		spin_unlock(&vmpr->sr_lock);
+
+		level = vmpressure_calc_level(scanned, reclaimed);
+
+		if (level > VMPRESSURE_LOW) {
+			/*
+			 * Let the socket buffer allocator know that
+			 * we are having trouble reclaiming LRU pages.
+			 *
+			 * For hysteresis keep the pressure state
+			 * asserted for a second in which subsequent
+			 * pressure events can occur.
+			 */
+			memcg->socket_pressure = jiffies + HZ;
+		}
+	}
 }
 
 /**
@@ -276,7 +316,7 @@ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio)
 	 * to the vmpressure() basically means that we signal 'critical'
 	 * level.
 	 */
-	vmpressure(gfp, memcg, vmpressure_win, 0);
+	vmpressure(gfp, memcg, true, vmpressure_win, 0);
 }
 
 /**
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 97ba9e1..50e54c0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2396,6 +2396,7 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
 		memcg = mem_cgroup_iter(root, NULL, &reclaim);
 		do {
 			unsigned long lru_pages;
+			unsigned long reclaimed;
 			unsigned long scanned;
 			struct lruvec *lruvec;
 			int swappiness;
@@ -2408,6 +2409,7 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
 
 			lruvec = mem_cgroup_zone_lruvec(zone, memcg);
 			swappiness = mem_cgroup_swappiness(memcg);
+			reclaimed = sc->nr_reclaimed;
 			scanned = sc->nr_scanned;
 
 			shrink_lruvec(lruvec, swappiness, sc, &lru_pages);
@@ -2418,6 +2420,11 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
 					    memcg, sc->nr_scanned - scanned,
 					    lru_pages);
 
+			/* Record the group's reclaim efficiency */
+			vmpressure(sc->gfp_mask, memcg, false,
+				   sc->nr_scanned - scanned,
+				   sc->nr_reclaimed - reclaimed);
+
 			/*
 			 * Direct reclaim and kswapd have to scan all memory
 			 * cgroups to fulfill the overall scan target for the
@@ -2449,7 +2456,8 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
 			reclaim_state->reclaimed_slab = 0;
 		}
 
-		vmpressure(sc->gfp_mask, sc->target_mem_cgroup,
+		/* Record the subtree's reclaim efficiency */
+		vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
 			   sc->nr_scanned - nr_scanned,
 			   sc->nr_reclaimed - nr_reclaimed);
 
-- 
2.6.3

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 14/14] mm: memcontrol: switch to the updated jump-label API
  2015-12-08 15:30 ` Johannes Weiner
@ 2015-12-08 15:30   ` Johannes Weiner
  -1 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-08 15:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, netdev, cgroups, linux-kernel

According to <linux/jump_label.h> the direct use of struct static_key
is deprecated. Update the socket and slab accounting code accordingly.

Reported-by: Jason Baron <jbaron@akamai.com>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
---
 include/linux/memcontrol.h |  8 ++++----
 mm/memcontrol.c            | 12 ++++++------
 net/ipv4/tcp_memcontrol.c  |  4 ++--
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e4f6721..189f04d 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -708,8 +708,8 @@ void sock_release_memcg(struct sock *sk);
 bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
 void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
 #if defined(CONFIG_MEMCG) && defined(CONFIG_INET)
-extern struct static_key memcg_sockets_enabled_key;
-#define mem_cgroup_sockets_enabled static_key_false(&memcg_sockets_enabled_key)
+extern struct static_key_false memcg_sockets_enabled_key;
+#define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key)
 static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
 {
 #ifdef CONFIG_MEMCG_KMEM
@@ -731,7 +731,7 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
 #endif
 
 #ifdef CONFIG_MEMCG_KMEM
-extern struct static_key memcg_kmem_enabled_key;
+extern struct static_key_false memcg_kmem_enabled_key;
 
 extern int memcg_nr_cache_ids;
 void memcg_get_cache_ids(void);
@@ -747,7 +747,7 @@ void memcg_put_cache_ids(void);
 
 static inline bool memcg_kmem_enabled(void)
 {
-	return static_key_false(&memcg_kmem_enabled_key);
+	return static_branch_unlikely(&memcg_kmem_enabled_key);
 }
 
 static inline bool memcg_kmem_is_active(struct mem_cgroup *memcg)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a0da91f..5fe45d68 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -346,7 +346,7 @@ void memcg_put_cache_ids(void)
  * conditional to this static branch, we'll have to allow modules that does
  * kmem_cache_alloc and the such to see this symbol as well
  */
-struct static_key memcg_kmem_enabled_key;
+DEFINE_STATIC_KEY_FALSE(memcg_kmem_enabled_key);
 EXPORT_SYMBOL(memcg_kmem_enabled_key);
 
 #endif /* CONFIG_MEMCG_KMEM */
@@ -2883,7 +2883,7 @@ static int memcg_activate_kmem(struct mem_cgroup *memcg,
 	err = page_counter_limit(&memcg->kmem, nr_pages);
 	VM_BUG_ON(err);
 
-	static_key_slow_inc(&memcg_kmem_enabled_key);
+	static_branch_inc(&memcg_kmem_enabled_key);
 	/*
 	 * A memory cgroup is considered kmem-active as soon as it gets
 	 * kmemcg_id. Setting the id after enabling static branching will
@@ -3622,7 +3622,7 @@ static void memcg_destroy_kmem(struct mem_cgroup *memcg)
 {
 	if (memcg->kmem_acct_activated) {
 		memcg_destroy_kmem_caches(memcg);
-		static_key_slow_dec(&memcg_kmem_enabled_key);
+		static_branch_dec(&memcg_kmem_enabled_key);
 		WARN_ON(page_counter_read(&memcg->kmem));
 	}
 	tcp_destroy_cgroup(memcg);
@@ -4258,7 +4258,7 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 
 #ifdef CONFIG_INET
 	if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket)
-		static_key_slow_inc(&memcg_sockets_enabled_key);
+		static_branch_inc(&memcg_sockets_enabled_key);
 #endif
 
 	/*
@@ -4302,7 +4302,7 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
 	memcg_destroy_kmem(memcg);
 #ifdef CONFIG_INET
 	if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket)
-		static_key_slow_dec(&memcg_sockets_enabled_key);
+		static_branch_dec(&memcg_sockets_enabled_key);
 #endif
 	__mem_cgroup_free(memcg);
 }
@@ -5494,7 +5494,7 @@ void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage)
 
 #ifdef CONFIG_INET
 
-struct static_key memcg_sockets_enabled_key;
+DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key);
 EXPORT_SYMBOL(memcg_sockets_enabled_key);
 
 void sock_update_memcg(struct sock *sk)
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index 9a22e2d..18bc7f7 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -34,7 +34,7 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg)
 		return;
 
 	if (memcg->tcp_mem.active)
-		static_key_slow_dec(&memcg_sockets_enabled_key);
+		static_branch_dec(&memcg_sockets_enabled_key);
 }
 
 static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages)
@@ -65,7 +65,7 @@ static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages)
 		 * because when this value change, the code to process it is not
 		 * patched in yet.
 		 */
-		static_key_slow_inc(&memcg_sockets_enabled_key);
+		static_branch_inc(&memcg_sockets_enabled_key);
 		memcg->tcp_mem.active = true;
 	}
 
-- 
2.6.3


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH 14/14] mm: memcontrol: switch to the updated jump-label API
@ 2015-12-08 15:30   ` Johannes Weiner
  0 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-08 15:30 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-mm, netdev, cgroups, linux-kernel

According to <linux/jump_label.h> the direct use of struct static_key
is deprecated. Update the socket and slab accounting code accordingly.

Reported-by: Jason Baron <jbaron@akamai.com>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
---
 include/linux/memcontrol.h |  8 ++++----
 mm/memcontrol.c            | 12 ++++++------
 net/ipv4/tcp_memcontrol.c  |  4 ++--
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e4f6721..189f04d 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -708,8 +708,8 @@ void sock_release_memcg(struct sock *sk);
 bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
 void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
 #if defined(CONFIG_MEMCG) && defined(CONFIG_INET)
-extern struct static_key memcg_sockets_enabled_key;
-#define mem_cgroup_sockets_enabled static_key_false(&memcg_sockets_enabled_key)
+extern struct static_key_false memcg_sockets_enabled_key;
+#define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key)
 static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
 {
 #ifdef CONFIG_MEMCG_KMEM
@@ -731,7 +731,7 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
 #endif
 
 #ifdef CONFIG_MEMCG_KMEM
-extern struct static_key memcg_kmem_enabled_key;
+extern struct static_key_false memcg_kmem_enabled_key;
 
 extern int memcg_nr_cache_ids;
 void memcg_get_cache_ids(void);
@@ -747,7 +747,7 @@ void memcg_put_cache_ids(void);
 
 static inline bool memcg_kmem_enabled(void)
 {
-	return static_key_false(&memcg_kmem_enabled_key);
+	return static_branch_unlikely(&memcg_kmem_enabled_key);
 }
 
 static inline bool memcg_kmem_is_active(struct mem_cgroup *memcg)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a0da91f..5fe45d68 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -346,7 +346,7 @@ void memcg_put_cache_ids(void)
  * conditional to this static branch, we'll have to allow modules that does
  * kmem_cache_alloc and the such to see this symbol as well
  */
-struct static_key memcg_kmem_enabled_key;
+DEFINE_STATIC_KEY_FALSE(memcg_kmem_enabled_key);
 EXPORT_SYMBOL(memcg_kmem_enabled_key);
 
 #endif /* CONFIG_MEMCG_KMEM */
@@ -2883,7 +2883,7 @@ static int memcg_activate_kmem(struct mem_cgroup *memcg,
 	err = page_counter_limit(&memcg->kmem, nr_pages);
 	VM_BUG_ON(err);
 
-	static_key_slow_inc(&memcg_kmem_enabled_key);
+	static_branch_inc(&memcg_kmem_enabled_key);
 	/*
 	 * A memory cgroup is considered kmem-active as soon as it gets
 	 * kmemcg_id. Setting the id after enabling static branching will
@@ -3622,7 +3622,7 @@ static void memcg_destroy_kmem(struct mem_cgroup *memcg)
 {
 	if (memcg->kmem_acct_activated) {
 		memcg_destroy_kmem_caches(memcg);
-		static_key_slow_dec(&memcg_kmem_enabled_key);
+		static_branch_dec(&memcg_kmem_enabled_key);
 		WARN_ON(page_counter_read(&memcg->kmem));
 	}
 	tcp_destroy_cgroup(memcg);
@@ -4258,7 +4258,7 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 
 #ifdef CONFIG_INET
 	if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket)
-		static_key_slow_inc(&memcg_sockets_enabled_key);
+		static_branch_inc(&memcg_sockets_enabled_key);
 #endif
 
 	/*
@@ -4302,7 +4302,7 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
 	memcg_destroy_kmem(memcg);
 #ifdef CONFIG_INET
 	if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket)
-		static_key_slow_dec(&memcg_sockets_enabled_key);
+		static_branch_dec(&memcg_sockets_enabled_key);
 #endif
 	__mem_cgroup_free(memcg);
 }
@@ -5494,7 +5494,7 @@ void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage)
 
 #ifdef CONFIG_INET
 
-struct static_key memcg_sockets_enabled_key;
+DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key);
 EXPORT_SYMBOL(memcg_sockets_enabled_key);
 
 void sock_update_memcg(struct sock *sk)
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index 9a22e2d..18bc7f7 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -34,7 +34,7 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg)
 		return;
 
 	if (memcg->tcp_mem.active)
-		static_key_slow_dec(&memcg_sockets_enabled_key);
+		static_branch_dec(&memcg_sockets_enabled_key);
 }
 
 static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages)
@@ -65,7 +65,7 @@ static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages)
 		 * because when this value change, the code to process it is not
 		 * patched in yet.
 		 */
-		static_key_slow_inc(&memcg_sockets_enabled_key);
+		static_branch_inc(&memcg_sockets_enabled_key);
 		memcg->tcp_mem.active = true;
 	}
 
-- 
2.6.3

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* Re: [PATCH 14/14] mm: memcontrol: switch to the updated jump-label API
  2015-12-08 15:30   ` Johannes Weiner
@ 2015-12-08 16:28     ` David Miller
  -1 siblings, 0 replies; 66+ messages in thread
From: David Miller @ 2015-12-08 16:28 UTC (permalink / raw)
  To: hannes; +Cc: akpm, linux-mm, netdev, cgroups, linux-kernel

From: Johannes Weiner <hannes@cmpxchg.org>
Date: Tue,  8 Dec 2015 10:30:24 -0500

> According to <linux/jump_label.h> the direct use of struct static_key
> is deprecated. Update the socket and slab accounting code accordingly.
> 
> Reported-by: Jason Baron <jbaron@akamai.com>
> Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>

Acked-by: David S. Miller <davem@davemloft.net>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 14/14] mm: memcontrol: switch to the updated jump-label API
@ 2015-12-08 16:28     ` David Miller
  0 siblings, 0 replies; 66+ messages in thread
From: David Miller @ 2015-12-08 16:28 UTC (permalink / raw)
  To: hannes; +Cc: akpm, linux-mm, netdev, cgroups, linux-kernel

From: Johannes Weiner <hannes@cmpxchg.org>
Date: Tue,  8 Dec 2015 10:30:24 -0500

> According to <linux/jump_label.h> the direct use of struct static_key
> is deprecated. Update the socket and slab accounting code accordingly.
> 
> Reported-by: Jason Baron <jbaron@akamai.com>
> Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>

Acked-by: David S. Miller <davem@davemloft.net>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 00/14] mm: memcontrol: account socket memory in unified hierarchy v4-RESEND
  2015-12-08 15:30 ` Johannes Weiner
@ 2015-12-08 16:28   ` David Miller
  -1 siblings, 0 replies; 66+ messages in thread
From: David Miller @ 2015-12-08 16:28 UTC (permalink / raw)
  To: hannes; +Cc: akpm, linux-mm, netdev, cgroups, linux-kernel

From: Johannes Weiner <hannes@cmpxchg.org>
Date: Tue,  8 Dec 2015 10:30:10 -0500

> Hi Andrew,
> 
> there was some build breakage in CONFIG_ combinations I hadn't tested
> in the last revision, so here is a fixed-up resend with minimal CC
> list. The only difference to the previous version is a section in
> memcontrol.h, but it accumulates throughout the series and would have
> been a pain to resolve on your end. So here goes. This also includes
> the review tags that Dave and Vlad had sent out in the meantime.
> 
> Difference to the original v4:

All looks fine to me:

Acked-by: David S. Miller <davem@davemloft.net>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 00/14] mm: memcontrol: account socket memory in unified hierarchy v4-RESEND
@ 2015-12-08 16:28   ` David Miller
  0 siblings, 0 replies; 66+ messages in thread
From: David Miller @ 2015-12-08 16:28 UTC (permalink / raw)
  To: hannes; +Cc: akpm, linux-mm, netdev, cgroups, linux-kernel

From: Johannes Weiner <hannes@cmpxchg.org>
Date: Tue,  8 Dec 2015 10:30:10 -0500

> Hi Andrew,
> 
> there was some build breakage in CONFIG_ combinations I hadn't tested
> in the last revision, so here is a fixed-up resend with minimal CC
> list. The only difference to the previous version is a section in
> memcontrol.h, but it accumulates throughout the series and would have
> been a pain to resolve on your end. So here goes. This also includes
> the review tags that Dave and Vlad had sent out in the meantime.
> 
> Difference to the original v4:

All looks fine to me:

Acked-by: David S. Miller <davem@davemloft.net>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 00/14] mm: memcontrol: account socket memory in unified hierarchy v4-RESEND
@ 2015-12-09 16:31   ` Arnd Bergmann
  0 siblings, 0 replies; 66+ messages in thread
From: Arnd Bergmann @ 2015-12-09 16:31 UTC (permalink / raw)
  To: Johannes Weiner; +Cc: Andrew Morton, linux-mm, netdev, cgroups, linux-kernel

On Tuesday 08 December 2015 10:30:10 Johannes Weiner wrote:
> Hi Andrew,
> 
> there was some build breakage in CONFIG_ combinations I hadn't tested
> in the last revision, so here is a fixed-up resend with minimal CC
> list. The only difference to the previous version is a section in
> memcontrol.h, but it accumulates throughout the series and would have
> been a pain to resolve on your end. So here goes. This also includes
> the review tags that Dave and Vlad had sent out in the meantime.
> 
> Difference to the original v4:

I needed two more patches on top of today's linux-next kernel; I will
send them as replies to this mail. I don't know whether you have already
fixed the issues for !CONFIG_INET and CONFIG_SLOB; if not, please
fold them into your series.

	Arnd

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 00/14] mm: memcontrol: account socket memory in unified hierarchy v4-RESEND
@ 2015-12-09 16:31   ` Arnd Bergmann
  0 siblings, 0 replies; 66+ messages in thread
From: Arnd Bergmann @ 2015-12-09 16:31 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: Andrew Morton, linux-mm-Bw31MaZKKs3YtjvyW6yDsg,
	netdev-u79uwXL29TY76Z2rM5mHXA, cgroups-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA

On Tuesday 08 December 2015 10:30:10 Johannes Weiner wrote:
> Hi Andrew,
> 
> there was some build breakage in CONFIG_ combinations I hadn't tested
> in the last revision, so here is a fixed-up resend with minimal CC
> list. The only difference to the previous version is a section in
> memcontrol.h, but it accumulates throughout the series and would have
> been a pain to resolve on your end. So here goes. This also includes
> the review tags that Dave and Vlad had sent out in the meantime.
> 
> Difference to the original v4:

I needed two more patches on top of today's linux-next kernel; I will
send them as replies to this mail. I don't know whether you have already
fixed the issues for !CONFIG_INET and CONFIG_SLOB; if not, please
fold them into your series.

	Arnd

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 00/14] mm: memcontrol: account socket memory in unified hierarchy v4-RESEND
@ 2015-12-09 16:31   ` Arnd Bergmann
  0 siblings, 0 replies; 66+ messages in thread
From: Arnd Bergmann @ 2015-12-09 16:31 UTC (permalink / raw)
  To: Johannes Weiner; +Cc: Andrew Morton, linux-mm, netdev, cgroups, linux-kernel

On Tuesday 08 December 2015 10:30:10 Johannes Weiner wrote:
> Hi Andrew,
> 
> there was some build breakage in CONFIG_ combinations I hadn't tested
> in the last revision, so here is a fixed-up resend with minimal CC
> list. The only difference to the previous version is a section in
> memcontrol.h, but it accumulates throughout the series and would have
> been a pain to resolve on your end. So here goes. This also includes
> the review tags that Dave and Vlad had sent out in the meantime.
> 
> Difference to the original v4:

I needed two more patches on top of today's linux-next kernel; I will
send them as replies to this mail. I don't know whether you have already
fixed the issues for !CONFIG_INET and CONFIG_SLOB; if not, please
fold them into your series.

	Arnd

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* [PATCH] mm: memcontrol: only manage socket pressure for CONFIG_INET
  2015-12-09 16:31   ` Arnd Bergmann
  (?)
@ 2015-12-09 16:32     ` Arnd Bergmann
  -1 siblings, 0 replies; 66+ messages in thread
From: Arnd Bergmann @ 2015-12-09 16:32 UTC (permalink / raw)
  To: Johannes Weiner; +Cc: Andrew Morton, linux-mm, netdev, cgroups, linux-kernel

When IPv4 support (CONFIG_INET) is disabled, the memcg->socket_pressure
field is not defined, and we get a build error from the vmpressure code
as well as link errors for tcp_destroy_cgroup() and tcp_init_cgroup():

mm/vmpressure.c: In function 'vmpressure':
mm/vmpressure.c:287:9: error: 'struct mem_cgroup' has no member named 'socket_pressure'
    memcg->socket_pressure = jiffies + HZ;
mm/built-in.o: In function `mem_cgroup_css_free':
:(.text+0x1c03a): undefined reference to `tcp_destroy_cgroup'
mm/built-in.o: In function `mem_cgroup_css_online':
:(.text+0x1c20e): undefined reference to `tcp_init_cgroup'

This puts the code causing this in the same #ifdef that guards the
struct member and the TCP implementation.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 20cc40e66c42 ("mm: memcontrol: hook up vmpressure to socket pressure")

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6faea81e66d7..73cd572167bb 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4220,13 +4220,13 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	if (ret)
 		return ret;
 
+#ifdef CONFIG_INET
 #ifdef CONFIG_MEMCG_LEGACY_KMEM
 	ret = tcp_init_cgroup(memcg);
 	if (ret)
 		return ret;
 #endif
 
-#ifdef CONFIG_INET
 	if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket)
 		static_branch_inc(&memcg_sockets_enabled_key);
 #endif
@@ -4276,7 +4276,7 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
 
 	memcg_free_kmem(memcg);
 
-#ifdef CONFIG_MEMCG_LEGACY_KMEM
+#if defined(CONFIG_MEMCG_LEGACY_KMEM) && defined(CONFIG_INET)
 	tcp_destroy_cgroup(memcg);
 #endif
 
diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index 506f03e4be47..8cdeebe48848 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -275,6 +275,7 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
 
 		level = vmpressure_calc_level(scanned, reclaimed);
 
+#ifdef CONFIG_INET
 		if (level > VMPRESSURE_LOW) {
 			/*
 			 * Let the socket buffer allocator know that
@@ -286,6 +287,7 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
 			 */
 			memcg->socket_pressure = jiffies + HZ;
 		}
+#endif
 	}
 }
 


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH] mm: memcontrol: only manage socket pressure for CONFIG_INET
@ 2015-12-09 16:32     ` Arnd Bergmann
  0 siblings, 0 replies; 66+ messages in thread
From: Arnd Bergmann @ 2015-12-09 16:32 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: Andrew Morton, linux-mm-Bw31MaZKKs3YtjvyW6yDsg,
	netdev-u79uwXL29TY76Z2rM5mHXA, cgroups-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA

When IPv4 support (CONFIG_INET) is disabled, the memcg->socket_pressure
field is not defined, and we get a build error from the vmpressure code
as well as link errors for tcp_destroy_cgroup() and tcp_init_cgroup():

mm/vmpressure.c: In function 'vmpressure':
mm/vmpressure.c:287:9: error: 'struct mem_cgroup' has no member named 'socket_pressure'
    memcg->socket_pressure = jiffies + HZ;
mm/built-in.o: In function `mem_cgroup_css_free':
:(.text+0x1c03a): undefined reference to `tcp_destroy_cgroup'
mm/built-in.o: In function `mem_cgroup_css_online':
:(.text+0x1c20e): undefined reference to `tcp_init_cgroup'

This puts the code causing this in the same #ifdef that guards the
struct member and the TCP implementation.

Signed-off-by: Arnd Bergmann <arnd-r2nGTMty4D4@public.gmane.org>
Fixes: 20cc40e66c42 ("mm: memcontrol: hook up vmpressure to socket pressure")

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6faea81e66d7..73cd572167bb 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4220,13 +4220,13 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	if (ret)
 		return ret;
 
+#ifdef CONFIG_INET
 #ifdef CONFIG_MEMCG_LEGACY_KMEM
 	ret = tcp_init_cgroup(memcg);
 	if (ret)
 		return ret;
 #endif
 
-#ifdef CONFIG_INET
 	if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket)
 		static_branch_inc(&memcg_sockets_enabled_key);
 #endif
@@ -4276,7 +4276,7 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
 
 	memcg_free_kmem(memcg);
 
-#ifdef CONFIG_MEMCG_LEGACY_KMEM
+#if defined(CONFIG_MEMCG_LEGACY_KMEM) && defined(CONFIG_INET)
 	tcp_destroy_cgroup(memcg);
 #endif
 
diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index 506f03e4be47..8cdeebe48848 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -275,6 +275,7 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
 
 		level = vmpressure_calc_level(scanned, reclaimed);
 
+#ifdef CONFIG_INET
 		if (level > VMPRESSURE_LOW) {
 			/*
 			 * Let the socket buffer allocator know that
@@ -286,6 +287,7 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
 			 */
 			memcg->socket_pressure = jiffies + HZ;
 		}
+#endif
 	}
 }
 

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH] mm: memcontrol: only manage socket pressure for CONFIG_INET
@ 2015-12-09 16:32     ` Arnd Bergmann
  0 siblings, 0 replies; 66+ messages in thread
From: Arnd Bergmann @ 2015-12-09 16:32 UTC (permalink / raw)
  To: Johannes Weiner; +Cc: Andrew Morton, linux-mm, netdev, cgroups, linux-kernel

When IPv4 support (CONFIG_INET) is disabled, the memcg->socket_pressure
field is not defined, and we get a build error from the vmpressure code
as well as link errors for tcp_destroy_cgroup() and tcp_init_cgroup():

mm/vmpressure.c: In function 'vmpressure':
mm/vmpressure.c:287:9: error: 'struct mem_cgroup' has no member named 'socket_pressure'
    memcg->socket_pressure = jiffies + HZ;
mm/built-in.o: In function `mem_cgroup_css_free':
:(.text+0x1c03a): undefined reference to `tcp_destroy_cgroup'
mm/built-in.o: In function `mem_cgroup_css_online':
:(.text+0x1c20e): undefined reference to `tcp_init_cgroup'

This puts the code causing this in the same #ifdef that guards the
struct member and the TCP implementation.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 20cc40e66c42 ("mm: memcontrol: hook up vmpressure to socket pressure")

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6faea81e66d7..73cd572167bb 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4220,13 +4220,13 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	if (ret)
 		return ret;
 
+#ifdef CONFIG_INET
 #ifdef CONFIG_MEMCG_LEGACY_KMEM
 	ret = tcp_init_cgroup(memcg);
 	if (ret)
 		return ret;
 #endif
 
-#ifdef CONFIG_INET
 	if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket)
 		static_branch_inc(&memcg_sockets_enabled_key);
 #endif
@@ -4276,7 +4276,7 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
 
 	memcg_free_kmem(memcg);
 
-#ifdef CONFIG_MEMCG_LEGACY_KMEM
+#if defined(CONFIG_MEMCG_LEGACY_KMEM) && defined(CONFIG_INET)
 	tcp_destroy_cgroup(memcg);
 #endif
 
diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index 506f03e4be47..8cdeebe48848 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -275,6 +275,7 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
 
 		level = vmpressure_calc_level(scanned, reclaimed);
 
+#ifdef CONFIG_INET
 		if (level > VMPRESSURE_LOW) {
 			/*
 			 * Let the socket buffer allocator know that
@@ -286,6 +287,7 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
 			 */
 			memcg->socket_pressure = jiffies + HZ;
 		}
+#endif
 	}
 }
 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH] mm: memcontrol: MEMCG no longer works with SLOB
  2015-12-09 16:31   ` Arnd Bergmann
@ 2015-12-09 16:32     ` Arnd Bergmann
  -1 siblings, 0 replies; 66+ messages in thread
From: Arnd Bergmann @ 2015-12-09 16:32 UTC (permalink / raw)
  To: Johannes Weiner; +Cc: Andrew Morton, linux-mm, netdev, cgroups, linux-kernel

The change to move the kmem accounting into the normal memcg
code means we can no longer use memcg with slob, which lacks
the memcg_params member in its struct kmem_cache:

../mm/slab.h: In function 'is_root_cache':
../mm/slab.h:187:10: error: 'struct kmem_cache' has no member named 'memcg_params'

This enforces the new dependency in Kconfig. Alternatively,
we could change the slob code to allow using MEMCG.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 6e6133536d82 ("mm: memcontrol: move kmem accounting code to CONFIG_MEMCG")

diff --git a/init/Kconfig b/init/Kconfig
index 4822bb359fea..f4d81d382608 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -999,6 +999,7 @@ config PAGE_COUNTER
 
 config MEMCG
 	bool "Memory Resource Controller for Control Groups"
+	depends on SLAB || SLUB
 	select PAGE_COUNTER
 	select EVENTFD
 	help
@@ -1040,7 +1041,6 @@ config MEMCG_LEGACY_KMEM
 config MEMCG_KMEM
 	bool "Legacy Memory Resource Controller Kernel Memory accounting"
 	depends on MEMCG
-	depends on SLUB || SLAB
 	select MEMCG_LEGACY_KMEM
 	help
 	  The Kernel Memory extension for Memory Resource Controller can limit


^ permalink raw reply related	[flat|nested] 66+ messages in thread

* [PATCH] mm: memcontrol: MEMCG no longer works with SLOB
@ 2015-12-09 16:32     ` Arnd Bergmann
  0 siblings, 0 replies; 66+ messages in thread
From: Arnd Bergmann @ 2015-12-09 16:32 UTC (permalink / raw)
  To: Johannes Weiner; +Cc: Andrew Morton, linux-mm, netdev, cgroups, linux-kernel

The change to move the kmem accounting into the normal memcg
code means we can no longer use memcg with slob, which lacks
the memcg_params member in its struct kmem_cache:

../mm/slab.h: In function 'is_root_cache':
../mm/slab.h:187:10: error: 'struct kmem_cache' has no member named 'memcg_params'

This enforces the new dependency in Kconfig. Alternatively,
we could change the slob code to allow using MEMCG.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 6e6133536d82 ("mm: memcontrol: move kmem accounting code to CONFIG_MEMCG")

diff --git a/init/Kconfig b/init/Kconfig
index 4822bb359fea..f4d81d382608 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -999,6 +999,7 @@ config PAGE_COUNTER
 
 config MEMCG
 	bool "Memory Resource Controller for Control Groups"
+	depends on SLAB || SLUB
 	select PAGE_COUNTER
 	select EVENTFD
 	help
@@ -1040,7 +1041,6 @@ config MEMCG_LEGACY_KMEM
 config MEMCG_KMEM
 	bool "Legacy Memory Resource Controller Kernel Memory accounting"
 	depends on MEMCG
-	depends on SLUB || SLAB
 	select MEMCG_LEGACY_KMEM
 	help
 	  The Kernel Memory extension for Memory Resource Controller can limit

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* Re: [PATCH 00/14] mm: memcontrol: account socket memory in unified hierarchy v4-RESEND
  2015-12-09 16:31   ` Arnd Bergmann
@ 2015-12-09 18:17     ` Johannes Weiner
  -1 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-09 18:17 UTC (permalink / raw)
  To: Arnd Bergmann; +Cc: Andrew Morton, linux-mm, netdev, cgroups, linux-kernel

Hey Arnd!

On Wed, Dec 09, 2015 at 05:31:38PM +0100, Arnd Bergmann wrote:
> On Tuesday 08 December 2015 10:30:10 Johannes Weiner wrote:
> > Hi Andrew,
> > 
> > there was some build breakage in CONFIG_ combinations I hadn't tested
> > in the last revision, so here is a fixed-up resend with minimal CC
> > list. The only difference to the previous version is a section in
> > memcontrol.h, but it accumulates throughout the series and would have
> > been a pain to resolve on your end. So here goes. This also includes
> > the review tags that Dave and Vlad had sent out in the meantime.
> > 
> > Difference to the original v4:
> 
> I needed two more patches on top of today's linux-next kernel, will
> send them as replies to this mail. I don't know if you have already
> fixed the issues for !CONFIG_INET and CONFIG_SLOB, if not please
> fold them into your series.

Sorry for breaking your stuff, and thanks for sending patches. I'll
get to them in a minute and will make sure the fixes get routed to
Andrew.

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 00/14] mm: memcontrol: account socket memory in unified hierarchy v4-RESEND
@ 2015-12-09 18:17     ` Johannes Weiner
  0 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-09 18:17 UTC (permalink / raw)
  To: Arnd Bergmann; +Cc: Andrew Morton, linux-mm, netdev, cgroups, linux-kernel

Hey Arnd!

On Wed, Dec 09, 2015 at 05:31:38PM +0100, Arnd Bergmann wrote:
> On Tuesday 08 December 2015 10:30:10 Johannes Weiner wrote:
> > Hi Andrew,
> > 
> > there was some build breakage in CONFIG_ combinations I hadn't tested
> > in the last revision, so here is a fixed-up resend with minimal CC
> > list. The only difference to the previous version is a section in
> > memcontrol.h, but it accumulates throughout the series and would have
> > been a pain to resolve on your end. So here goes. This also includes
> > the review tags that Dave and Vlad had sent out in the meantime.
> > 
> > Difference to the original v4:
> 
> I needed two more patches on top of today's linux-next kernel, will
> send them as replies to this mail. I don't know if you have already
> fixed the issues for !CONFIG_INET and CONFIG_SLOB, if not please
> fold them into your series.

Sorry for breaking your stuff, and thanks for sending patches. I'll
get to them in a minute and will make sure the fixes get routed to
Andrew.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] mm: memcontrol: only manage socket pressure for CONFIG_INET
  2015-12-09 16:32     ` Arnd Bergmann
@ 2015-12-09 18:58       ` Johannes Weiner
  -1 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-09 18:58 UTC (permalink / raw)
  To: Arnd Bergmann; +Cc: Andrew Morton, linux-mm, netdev, cgroups, linux-kernel

On Wed, Dec 09, 2015 at 05:32:16PM +0100, Arnd Bergmann wrote:
> When IPV4 support is disabled, the memcg->socket_pressure field is
> not defined and we get a build error from the vmpressure code:
> 
> mm/vmpressure.c: In function 'vmpressure':
> mm/vmpressure.c:287:9: error: 'struct mem_cgroup' has no member named 'socket_pressure'
>     memcg->socket_pressure = jiffies + HZ;
> mm/built-in.o: In function `mem_cgroup_css_free':
> :(.text+0x1c03a): undefined reference to `tcp_destroy_cgroup'
> mm/built-in.o: In function `mem_cgroup_css_online':
> :(.text+0x1c20e): undefined reference to `tcp_init_cgroup'
> 
> This puts the code causing this in the same #ifdef that guards the
> struct member and the TCP implementation.
> 
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
> Fixes: 20cc40e66c42 ("mm: memcontrol: hook up vmpressure to socket pressure")

Acked-by: Johannes Weiner <hannes@cmpxchg.org>

> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 6faea81e66d7..73cd572167bb 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -4220,13 +4220,13 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
>  	if (ret)
>  		return ret;
>  
> +#ifdef CONFIG_INET
>  #ifdef CONFIG_MEMCG_LEGACY_KMEM
>  	ret = tcp_init_cgroup(memcg);
>  	if (ret)
>  		return ret;
>  #endif

The calls to tcp_init_cgroup() appear earlier in the series than "mm:
memcontrol: hook up vmpressure to socket pressure". However, they get
moved around a few times so fixing it earlier means respinning the
series. Andrew, it's up to you whether we take the bisectability hit
for !CONFIG_INET && CONFIG_MEMCG (how common is this?) or whether you
want me to resend the series.

Sorry about the trouble. I don't have a git tree on kernel.org because
we don't really use git in -mm, but the downside is that we don't get
the benefits of the automatic build testing for all kinds of configs.
I'll try to set up a git tree to expose series to full build coverage
before they hit -mm and -next.

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] mm: memcontrol: only manage socket pressure for CONFIG_INET
@ 2015-12-09 18:58       ` Johannes Weiner
  0 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-09 18:58 UTC (permalink / raw)
  To: Arnd Bergmann; +Cc: Andrew Morton, linux-mm, netdev, cgroups, linux-kernel

On Wed, Dec 09, 2015 at 05:32:16PM +0100, Arnd Bergmann wrote:
> When IPV4 support is disabled, the memcg->socket_pressure field is
> not defined and we get a build error from the vmpressure code:
> 
> mm/vmpressure.c: In function 'vmpressure':
> mm/vmpressure.c:287:9: error: 'struct mem_cgroup' has no member named 'socket_pressure'
>     memcg->socket_pressure = jiffies + HZ;
> mm/built-in.o: In function `mem_cgroup_css_free':
> :(.text+0x1c03a): undefined reference to `tcp_destroy_cgroup'
> mm/built-in.o: In function `mem_cgroup_css_online':
> :(.text+0x1c20e): undefined reference to `tcp_init_cgroup'
> 
> This puts the code causing this in the same #ifdef that guards the
> struct member and the TCP implementation.
> 
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
> Fixes: 20cc40e66c42 ("mm: memcontrol: hook up vmpressure to socket pressure")

Acked-by: Johannes Weiner <hannes@cmpxchg.org>

> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 6faea81e66d7..73cd572167bb 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -4220,13 +4220,13 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
>  	if (ret)
>  		return ret;
>  
> +#ifdef CONFIG_INET
>  #ifdef CONFIG_MEMCG_LEGACY_KMEM
>  	ret = tcp_init_cgroup(memcg);
>  	if (ret)
>  		return ret;
>  #endif

The calls to tcp_init_cgroup() appear earlier in the series than "mm:
memcontrol: hook up vmpressure to socket pressure". However, they get
moved around a few times so fixing it earlier means respinning the
series. Andrew, it's up to you whether we take the bisectability hit
for !CONFIG_INET && CONFIG_MEMCG (how common is this?) or whether you
want me to resend the series.

Sorry about the trouble. I don't have a git tree on kernel.org because
we don't really use git in -mm, but the downside is that we don't get
the benefits of the automatic build testing for all kinds of configs.
I'll try to set up a git tree to expose series to full build coverage
before they hit -mm and -next.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] mm: memcontrol: MEMCG no longer works with SLOB
  2015-12-09 16:32     ` Arnd Bergmann
  (?)
@ 2015-12-09 20:01       ` Johannes Weiner
  -1 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-09 20:01 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Andrew Morton, linux-mm, netdev, cgroups, linux-kernel, Vladimir Davydov

On Wed, Dec 09, 2015 at 05:32:39PM +0100, Arnd Bergmann wrote:
> The change to move the kmem accounting into the normal memcg
> code means we can no longer use memcg with slob, which lacks
> the memcg_params member in its struct kmem_cache:
> 
> ../mm/slab.h: In function 'is_root_cache':
> ../mm/slab.h:187:10: error: 'struct kmem_cache' has no member named 'memcg_params'
> 
> This enforces the new dependency in Kconfig. Alternatively,
> we could change the slob code to allow using MEMCG.

I'm curious, was this a random config or do you actually use
CONFIG_SLOB && CONFIG_MEMCG?

Excluding CONFIG_MEMCG completely for slob seems harsh, but I would
prefer not littering the source with

#if defined(CONFIG_MEMCG) && (defined(CONFIG_SLAB) || defined(CONFIG_SLUB))

or

#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)

for such a special case. The #ifdefs are already out of hand in there.

Vladimir, what would you think of simply doing this?

diff --git a/mm/slab.h b/mm/slab.h
index 5adec08..0b3ec4b 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -25,6 +25,9 @@ struct kmem_cache {
 	int refcount;		/* Use counter */
 	void (*ctor)(void *);	/* Called on object slot creation */
 	struct list_head list;	/* List of all slab caches on the system */
+#ifdef CONFIG_MEMCG
+	struct memcg_cache_params memcg_params;
+#endif
 };
 
 #endif /* CONFIG_SLOB */

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* Re: [PATCH] mm: memcontrol: MEMCG no longer works with SLOB
@ 2015-12-09 20:01       ` Johannes Weiner
  0 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-09 20:01 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Andrew Morton, linux-mm-Bw31MaZKKs3YtjvyW6yDsg,
	netdev-u79uwXL29TY76Z2rM5mHXA, cgroups-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Vladimir Davydov

On Wed, Dec 09, 2015 at 05:32:39PM +0100, Arnd Bergmann wrote:
> The change to move the kmem accounting into the normal memcg
> code means we can no longer use memcg with slob, which lacks
> the memcg_params member in its struct kmem_cache:
> 
> ../mm/slab.h: In function 'is_root_cache':
> ../mm/slab.h:187:10: error: 'struct kmem_cache' has no member named 'memcg_params'
> 
> This enforces the new dependency in Kconfig. Alternatively,
> we could change the slob code to allow using MEMCG.

I'm curious, was this a random config or do you actually use
CONFIG_SLOB && CONFIG_MEMCG?

Excluding CONFIG_MEMCG completely for slob seems harsh, but I would
prefer not littering the source with

#if defined(CONFIG_MEMCG) && (defined(CONFIG_SLAB) || defined(CONFIG_SLUB))

or

#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)

for such a special case. The #ifdefs are already out of hand in there.

Vladimir, what would you think of simply doing this?

diff --git a/mm/slab.h b/mm/slab.h
index 5adec08..0b3ec4b 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -25,6 +25,9 @@ struct kmem_cache {
 	int refcount;		/* Use counter */
 	void (*ctor)(void *);	/* Called on object slot creation */
 	struct list_head list;	/* List of all slab caches on the system */
+#ifdef CONFIG_MEMCG
+	struct memcg_cache_params memcg_params;
+#endif
 };
 
 #endif /* CONFIG_SLOB */

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* Re: [PATCH] mm: memcontrol: MEMCG no longer works with SLOB
@ 2015-12-09 20:01       ` Johannes Weiner
  0 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-09 20:01 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Andrew Morton, linux-mm, netdev, cgroups, linux-kernel, Vladimir Davydov

On Wed, Dec 09, 2015 at 05:32:39PM +0100, Arnd Bergmann wrote:
> The change to move the kmem accounting into the normal memcg
> code means we can no longer use memcg with slob, which lacks
> the memcg_params member in its struct kmem_cache:
> 
> ../mm/slab.h: In function 'is_root_cache':
> ../mm/slab.h:187:10: error: 'struct kmem_cache' has no member named 'memcg_params'
> 
> This enforces the new dependency in Kconfig. Alternatively,
> we could change the slob code to allow using MEMCG.

I'm curious, was this a random config or do you actually use
CONFIG_SLOB && CONFIG_MEMCG?

Excluding CONFIG_MEMCG completely for slob seems harsh, but I would
prefer not littering the source with

#if defined(CONFIG_MEMCG) && (defined(CONFIG_SLAB) || defined(CONFIG_SLUB))

or

#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)

for such a special case. The #ifdefs are already out of hand in there.

Vladimir, what would you think of simply doing this?

diff --git a/mm/slab.h b/mm/slab.h
index 5adec08..0b3ec4b 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -25,6 +25,9 @@ struct kmem_cache {
 	int refcount;		/* Use counter */
 	void (*ctor)(void *);	/* Called on object slot creation */
 	struct list_head list;	/* List of all slab caches on the system */
+#ifdef CONFIG_MEMCG
+	struct memcg_cache_params memcg_params;
+#endif
 };
 
 #endif /* CONFIG_SLOB */

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 66+ messages in thread

* Re: [PATCH] mm: memcontrol: MEMCG no longer works with SLOB
  2015-12-09 20:01       ` Johannes Weiner
@ 2015-12-09 21:03         ` Arnd Bergmann
  -1 siblings, 0 replies; 66+ messages in thread
From: Arnd Bergmann @ 2015-12-09 21:03 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: Andrew Morton, linux-mm, netdev, cgroups, linux-kernel, Vladimir Davydov

On Wednesday 09 December 2015 15:01:07 Johannes Weiner wrote:
> On Wed, Dec 09, 2015 at 05:32:39PM +0100, Arnd Bergmann wrote:
> > The change to move the kmem accounting into the normal memcg
> > code means we can no longer use memcg with slob, which lacks
> > the memcg_params member in its struct kmem_cache:
> > 
> > ../mm/slab.h: In function 'is_root_cache':
> > ../mm/slab.h:187:10: error: 'struct kmem_cache' has no member named 'memcg_params'
> > 
> > This enforces the new dependency in Kconfig. Alternatively,
> > we could change the slob code to allow using MEMCG.
> 
> I'm curious, was this a random config or do you actually use
> CONFIG_SLOB && CONFIG_MEMCG?

Just a randconfig build, I do a lot of those to check for ARM-specific
regressions.

> diff --git a/mm/slab.h b/mm/slab.h
> index 5adec08..0b3ec4b 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -25,6 +25,9 @@ struct kmem_cache {
>         int refcount;           /* Use counter */
>         void (*ctor)(void *);   /* Called on object slot creation */
>         struct list_head list;  /* List of all slab caches on the system */
> +#ifdef CONFIG_MEMCG
> +       struct memcg_cache_params memcg_params;
> +#endif
>  };
>  
>  #endif /* CONFIG_SLOB */

This was my first approach to the problem, and it solves the build issues,
I just wasn't sure if it works as expected.

	Arnd

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] mm: memcontrol: MEMCG no longer works with SLOB
@ 2015-12-09 21:03         ` Arnd Bergmann
  0 siblings, 0 replies; 66+ messages in thread
From: Arnd Bergmann @ 2015-12-09 21:03 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: Andrew Morton, linux-mm, netdev, cgroups, linux-kernel, Vladimir Davydov

On Wednesday 09 December 2015 15:01:07 Johannes Weiner wrote:
> On Wed, Dec 09, 2015 at 05:32:39PM +0100, Arnd Bergmann wrote:
> > The change to move the kmem accounting into the normal memcg
> > code means we can no longer use memcg with slob, which lacks
> > the memcg_params member in its struct kmem_cache:
> > 
> > ../mm/slab.h: In function 'is_root_cache':
> > ../mm/slab.h:187:10: error: 'struct kmem_cache' has no member named 'memcg_params'
> > 
> > This enforces the new dependency in Kconfig. Alternatively,
> > we could change the slob code to allow using MEMCG.
> 
> I'm curious, was this a random config or do you actually use
> CONFIG_SLOB && CONFIG_MEMCG?

Just a randconfig build, I do a lot of those to check for ARM-specific
regressions.

> diff --git a/mm/slab.h b/mm/slab.h
> index 5adec08..0b3ec4b 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -25,6 +25,9 @@ struct kmem_cache {
>         int refcount;           /* Use counter */
>         void (*ctor)(void *);   /* Called on object slot creation */
>         struct list_head list;  /* List of all slab caches on the system */
> +#ifdef CONFIG_MEMCG
> +       struct memcg_cache_params memcg_params;
> +#endif
>  };
>  
>  #endif /* CONFIG_SLOB */

This was my first approach to the problem, and it solves the build issues,
I just wasn't sure if it works as expected.

	Arnd

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] mm: memcontrol: only manage socket pressure for CONFIG_INET
@ 2015-12-09 22:28         ` Andrew Morton
  0 siblings, 0 replies; 66+ messages in thread
From: Andrew Morton @ 2015-12-09 22:28 UTC (permalink / raw)
  To: Johannes Weiner; +Cc: Arnd Bergmann, linux-mm, netdev, cgroups, linux-kernel

On Wed, 9 Dec 2015 13:58:58 -0500 Johannes Weiner <hannes@cmpxchg.org> wrote:

> > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > index 6faea81e66d7..73cd572167bb 100644
> > --- a/mm/memcontrol.c
> > +++ b/mm/memcontrol.c
> > @@ -4220,13 +4220,13 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
> >  	if (ret)
> >  		return ret;
> >  
> > +#ifdef CONFIG_INET
> >  #ifdef CONFIG_MEMCG_LEGACY_KMEM
> >  	ret = tcp_init_cgroup(memcg);
> >  	if (ret)
> >  		return ret;
> >  #endif
> 
> The calls to tcp_init_cgroup() appear earlier in the series than "mm:
> memcontrol: hook up vmpressure to socket pressure". However, they get
> moved around a few times so fixing it earlier means respinning the
> series. Andrew, it's up to you whether we take the bisectability hit
> for !CONFIG_INET && CONFIG_MEMCG (how common is this?) or whether you
> want me to resend the series.

hm, drat, I was suspecting dependency issues here, but a test build
said it was OK.

Actually, I was expecting this patch series to depend on the linux-next
cgroup2 changes, but that doesn't appear to be the case.  *should* this
series be staged after the cgroup2 code?

Regarding this particular series: yes, I think we can live with a
bisection hole for !CONFIG_INET && CONFIG_MEMCG users.  But I'm not
sure why we're discussing bisection issues, because Arnd's build
failure occurs with everything applied?

> Sorry about the trouble. I don't have a git tree on kernel.org because
> we don't really use git in -mm, but the downside is that we don't get
> the benefits of the automatic build testing for all kinds of configs.
> I'll try to set up a git tree to expose series to full build coverage
> before they hit -mm and -next.

This sort of thing happens quite rarely.

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] mm: memcontrol: only manage socket pressure for CONFIG_INET
@ 2015-12-09 22:28         ` Andrew Morton
  0 siblings, 0 replies; 66+ messages in thread
From: Andrew Morton @ 2015-12-09 22:28 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: Arnd Bergmann, linux-mm-Bw31MaZKKs3YtjvyW6yDsg,
	netdev-u79uwXL29TY76Z2rM5mHXA, cgroups-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA

On Wed, 9 Dec 2015 13:58:58 -0500 Johannes Weiner <hannes-druUgvl0LCNAfugRpC6u6w@public.gmane.org> wrote:

> > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > index 6faea81e66d7..73cd572167bb 100644
> > --- a/mm/memcontrol.c
> > +++ b/mm/memcontrol.c
> > @@ -4220,13 +4220,13 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
> >  	if (ret)
> >  		return ret;
> >  
> > +#ifdef CONFIG_INET
> >  #ifdef CONFIG_MEMCG_LEGACY_KMEM
> >  	ret = tcp_init_cgroup(memcg);
> >  	if (ret)
> >  		return ret;
> >  #endif
> 
> The calls to tcp_init_cgroup() appear earlier in the series than "mm:
> memcontrol: hook up vmpressure to socket pressure". However, they get
> moved around a few times so fixing it earlier means respinning the
> series. Andrew, it's up to you whether we take the bisectability hit
> for !CONFIG_INET && CONFIG_MEMCG (how common is this?) or whether you
> want me to resend the series.

hm, drat, I was suspecting dependency issues here, but a test build
said it was OK.

Actually, I was expecting this patch series to depend on the linux-next
cgroup2 changes, but that doesn't appear to be the case.  *should* this
series be staged after the cgroup2 code?

Regarding this particular series: yes, I think we can live with a
bisection hole for !CONFIG_INET && CONFIG_MEMCG users.  But I'm not
sure why we're discussing bisection issues, because Arnd's build
failure occurs with everything applied?

> Sorry about the trouble. I don't have a git tree on kernel.org because
> we don't really use git in -mm, but the downside is that we don't get
> the benefits of the automatic build testing for all kinds of configs.
> I'll try to set up a git tree to expose series to full build coverage
> before they hit -mm and -next.

This sort of thing happens quite rarely.

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] mm: memcontrol: only manage socket pressure for CONFIG_INET
@ 2015-12-09 22:28         ` Andrew Morton
  0 siblings, 0 replies; 66+ messages in thread
From: Andrew Morton @ 2015-12-09 22:28 UTC (permalink / raw)
  To: Johannes Weiner; +Cc: Arnd Bergmann, linux-mm, netdev, cgroups, linux-kernel

On Wed, 9 Dec 2015 13:58:58 -0500 Johannes Weiner <hannes@cmpxchg.org> wrote:

> > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > index 6faea81e66d7..73cd572167bb 100644
> > --- a/mm/memcontrol.c
> > +++ b/mm/memcontrol.c
> > @@ -4220,13 +4220,13 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
> >  	if (ret)
> >  		return ret;
> >  
> > +#ifdef CONFIG_INET
> >  #ifdef CONFIG_MEMCG_LEGACY_KMEM
> >  	ret = tcp_init_cgroup(memcg);
> >  	if (ret)
> >  		return ret;
> >  #endif
> 
> The calls to tcp_init_cgroup() appear earlier in the series than "mm:
> memcontrol: hook up vmpressure to socket pressure". However, they get
> moved around a few times so fixing it earlier means respinning the
> series. Andrew, it's up to you whether we take the bisectability hit
> for !CONFIG_INET && CONFIG_MEMCG (how common is this?) or whether you
> want me to resend the series.

hm, drat, I was suspecting dependency issues here, but a test build
said it was OK.

Actually, I was expecting this patch series to depend on the linux-next
cgroup2 changes, but that doesn't appear to be the case.  *should* this
series be staged after the cgroup2 code?

Regarding this particular series: yes, I think we can live with a
bisection hole for !CONFIG_INET && CONFIG_MEMCG users.  But I'm not
sure why we're discussing bisection issues, because Arnd's build
failure occurs with everything applied?

> Sorry about the trouble. I don't have a git tree on kernel.org because
> we don't really use git in -mm, but the downside is that we don't get
> the benefits of the automatic build testing for all kinds of configs.
> I'll try to set up a git tree to expose series to full build coverage
> before they hit -mm and -next.

This sort of thing happens quite rarely.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] mm: memcontrol: only manage socket pressure for CONFIG_INET
  2015-12-09 22:28         ` Andrew Morton
@ 2015-12-09 23:05           ` Johannes Weiner
  -1 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-09 23:05 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Arnd Bergmann, linux-mm, netdev, cgroups, linux-kernel

On Wed, Dec 09, 2015 at 02:28:36PM -0800, Andrew Morton wrote:
> On Wed, 9 Dec 2015 13:58:58 -0500 Johannes Weiner <hannes@cmpxchg.org> wrote:
> 
> > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > > index 6faea81e66d7..73cd572167bb 100644
> > > --- a/mm/memcontrol.c
> > > +++ b/mm/memcontrol.c
> > > @@ -4220,13 +4220,13 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
> > >  	if (ret)
> > >  		return ret;
> > >  
> > > +#ifdef CONFIG_INET
> > >  #ifdef CONFIG_MEMCG_LEGACY_KMEM
> > >  	ret = tcp_init_cgroup(memcg);
> > >  	if (ret)
> > >  		return ret;
> > >  #endif
> > 
> > The calls to tcp_init_cgroup() appear earlier in the series than "mm:
> > memcontrol: hook up vmpressure to socket pressure". However, they get
> > moved around a few times so fixing it earlier means respinning the
> > series. Andrew, it's up to you whether we take the bisectability hit
> > for !CONFIG_INET && CONFIG_MEMCG (how common is this?) or whether you
> > want me to resend the series.
> 
> hm, drat, I was suspecting dependency issues here, but a test build
> said it was OK.
> 
> Actually, I was expecting this patch series to depend on the linux-next
> cgroup2 changes, but that doesn't appear to be the case.  *should* this
> series be staged after the cgroup2 code?

Code-wise they are independent. My stuff is finishing up the new memcg
control knobs, the cgroup2 stuff is changing how and when those knobs
are exposed from within the cgroup core. I'm not relying on any recent
changes in the cgroup core AFAICS, so the order shouldn't matter here.

> Regarding this particular series: yes, I think we can live with a
> bisection hole for !CONFIG_INET && CONFIG_MEMCG users.  But I'm not
> sure why we're discussing bisection issues, because Arnd's build
> failure occurs with everything applied?

Arnd's patches apply to the top of the stack, but they address issues
introduced early in the series and the problematic code gets touched a
lot in subsequent patches. E.g. the first build breakage is in ("net:
tcp_memcontrol: simplify linkage between socket and page counter")
when the tcp_init_cgroup() and tcp_destroy_cgroup() function calls get
moved around and lose the CONFIG_INET protection.

This will leave states in between broken for this configuration, which
is why I brought up bisection. Or did you mean something else?

> > Sorry about the trouble. I don't have a git tree on kernel.org because
> > we don't really use git in -mm, but the downside is that we don't get
> > the benefits of the automatic build testing for all kinds of configs.
> > I'll try to set up a git tree to expose series to full build coverage
> > before they hit -mm and -next.
> 
> This sort of thing happens quite rarely.

True, this is a particularly tedious one. The only reason I brought it
up is because I use git to prepare patches anyway, and pushing patches
into a branch reachable by Fengguang's rig before I send emails might
have caught this stuff without spamming so many inboxes ;)

Anyway, if we can live with the bisection caveat then Arnd's fixes on
top of the kmem series look good to me. Depending on what Vladimir
thinks we might want to replace the CONFIG_SLOB fix with something
else later on, but that shouldn't be a problem, either.

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] mm: memcontrol: only manage socket pressure for CONFIG_INET
@ 2015-12-09 23:05           ` Johannes Weiner
  0 siblings, 0 replies; 66+ messages in thread
From: Johannes Weiner @ 2015-12-09 23:05 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Arnd Bergmann, linux-mm, netdev, cgroups, linux-kernel

On Wed, Dec 09, 2015 at 02:28:36PM -0800, Andrew Morton wrote:
> On Wed, 9 Dec 2015 13:58:58 -0500 Johannes Weiner <hannes@cmpxchg.org> wrote:
> 
> > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > > index 6faea81e66d7..73cd572167bb 100644
> > > --- a/mm/memcontrol.c
> > > +++ b/mm/memcontrol.c
> > > @@ -4220,13 +4220,13 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
> > >  	if (ret)
> > >  		return ret;
> > >  
> > > +#ifdef CONFIG_INET
> > >  #ifdef CONFIG_MEMCG_LEGACY_KMEM
> > >  	ret = tcp_init_cgroup(memcg);
> > >  	if (ret)
> > >  		return ret;
> > >  #endif
> > 
> > The calls to tcp_init_cgroup() appear earlier in the series than "mm:
> > memcontrol: hook up vmpressure to socket pressure". However, they get
> > moved around a few times so fixing it earlier means respinning the
> > series. Andrew, it's up to you whether we take the bisectability hit
> > for !CONFIG_INET && CONFIG_MEMCG (how common is this?) or whether you
> > want me to resend the series.
> 
> hm, drat, I was suspecting dependency issues here, but a test build
> said it was OK.
> 
> Actually, I was expecting this patch series to depend on the linux-next
> cgroup2 changes, but that doesn't appear to be the case.  *should* this
> series be staged after the cgroup2 code?

Code-wise they are independent. My stuff is finishing up the new memcg
control knobs, the cgroup2 stuff is changing how and when those knobs
are exposed from within the cgroup core. I'm not relying on any recent
changes in the cgroup core AFAICS, so the order shouldn't matter here.

> Regarding this particular series: yes, I think we can live with a
> bisection hole for !CONFIG_INET && CONFIG_MEMCG users.  But I'm not
> sure why we're discussing bisection issues, because Arnd's build
> failure occurs with everything applied?

Arnd's patches apply to the top of the stack, but they address issues
introduced early in the series and the problematic code gets touched a
lot in subsequent patches. E.g. the first build breakage is in ("net:
tcp_memcontrol: simplify linkage between socket and page counter")
when the tcp_init_cgroup() and tcp_destroy_cgroup() function calls get
moved around and lose the CONFIG_INET protection.

This will leave states in between broken for this configuration, which
is why I brought up bisection. Or did you mean something else?

> > Sorry about the trouble. I don't have a git tree on kernel.org because
> > we don't really use git in -mm, but the downside is that we don't get
> > the benefits of the automatic build testing for all kinds of configs.
> > I'll try to set up a git tree to expose series to full build coverage
> > before they hit -mm and -next.
> 
> This sort of thing happens quite rarely.

True, this is a particularly tedious one. The only reason I brought it
up is because I use git to prepare patches anyway, and pushing patches
into a branch reachable by Fengguang's rig before I send emails might
have caught this stuff without spamming so many inboxes ;)

Anyway, if we can live with the bisection caveat then Arnd's fixes on
top of the kmem series look good to me. Depending on what Vladimir
thinks we might want to replace the CONFIG_SLOB fix with something
else later on, but that shouldn't be a problem, either.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] mm: memcontrol: only manage socket pressure for CONFIG_INET
  2015-12-09 23:05           ` Johannes Weiner
@ 2015-12-09 23:13             ` Andrew Morton
  -1 siblings, 0 replies; 66+ messages in thread
From: Andrew Morton @ 2015-12-09 23:13 UTC (permalink / raw)
  To: Johannes Weiner; +Cc: Arnd Bergmann, linux-mm, netdev, cgroups, linux-kernel

On Wed, 9 Dec 2015 18:05:05 -0500 Johannes Weiner <hannes@cmpxchg.org> wrote:

> On Wed, Dec 09, 2015 at 02:28:36PM -0800, Andrew Morton wrote:
> > On Wed, 9 Dec 2015 13:58:58 -0500 Johannes Weiner <hannes@cmpxchg.org> wrote:
> > > The calls to tcp_init_cgroup() appear earlier in the series than "mm:
> > > memcontrol: hook up vmpressure to socket pressure". However, they get
> > > moved around a few times so fixing it earlier means respinning the
> > > series. Andrew, it's up to you whether we take the bisectability hit
> > > for !CONFIG_INET && CONFIG_MEMCG (how common is this?) or whether you
> > > want me to resend the series.
> > 
> > hm, drat, I was suspecting dependency issues here, but a test build
> > said it was OK.
> > 
> > Actually, I was expecting this patch series to depend on the linux-next
> > cgroup2 changes, but that doesn't appear to be the case.  *should* this
> > series be staged after the cgroup2 code?
> 
> Code-wise they are independent. My stuff is finishing up the new memcg
> control knobs, the cgroup2 stuff is changing how and when those knobs
> are exposed from within the cgroup core. I'm not relying on any recent
> changes in the cgroup core AFAICS, so the order shouldn't matter here.

OK, thanks.

> > Regarding this particular series: yes, I think we can live with a
> > bisection hole for !CONFIG_INET && CONFIG_MEMCG users.  But I'm not
> > sure why we're discussing bisection issues, because Arnd's build
> > failure occurs with everything applied?
> 
> Arnd's patches apply to the top of the stack, but they address issues
> introduced early in the series and the problematic code gets touched a
> lot in subsequent patches. E.g. the first build breakage is in ("net:
> tcp_memcontrol: simplify linkage between socket and page counter")
> when the tcp_init_cgroup() and tcp_destroy_cgroup() function calls get
> moved around and lose the CONFIG_INET protection.

Yeah, this is a pain.  I think I'll fold Arnd's fix into
mm-memcontrol-introduce-config_memcg_legacy_kmem.patch (which is staged
after all the other MM patches and after linux-next) and will pretend I
didn't know about the issue ;)

> Anyway, if we can live with the bisection caveat then Arnd's fixes on
> top of the kmem series look good to me. Depending on what Vladimir
> thinks we might want to replace the CONFIG_SLOB fix with something
> else later on, but that shouldn't be a problem, either.

I don't have a fix for the CONFIG_SLOB&&CONFIG_MEMCG issue yet.  I
agree that it would be best to make the combination work correctly
rather than banning it, but that does require a bit of runtime testing.


^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] mm: memcontrol: only manage socket pressure for CONFIG_INET
@ 2015-12-09 23:13             ` Andrew Morton
  0 siblings, 0 replies; 66+ messages in thread
From: Andrew Morton @ 2015-12-09 23:13 UTC (permalink / raw)
  To: Johannes Weiner; +Cc: Arnd Bergmann, linux-mm, netdev, cgroups, linux-kernel

On Wed, 9 Dec 2015 18:05:05 -0500 Johannes Weiner <hannes@cmpxchg.org> wrote:

> On Wed, Dec 09, 2015 at 02:28:36PM -0800, Andrew Morton wrote:
> > On Wed, 9 Dec 2015 13:58:58 -0500 Johannes Weiner <hannes@cmpxchg.org> wrote:
> > > The calls to tcp_init_cgroup() appear earlier in the series than "mm:
> > > memcontrol: hook up vmpressure to socket pressure". However, they get
> > > moved around a few times so fixing it earlier means respinning the
> > > series. Andrew, it's up to you whether we take the bisectability hit
> > > for !CONFIG_INET && CONFIG_MEMCG (how common is this?) or whether you
> > > want me to resend the series.
> > 
> > hm, drat, I was suspecting dependency issues here, but a test build
> > said it was OK.
> > 
> > Actually, I was expecting this patch series to depend on the linux-next
> > cgroup2 changes, but that doesn't appear to be the case.  *should* this
> > series be staged after the cgroup2 code?
> 
> Code-wise they are independent. My stuff is finishing up the new memcg
> control knobs, the cgroup2 stuff is changing how and when those knobs
> are exposed from within the cgroup core. I'm not relying on any recent
> changes in the cgroup core AFAICS, so the order shouldn't matter here.

OK, thanks.

> > Regarding this particular series: yes, I think we can live with a
> > bisection hole for !CONFIG_INET && CONFIG_MEMCG users.  But I'm not
> > sure why we're discussing bisection issues, because Arnd's build
> > failure occurs with everything applied?
> 
> Arnd's patches apply to the top of the stack, but they address issues
> introduced early in the series and the problematic code gets touched a
> lot in subsequent patches. E.g. the first build breakage is in ("net:
> tcp_memcontrol: simplify linkage between socket and page counter")
> when the tcp_init_cgroup() and tcp_destroy_cgroup() function calls get
> moved around and lose the CONFIG_INET protection.

Yeah, this is a pain.  I think I'll fold Arnd's fix into
mm-memcontrol-introduce-config_memcg_legacy_kmem.patch (which is staged
after all the other MM patches and after linux-next) and will pretend I
didn't know about the issue ;)

> Anyway, if we can live with the bisection caveat then Arnd's fixes on
> top of the kmem series look good to me. Depending on what Vladimir
> thinks we might want to replace the CONFIG_SLOB fix with something
> else later on, but that shouldn't be a problem, either.

I don't have a fix for the CONFIG_SLOB&&CONFIG_MEMCG issue yet.  I
agree that it would be best to make the combination work correctly
rather than banning it, but that does require a bit of runtime testing.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] mm: memcontrol: MEMCG no longer works with SLOB
@ 2015-12-10 11:24         ` Vladimir Davydov
  0 siblings, 0 replies; 66+ messages in thread
From: Vladimir Davydov @ 2015-12-10 11:24 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: Arnd Bergmann, Andrew Morton, linux-mm, netdev, cgroups, linux-kernel

On Wed, Dec 09, 2015 at 03:01:07PM -0500, Johannes Weiner wrote:
> On Wed, Dec 09, 2015 at 05:32:39PM +0100, Arnd Bergmann wrote:
> > The change to move the kmem accounting into the normal memcg
> > code means we can no longer use memcg with slob, which lacks
> > the memcg_params member in its struct kmem_cache:
> > 
> > ../mm/slab.h: In function 'is_root_cache':
> > ../mm/slab.h:187:10: error: 'struct kmem_cache' has no member named 'memcg_params'

Argh, I completely forgot about this SLOB thing :-(

> > 
> > This enforces the new dependency in Kconfig. Alternatively,
> > we could change the slob code to allow using MEMCG.
> 
> I'm curious, was this a random config or do you actually use
> CONFIG_SLOB && CONFIG_MEMCG?
> 
> Excluding CONFIG_MEMCG completely for slob seems harsh, but I would
> prefer not littering the source with
> 
> #if defined(CONFIG_MEMCG) && (defined(CONFIG_SLAB) || defined(CONFIG_SLUB))
> 
> or
> 
> #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
> 
> for such a special case. The #ifdefs are already out of hand in there.
> 
> Vladimir, what would you think of simply doing this?
> 
> diff --git a/mm/slab.h b/mm/slab.h
> index 5adec08..0b3ec4b 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -25,6 +25,9 @@ struct kmem_cache {
>  	int refcount;		/* Use counter */
>  	void (*ctor)(void *);	/* Called on object slot creation */
>  	struct list_head list;	/* List of all slab caches on the system */
> +#ifdef CONFIG_MEMCG
> +	struct memcg_cache_params memcg_params;
> +#endif
>  };
>  
>  #endif /* CONFIG_SLOB */

I don't like it. This would result in allocation of per memcg arrays for
each list_lru/kmem_cache, which would never be used. This looks
extremely ugly. I'd prefer to make CONFIG_MEMCG depend on SL[AU]B, but
I'm afraid such a change will be frowned upon - who knows who uses
MEMCG & SLOB?

I guess SLOB could be made memcg-aware, but I don't think it's worth the
trouble, although I can take a look in this direction - from a quick
glance at SLOB it shouldn't be difficult. If we decide to go this way, I
think we could use this patch as a temporary fix, which would be
reverted eventually.

Otherwise, no matter how tempting the idea to put all memcg stuff under
CONFIG_MEMCG is, I think it won't fly, so for now we should use ifdefs.
To avoid complex checks, we could define a macro in memcontrol.h, say
MEMCG_KMEM_ENABLED, and use it throughout the code. And I think we
should wrap the list_lru stuff in it as well :-/

Thanks,
Vladimir

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] mm: memcontrol: MEMCG no longer works with SLOB
@ 2015-12-10 11:24         ` Vladimir Davydov
  0 siblings, 0 replies; 66+ messages in thread
From: Vladimir Davydov @ 2015-12-10 11:24 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: Arnd Bergmann, Andrew Morton, linux-mm-Bw31MaZKKs3YtjvyW6yDsg,
	netdev-u79uwXL29TY76Z2rM5mHXA, cgroups-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA

On Wed, Dec 09, 2015 at 03:01:07PM -0500, Johannes Weiner wrote:
> On Wed, Dec 09, 2015 at 05:32:39PM +0100, Arnd Bergmann wrote:
> > The change to move the kmem accounting into the normal memcg
> > code means we can no longer use memcg with slob, which lacks
> > the memcg_params member in its struct kmem_cache:
> > 
> > ../mm/slab.h: In function 'is_root_cache':
> > ../mm/slab.h:187:10: error: 'struct kmem_cache' has no member named 'memcg_params'

Argh, I completely forgot about this SLOB thing :-(

> > 
> > This enforces the new dependency in Kconfig. Alternatively,
> > we could change the slob code to allow using MEMCG.
> 
> I'm curious, was this a random config or do you actually use
> CONFIG_SLOB && CONFIG_MEMCG?
> 
> Excluding CONFIG_MEMCG completely for slob seems harsh, but I would
> prefer not littering the source with
> 
> #if defined(CONFIG_MEMCG) && (defined(CONFIG_SLAB) || defined(CONFIG_SLUB))
> 
> or
> 
> #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
> 
> for such a special case. The #ifdefs are already out of hand in there.
> 
> Vladimir, what would you think of simply doing this?
> 
> diff --git a/mm/slab.h b/mm/slab.h
> index 5adec08..0b3ec4b 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -25,6 +25,9 @@ struct kmem_cache {
>  	int refcount;		/* Use counter */
>  	void (*ctor)(void *);	/* Called on object slot creation */
>  	struct list_head list;	/* List of all slab caches on the system */
> +#ifdef CONFIG_MEMCG
> +	struct memcg_cache_params memcg_params;
> +#endif
>  };
>  
>  #endif /* CONFIG_SLOB */

I don't like it. This would result in allocation of per memcg arrays for
each list_lru/kmem_cache, which would never be used. This looks
extremely ugly. I'd prefer to make CONFIG_MEMCG depend on SL[AU]B, but
I'm afraid such a change will be frowned upon - who knows who uses
MEMCG & SLOB?

I guess SLOB could be made memcg-aware, but I don't think it's worth the
trouble, although I can take a look in this direction - from a quick
glance at SLOB it shouldn't be difficult. If we decide to go this way, I
think we could use this patch as a temporary fix, which would be
reverted eventually.

Otherwise, no matter how tempting the idea to put all memcg stuff under
CONFIG_MEMCG is, I think it won't fly, so for now we should use ifdefs.
To avoid complex checks, we could define a macro in memcontrol.h, say
MEMCG_KMEM_ENABLED, and use it throughout the code. And I think we
should wrap the list_lru stuff in it as well :-/

Thanks,
Vladimir

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] mm: memcontrol: MEMCG no longer works with SLOB
@ 2015-12-10 11:24         ` Vladimir Davydov
  0 siblings, 0 replies; 66+ messages in thread
From: Vladimir Davydov @ 2015-12-10 11:24 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: Arnd Bergmann, Andrew Morton, linux-mm, netdev, cgroups, linux-kernel

On Wed, Dec 09, 2015 at 03:01:07PM -0500, Johannes Weiner wrote:
> On Wed, Dec 09, 2015 at 05:32:39PM +0100, Arnd Bergmann wrote:
> > The change to move the kmem accounting into the normal memcg
> > code means we can no longer use memcg with slob, which lacks
> > the memcg_params member in its struct kmem_cache:
> > 
> > ../mm/slab.h: In function 'is_root_cache':
> > ../mm/slab.h:187:10: error: 'struct kmem_cache' has no member named 'memcg_params'

Argh, I completely forgot about this SLOB thing :-(

> > 
> > This enforces the new dependency in Kconfig. Alternatively,
> > we could change the slob code to allow using MEMCG.
> 
> I'm curious, was this a random config or do you actually use
> CONFIG_SLOB && CONFIG_MEMCG?
> 
> Excluding CONFIG_MEMCG completely for slob seems harsh, but I would
> prefer not littering the source with
> 
> #if defined(CONFIG_MEMCG) && (defined(CONFIG_SLAB) || defined(CONFIG_SLUB))
> 
> or
> 
> #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
> 
> for such a special case. The #ifdefs are already out of hand in there.
> 
> Vladimir, what would you think of simply doing this?
> 
> diff --git a/mm/slab.h b/mm/slab.h
> index 5adec08..0b3ec4b 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -25,6 +25,9 @@ struct kmem_cache {
>  	int refcount;		/* Use counter */
>  	void (*ctor)(void *);	/* Called on object slot creation */
>  	struct list_head list;	/* List of all slab caches on the system */
> +#ifdef CONFIG_MEMCG
> +	struct memcg_cache_params memcg_params;
> +#endif
>  };
>  
>  #endif /* CONFIG_SLOB */

I don't like it. This would result in allocation of per memcg arrays for
each list_lru/kmem_cache, which would never be used. This looks
extremely ugly. I'd prefer to make CONFIG_MEMCG depend on SL[AU]B, but
I'm afraid such a change will be frowned upon - who knows who uses
MEMCG & SLOB?

I guess SLOB could be made memcg-aware, but I don't think it's worth the
trouble, although I can take a look in this direction - from a quick
glance at SLOB it shouldn't be difficult. If we decide to go this way, I
think we could use this patch as a temporary fix, which would be
reverted eventually.

Otherwise, no matter how tempting the idea to put all memcg stuff under
CONFIG_MEMCG is, I think it won't fly, so for now we should use ifdefs.
To avoid complex checks, we could define a macro in memcontrol.h, say
MEMCG_KMEM_ENABLED, and use it throughout the code. And I think we
should wrap the list_lru stuff in it as well :-/

Thanks,
Vladimir

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] mm: memcontrol: MEMCG no longer works with SLOB
@ 2015-12-10 11:24         ` Vladimir Davydov
  0 siblings, 0 replies; 66+ messages in thread
From: Vladimir Davydov @ 2015-12-10 11:24 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: Arnd Bergmann, Andrew Morton, linux-mm-Bw31MaZKKs3YtjvyW6yDsg,
	netdev-u79uwXL29TY76Z2rM5mHXA, cgroups-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA

On Wed, Dec 09, 2015 at 03:01:07PM -0500, Johannes Weiner wrote:
> On Wed, Dec 09, 2015 at 05:32:39PM +0100, Arnd Bergmann wrote:
> > The change to move the kmem accounting into the normal memcg
> > code means we can no longer use memcg with slob, which lacks
> > the memcg_params member in its struct kmem_cache:
> > 
> > ../mm/slab.h: In function 'is_root_cache':
> > ../mm/slab.h:187:10: error: 'struct kmem_cache' has no member named 'memcg_params'

Argh, I completely forgot about this SLOB thing :-(

> > 
> > This enforces the new dependency in Kconfig. Alternatively,
> > we could change the slob code to allow using MEMCG.
> 
> I'm curious, was this a random config or do you actually use
> CONFIG_SLOB && CONFIG_MEMCG?
> 
> Excluding CONFIG_MEMCG completely for slob seems harsh, but I would
> prefer not littering the source with
> 
> #if defined(CONFIG_MEMCG) && (defined(CONFIG_SLAB) || defined(CONFIG_SLUB))
> 
> or
> 
> #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
> 
> for such a special case. The #ifdefs are already out of hand in there.
> 
> Vladimir, what would you think of simply doing this?
> 
> diff --git a/mm/slab.h b/mm/slab.h
> index 5adec08..0b3ec4b 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -25,6 +25,9 @@ struct kmem_cache {
>  	int refcount;		/* Use counter */
>  	void (*ctor)(void *);	/* Called on object slot creation */
>  	struct list_head list;	/* List of all slab caches on the system */
> +#ifdef CONFIG_MEMCG
> +	struct memcg_cache_params memcg_params;
> +#endif
>  };
>  
>  #endif /* CONFIG_SLOB */

I don't like it. This would result in allocation of per memcg arrays for
each list_lru/kmem_cache, which would never be used. This looks
extremely ugly. I'd prefer to make CONFIG_MEMCG depend on SL[AU]B, but
I'm afraid such a change will be frowned upon - who knows who uses
MEMCG & SLOB?

I guess SLOB could be made memcg-aware, but I don't think it's worth the
trouble, although I can take a look in this direction - from a quick
glance at SLOB it shouldn't be difficult. If we decide to go this way, I
think we could use this patch as a temporary fix, which would be
reverted eventually.

Otherwise, no matter how tempting the idea to put all memcg stuff under
CONFIG_MEMCG is, I think it won't fly, so for now we should use ifdefs.
To avoid complex checks, we could define a macro in memcontrol.h, say
MEMCG_KMEM_ENABLED, and use it throughout the code. And I think we
should wrap the list_lru stuff in it as well :-/

Thanks,
Vladimir

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 12/14] mm: memcontrol: account socket memory in unified hierarchy memory controller
  2015-12-08 15:30   ` Johannes Weiner
@ 2015-12-15 19:50     ` Michal Hocko
  -1 siblings, 0 replies; 66+ messages in thread
From: Michal Hocko @ 2015-12-15 19:50 UTC (permalink / raw)
  To: Johannes Weiner; +Cc: Andrew Morton, linux-mm, netdev, cgroups, linux-kernel

On Tue 08-12-15 10:30:22, Johannes Weiner wrote:
> Socket memory can be a significant share of overall memory consumed by
> common workloads. In order to provide reasonable resource isolation in
> the unified hierarchy, this type of memory needs to be included in the
> tracking/accounting of a cgroup under active memory resource control.
> 
> Overhead is only incurred when a non-root control group is created AND
> the memory controller is instructed to track and account the memory
> footprint of that group. cgroup.memory=nosocket can be specified on
> the boot commandline to override any runtime configuration and
> forcibly exclude socket memory from active memory resource control.
> 
> Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
> Acked-by: David S. Miller <davem@davemloft.net>
> Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>

Sorry, I forgot about this.
Acked-by: Michal Hocko <mhocko@suse.com>

> ---
>  Documentation/kernel-parameters.txt |   4 ++
>  include/linux/memcontrol.h          |   9 ++-
>  mm/memcontrol.c                     | 122 +++++++++++++++++++++++++++++-------
>  3 files changed, 110 insertions(+), 25 deletions(-)
> 
> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
> index 742f69d..7868f1b 100644
> --- a/Documentation/kernel-parameters.txt
> +++ b/Documentation/kernel-parameters.txt
> @@ -599,6 +599,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
>  			cut the overhead, others just disable the usage. So
>  			only cgroup_disable=memory is actually worthy}
>  
> +	cgroup.memory=	[KNL] Pass options to the cgroup memory controller.
> +			Format: <string>
> +			nosocket -- Disable socket memory accounting.
> +
>  	checkreqprot	[SELINUX] Set initial checkreqprot flag value.
>  			Format: { "0" | "1" }
>  			See security/selinux/Kconfig help text.
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 654c2fb..863ae8d 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -170,6 +170,9 @@ struct mem_cgroup {
>  	unsigned long low;
>  	unsigned long high;
>  
> +	/* Range enforcement for interrupt charges */
> +	struct work_struct high_work;
> +
>  	unsigned long soft_limit;
>  
>  	/* vmpressure notifications */
> @@ -684,12 +687,16 @@ void sock_update_memcg(struct sock *sk);
>  void sock_release_memcg(struct sock *sk);
>  bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
>  void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
> -#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
> +#if defined(CONFIG_MEMCG) && defined(CONFIG_INET)
>  extern struct static_key memcg_sockets_enabled_key;
>  #define mem_cgroup_sockets_enabled static_key_false(&memcg_sockets_enabled_key)
>  static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
>  {
> +#ifdef CONFIG_MEMCG_KMEM
>  	return memcg->tcp_mem.memory_pressure;
> +#else
> +	return false;
> +#endif
>  }
>  #else
>  #define mem_cgroup_sockets_enabled 0
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index ed030b5..59555b0 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -80,6 +80,9 @@ struct mem_cgroup *root_mem_cgroup __read_mostly;
>  
>  #define MEM_CGROUP_RECLAIM_RETRIES	5
>  
> +/* Socket memory accounting disabled? */
> +static bool cgroup_memory_nosocket;
> +
>  /* Whether the swap controller is active */
>  #ifdef CONFIG_MEMCG_SWAP
>  int do_swap_account __read_mostly;
> @@ -1923,6 +1926,26 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
>  	return NOTIFY_OK;
>  }
>  
> +static void reclaim_high(struct mem_cgroup *memcg,
> +			 unsigned int nr_pages,
> +			 gfp_t gfp_mask)
> +{
> +	do {
> +		if (page_counter_read(&memcg->memory) <= memcg->high)
> +			continue;
> +		mem_cgroup_events(memcg, MEMCG_HIGH, 1);
> +		try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true);
> +	} while ((memcg = parent_mem_cgroup(memcg)));
> +}
> +
> +static void high_work_func(struct work_struct *work)
> +{
> +	struct mem_cgroup *memcg;
> +
> +	memcg = container_of(work, struct mem_cgroup, high_work);
> +	reclaim_high(memcg, CHARGE_BATCH, GFP_KERNEL);
> +}
> +
>  /*
>   * Scheduled by try_charge() to be executed from the userland return path
>   * and reclaims memory over the high limit.
> @@ -1930,20 +1953,13 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
>  void mem_cgroup_handle_over_high(void)
>  {
>  	unsigned int nr_pages = current->memcg_nr_pages_over_high;
> -	struct mem_cgroup *memcg, *pos;
> +	struct mem_cgroup *memcg;
>  
>  	if (likely(!nr_pages))
>  		return;
>  
> -	pos = memcg = get_mem_cgroup_from_mm(current->mm);
> -
> -	do {
> -		if (page_counter_read(&pos->memory) <= pos->high)
> -			continue;
> -		mem_cgroup_events(pos, MEMCG_HIGH, 1);
> -		try_to_free_mem_cgroup_pages(pos, nr_pages, GFP_KERNEL, true);
> -	} while ((pos = parent_mem_cgroup(pos)));
> -
> +	memcg = get_mem_cgroup_from_mm(current->mm);
> +	reclaim_high(memcg, nr_pages, GFP_KERNEL);
>  	css_put(&memcg->css);
>  	current->memcg_nr_pages_over_high = 0;
>  }
> @@ -2078,6 +2094,11 @@ done_restock:
>  	 */
>  	do {
>  		if (page_counter_read(&memcg->memory) > memcg->high) {
> +			/* Don't bother a random interrupted task */
> +			if (in_interrupt()) {
> +				schedule_work(&memcg->high_work);
> +				break;
> +			}
>  			current->memcg_nr_pages_over_high += batch;
>  			set_notify_resume(current);
>  			break;
> @@ -4126,6 +4147,8 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
>  {
>  	int node;
>  
> +	cancel_work_sync(&memcg->high_work);
> +
>  	mem_cgroup_remove_from_trees(memcg);
>  
>  	for_each_node(node)
> @@ -4172,6 +4195,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
>  		page_counter_init(&memcg->kmem, NULL);
>  	}
>  
> +	INIT_WORK(&memcg->high_work, high_work_func);
>  	memcg->last_scanned_node = MAX_NUMNODES;
>  	INIT_LIST_HEAD(&memcg->oom_notify);
>  	memcg->move_charge_at_immigrate = 0;
> @@ -4243,6 +4267,11 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
>  	if (ret)
>  		return ret;
>  
> +#ifdef CONFIG_INET
> +	if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket)
> +		static_key_slow_inc(&memcg_sockets_enabled_key);
> +#endif
> +
>  	/*
>  	 * Make sure the memcg is initialized: mem_cgroup_iter()
>  	 * orders reading memcg->initialized against its callers
> @@ -4282,6 +4311,10 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
>  	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
>  
>  	memcg_destroy_kmem(memcg);
> +#ifdef CONFIG_INET
> +	if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket)
> +		static_key_slow_dec(&memcg_sockets_enabled_key);
> +#endif
>  	__mem_cgroup_free(memcg);
>  }
>  
> @@ -5470,8 +5503,7 @@ void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage)
>  	commit_charge(newpage, memcg, true);
>  }
>  
> -/* Writing them here to avoid exposing memcg's inner layout */
> -#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
> +#ifdef CONFIG_INET
>  
>  struct static_key memcg_sockets_enabled_key;
>  EXPORT_SYMBOL(memcg_sockets_enabled_key);
> @@ -5496,10 +5528,15 @@ void sock_update_memcg(struct sock *sk)
>  
>  	rcu_read_lock();
>  	memcg = mem_cgroup_from_task(current);
> -	if (memcg != root_mem_cgroup &&
> -	    memcg->tcp_mem.active &&
> -	    css_tryget_online(&memcg->css))
> +	if (memcg == root_mem_cgroup)
> +		goto out;
> +#ifdef CONFIG_MEMCG_KMEM
> +	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && !memcg->tcp_mem.active)
> +		goto out;
> +#endif
> +	if (css_tryget_online(&memcg->css))
>  		sk->sk_memcg = memcg;
> +out:
>  	rcu_read_unlock();
>  }
>  EXPORT_SYMBOL(sock_update_memcg);
> @@ -5520,15 +5557,30 @@ void sock_release_memcg(struct sock *sk)
>   */
>  bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
>  {
> -	struct page_counter *counter;
> +	gfp_t gfp_mask = GFP_KERNEL;
>  
> -	if (page_counter_try_charge(&memcg->tcp_mem.memory_allocated,
> -				    nr_pages, &counter)) {
> -		memcg->tcp_mem.memory_pressure = 0;
> -		return true;
> +#ifdef CONFIG_MEMCG_KMEM
> +	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) {
> +		struct page_counter *counter;
> +
> +		if (page_counter_try_charge(&memcg->tcp_mem.memory_allocated,
> +					    nr_pages, &counter)) {
> +			memcg->tcp_mem.memory_pressure = 0;
> +			return true;
> +		}
> +		page_counter_charge(&memcg->tcp_mem.memory_allocated, nr_pages);
> +		memcg->tcp_mem.memory_pressure = 1;
> +		return false;
>  	}
> -	page_counter_charge(&memcg->tcp_mem.memory_allocated, nr_pages);
> -	memcg->tcp_mem.memory_pressure = 1;
> +#endif
> +	/* Don't block in the packet receive path */
> +	if (in_softirq())
> +		gfp_mask = GFP_NOWAIT;
> +
> +	if (try_charge(memcg, gfp_mask, nr_pages) == 0)
> +		return true;
> +
> +	try_charge(memcg, gfp_mask|__GFP_NOFAIL, nr_pages);
>  	return false;
>  }
>  
> @@ -5539,10 +5591,32 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
>   */
>  void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
>  {
> -	page_counter_uncharge(&memcg->tcp_mem.memory_allocated, nr_pages);
> +#ifdef CONFIG_MEMCG_KMEM
> +	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) {
> +		page_counter_uncharge(&memcg->tcp_mem.memory_allocated,
> +				      nr_pages);
> +		return;
> +	}
> +#endif
> +	page_counter_uncharge(&memcg->memory, nr_pages);
> +	css_put_many(&memcg->css, nr_pages);
>  }
>  
> -#endif
> +#endif /* CONFIG_INET */
> +
> +static int __init cgroup_memory(char *s)
> +{
> +	char *token;
> +
> +	while ((token = strsep(&s, ",")) != NULL) {
> +		if (!*token)
> +			continue;
> +		if (!strcmp(token, "nosocket"))
> +			cgroup_memory_nosocket = true;
> +	}
> +	return 0;
> +}
> +__setup("cgroup.memory=", cgroup_memory);
>  
>  /*
>   * subsys_initcall() for memory controller.
> -- 
> 2.6.3
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH 12/14] mm: memcontrol: account socket memory in unified hierarchy memory controller
@ 2015-12-15 19:50     ` Michal Hocko
  0 siblings, 0 replies; 66+ messages in thread
From: Michal Hocko @ 2015-12-15 19:50 UTC (permalink / raw)
  To: Johannes Weiner; +Cc: Andrew Morton, linux-mm, netdev, cgroups, linux-kernel

On Tue 08-12-15 10:30:22, Johannes Weiner wrote:
> Socket memory can be a significant share of overall memory consumed by
> common workloads. In order to provide reasonable resource isolation in
> the unified hierarchy, this type of memory needs to be included in the
> tracking/accounting of a cgroup under active memory resource control.
> 
> Overhead is only incurred when a non-root control group is created AND
> the memory controller is instructed to track and account the memory
> footprint of that group. cgroup.memory=nosocket can be specified on
> the boot commandline to override any runtime configuration and
> forcibly exclude socket memory from active memory resource control.
> 
> Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
> Acked-by: David S. Miller <davem@davemloft.net>
> Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>

Sorry, I forgot about this.
Acked-by: Michal Hocko <mhocko@suse.com>

> ---
>  Documentation/kernel-parameters.txt |   4 ++
>  include/linux/memcontrol.h          |   9 ++-
>  mm/memcontrol.c                     | 122 +++++++++++++++++++++++++++++-------
>  3 files changed, 110 insertions(+), 25 deletions(-)
> 
> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
> index 742f69d..7868f1b 100644
> --- a/Documentation/kernel-parameters.txt
> +++ b/Documentation/kernel-parameters.txt
> @@ -599,6 +599,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
>  			cut the overhead, others just disable the usage. So
>  			only cgroup_disable=memory is actually worthy}
>  
> +	cgroup.memory=	[KNL] Pass options to the cgroup memory controller.
> +			Format: <string>
> +			nosocket -- Disable socket memory accounting.
> +
>  	checkreqprot	[SELINUX] Set initial checkreqprot flag value.
>  			Format: { "0" | "1" }
>  			See security/selinux/Kconfig help text.
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 654c2fb..863ae8d 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -170,6 +170,9 @@ struct mem_cgroup {
>  	unsigned long low;
>  	unsigned long high;
>  
> +	/* Range enforcement for interrupt charges */
> +	struct work_struct high_work;
> +
>  	unsigned long soft_limit;
>  
>  	/* vmpressure notifications */
> @@ -684,12 +687,16 @@ void sock_update_memcg(struct sock *sk);
>  void sock_release_memcg(struct sock *sk);
>  bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
>  void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
> -#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
> +#if defined(CONFIG_MEMCG) && defined(CONFIG_INET)
>  extern struct static_key memcg_sockets_enabled_key;
>  #define mem_cgroup_sockets_enabled static_key_false(&memcg_sockets_enabled_key)
>  static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
>  {
> +#ifdef CONFIG_MEMCG_KMEM
>  	return memcg->tcp_mem.memory_pressure;
> +#else
> +	return false;
> +#endif
>  }
>  #else
>  #define mem_cgroup_sockets_enabled 0
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index ed030b5..59555b0 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -80,6 +80,9 @@ struct mem_cgroup *root_mem_cgroup __read_mostly;
>  
>  #define MEM_CGROUP_RECLAIM_RETRIES	5
>  
> +/* Socket memory accounting disabled? */
> +static bool cgroup_memory_nosocket;
> +
>  /* Whether the swap controller is active */
>  #ifdef CONFIG_MEMCG_SWAP
>  int do_swap_account __read_mostly;
> @@ -1923,6 +1926,26 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
>  	return NOTIFY_OK;
>  }
>  
> +static void reclaim_high(struct mem_cgroup *memcg,
> +			 unsigned int nr_pages,
> +			 gfp_t gfp_mask)
> +{
> +	do {
> +		if (page_counter_read(&memcg->memory) <= memcg->high)
> +			continue;
> +		mem_cgroup_events(memcg, MEMCG_HIGH, 1);
> +		try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true);
> +	} while ((memcg = parent_mem_cgroup(memcg)));
> +}
> +
> +static void high_work_func(struct work_struct *work)
> +{
> +	struct mem_cgroup *memcg;
> +
> +	memcg = container_of(work, struct mem_cgroup, high_work);
> +	reclaim_high(memcg, CHARGE_BATCH, GFP_KERNEL);
> +}
> +
>  /*
>   * Scheduled by try_charge() to be executed from the userland return path
>   * and reclaims memory over the high limit.
> @@ -1930,20 +1953,13 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
>  void mem_cgroup_handle_over_high(void)
>  {
>  	unsigned int nr_pages = current->memcg_nr_pages_over_high;
> -	struct mem_cgroup *memcg, *pos;
> +	struct mem_cgroup *memcg;
>  
>  	if (likely(!nr_pages))
>  		return;
>  
> -	pos = memcg = get_mem_cgroup_from_mm(current->mm);
> -
> -	do {
> -		if (page_counter_read(&pos->memory) <= pos->high)
> -			continue;
> -		mem_cgroup_events(pos, MEMCG_HIGH, 1);
> -		try_to_free_mem_cgroup_pages(pos, nr_pages, GFP_KERNEL, true);
> -	} while ((pos = parent_mem_cgroup(pos)));
> -
> +	memcg = get_mem_cgroup_from_mm(current->mm);
> +	reclaim_high(memcg, nr_pages, GFP_KERNEL);
>  	css_put(&memcg->css);
>  	current->memcg_nr_pages_over_high = 0;
>  }
> @@ -2078,6 +2094,11 @@ done_restock:
>  	 */
>  	do {
>  		if (page_counter_read(&memcg->memory) > memcg->high) {
> +			/* Don't bother a random interrupted task */
> +			if (in_interrupt()) {
> +				schedule_work(&memcg->high_work);
> +				break;
> +			}
>  			current->memcg_nr_pages_over_high += batch;
>  			set_notify_resume(current);
>  			break;
> @@ -4126,6 +4147,8 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
>  {
>  	int node;
>  
> +	cancel_work_sync(&memcg->high_work);
> +
>  	mem_cgroup_remove_from_trees(memcg);
>  
>  	for_each_node(node)
> @@ -4172,6 +4195,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
>  		page_counter_init(&memcg->kmem, NULL);
>  	}
>  
> +	INIT_WORK(&memcg->high_work, high_work_func);
>  	memcg->last_scanned_node = MAX_NUMNODES;
>  	INIT_LIST_HEAD(&memcg->oom_notify);
>  	memcg->move_charge_at_immigrate = 0;
> @@ -4243,6 +4267,11 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
>  	if (ret)
>  		return ret;
>  
> +#ifdef CONFIG_INET
> +	if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket)
> +		static_key_slow_inc(&memcg_sockets_enabled_key);
> +#endif
> +
>  	/*
>  	 * Make sure the memcg is initialized: mem_cgroup_iter()
>  	 * orders reading memcg->initialized against its callers
> @@ -4282,6 +4311,10 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
>  	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
>  
>  	memcg_destroy_kmem(memcg);
> +#ifdef CONFIG_INET
> +	if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket)
> +		static_key_slow_dec(&memcg_sockets_enabled_key);
> +#endif
>  	__mem_cgroup_free(memcg);
>  }
>  
> @@ -5470,8 +5503,7 @@ void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage)
>  	commit_charge(newpage, memcg, true);
>  }
>  
> -/* Writing them here to avoid exposing memcg's inner layout */
> -#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
> +#ifdef CONFIG_INET
>  
>  struct static_key memcg_sockets_enabled_key;
>  EXPORT_SYMBOL(memcg_sockets_enabled_key);
> @@ -5496,10 +5528,15 @@ void sock_update_memcg(struct sock *sk)
>  
>  	rcu_read_lock();
>  	memcg = mem_cgroup_from_task(current);
> -	if (memcg != root_mem_cgroup &&
> -	    memcg->tcp_mem.active &&
> -	    css_tryget_online(&memcg->css))
> +	if (memcg == root_mem_cgroup)
> +		goto out;
> +#ifdef CONFIG_MEMCG_KMEM
> +	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && !memcg->tcp_mem.active)
> +		goto out;
> +#endif
> +	if (css_tryget_online(&memcg->css))
>  		sk->sk_memcg = memcg;
> +out:
>  	rcu_read_unlock();
>  }
>  EXPORT_SYMBOL(sock_update_memcg);
> @@ -5520,15 +5557,30 @@ void sock_release_memcg(struct sock *sk)
>   */
>  bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
>  {
> -	struct page_counter *counter;
> +	gfp_t gfp_mask = GFP_KERNEL;
>  
> -	if (page_counter_try_charge(&memcg->tcp_mem.memory_allocated,
> -				    nr_pages, &counter)) {
> -		memcg->tcp_mem.memory_pressure = 0;
> -		return true;
> +#ifdef CONFIG_MEMCG_KMEM
> +	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) {
> +		struct page_counter *counter;
> +
> +		if (page_counter_try_charge(&memcg->tcp_mem.memory_allocated,
> +					    nr_pages, &counter)) {
> +			memcg->tcp_mem.memory_pressure = 0;
> +			return true;
> +		}
> +		page_counter_charge(&memcg->tcp_mem.memory_allocated, nr_pages);
> +		memcg->tcp_mem.memory_pressure = 1;
> +		return false;
>  	}
> -	page_counter_charge(&memcg->tcp_mem.memory_allocated, nr_pages);
> -	memcg->tcp_mem.memory_pressure = 1;
> +#endif
> +	/* Don't block in the packet receive path */
> +	if (in_softirq())
> +		gfp_mask = GFP_NOWAIT;
> +
> +	if (try_charge(memcg, gfp_mask, nr_pages) == 0)
> +		return true;
> +
> +	try_charge(memcg, gfp_mask|__GFP_NOFAIL, nr_pages);
>  	return false;
>  }
>  
> @@ -5539,10 +5591,32 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
>   */
>  void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
>  {
> -	page_counter_uncharge(&memcg->tcp_mem.memory_allocated, nr_pages);
> +#ifdef CONFIG_MEMCG_KMEM
> +	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) {
> +		page_counter_uncharge(&memcg->tcp_mem.memory_allocated,
> +				      nr_pages);
> +		return;
> +	}
> +#endif
> +	page_counter_uncharge(&memcg->memory, nr_pages);
> +	css_put_many(&memcg->css, nr_pages);
>  }
>  
> -#endif
> +#endif /* CONFIG_INET */
> +
> +static int __init cgroup_memory(char *s)
> +{
> +	char *token;
> +
> +	while ((token = strsep(&s, ",")) != NULL) {
> +		if (!*token)
> +			continue;
> +		if (!strcmp(token, "nosocket"))
> +			cgroup_memory_nosocket = true;
> +	}
> +	return 0;
> +}
> +__setup("cgroup.memory=", cgroup_memory);
>  
>  /*
>   * subsys_initcall() for memory controller.
> -- 
> 2.6.3
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

-- 
Michal Hocko
SUSE Labs

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] mm: memcontrol: only manage socket pressure for CONFIG_INET
  2015-12-09 23:13             ` Andrew Morton
@ 2016-01-22  3:25               ` Masanari Iida
  -1 siblings, 0 replies; 66+ messages in thread
From: Masanari Iida @ 2016-01-22  3:25 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Johannes Weiner, Arnd Bergmann, linux-mm, netdev, cgroups, linux-kernel

Hi,
I hit this while testing 4.5-rc1 with randconfig during the merge window.
I have now noticed that it was fixed after Linus merged the akpm branch.

commit eae21770b4fed5597623aad0d618190fa60426ff
Merge: e9f57eb 9f273c2
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Thu Jan 21 12:32:08 2016 -0800

    Merge branch 'akpm' (patches from Andrew)

The commit immediately before this one (commit e9f57ebcba563e0cd532926cab83c92bb4d79360)
DOES still have the issue.
So I believe it is fixed now.
Thanks

Masanari


On Thu, Dec 10, 2015 at 8:13 AM, Andrew Morton
<akpm@linux-foundation.org> wrote:
> On Wed, 9 Dec 2015 18:05:05 -0500 Johannes Weiner <hannes@cmpxchg.org> wrote:
>
>> On Wed, Dec 09, 2015 at 02:28:36PM -0800, Andrew Morton wrote:
>> > On Wed, 9 Dec 2015 13:58:58 -0500 Johannes Weiner <hannes@cmpxchg.org> wrote:
>> > > The calls to tcp_init_cgroup() appear earlier in the series than "mm:
>> > > memcontrol: hook up vmpressure to socket pressure". However, they get
>> > > moved around a few times so fixing it earlier means respinning the
>> > > series. Andrew, it's up to you whether we take the bisectability hit
>> > > for !CONFIG_INET && CONFIG_MEMCG (how common is this?) or whether you
>> > > want me to resend the series.
>> >
>> > hm, drat, I was suspecting dependency issues here, but a test build
>> > said it was OK.
>> >
>> > Actually, I was expecting this patch series to depend on the linux-next
>> > cgroup2 changes, but that doesn't appear to be the case.  *should* this
>> > series be staged after the cgroup2 code?
>>
>> Code-wise they are independent. My stuff is finishing up the new memcg
>> control knobs, the cgroup2 stuff is changing how and when those knobs
>> are exposed from within the cgroup core. I'm not relying on any recent
>> changes in the cgroup core AFAICS, so the order shouldn't matter here.
>
> OK, thanks.
>
>> > Regarding this particular series: yes, I think we can live with a
>> > bisection hole for !CONFIG_INET && CONFIG_MEMCG users.  But I'm not
>> > sure why we're discussing bisection issues, because Arnd's build
>> > failure occurs with everything applied?
>>
>> Arnd's patches apply to the top of the stack, but they address issues
>> introduced early in the series and the problematic code gets touched a
>> lot in subsequent patches. E.g. the first build breakage is in ("net:
>> tcp_memcontrol: simplify linkage between socket and page counter")
>> when the tcp_init_cgroup() and tcp_destroy_cgroup() function calls get
>> moved around and lose the CONFIG_INET protection.
>
> Yeah, this is a pain.  I think I'll fold Arnd's fix into
> mm-memcontrol-introduce-config_memcg_legacy_kmem.patch (which is staged
> after all the other MM patches and after linux-next) and will pretend I
> didn't know about the issue ;)
>
>> Anyway, if we can live with the bisection caveat then Arnd's fixes on
>> top of the kmem series look good to me. Depending on what Vladimir
>> thinks we might want to replace the CONFIG_SLOB fix with something
>> else later on, but that shouldn't be a problem, either.
>
> I don't have a fix for the CONFIG_SLOB&&CONFIG_MEMCG issue yet.  I
> agree that it would be best to make the combination work correctly
> rather than banning it, but that does require a bit of runtime testing.
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 66+ messages in thread

* Re: [PATCH] mm: memcontrol: only manage socket pressure for CONFIG_INET
@ 2016-01-22  3:25               ` Masanari Iida
  0 siblings, 0 replies; 66+ messages in thread
From: Masanari Iida @ 2016-01-22  3:25 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Johannes Weiner, Arnd Bergmann, linux-mm, netdev, cgroups, linux-kernel

Hi,
I hit this while testing 4.5-rc1 with randconfig during the merge window.
I have now noticed that it was fixed after Linus merged the akpm branch.

commit eae21770b4fed5597623aad0d618190fa60426ff
Merge: e9f57eb 9f273c2
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Thu Jan 21 12:32:08 2016 -0800

    Merge branch 'akpm' (patches from Andrew)

The commit immediately before this one (commit e9f57ebcba563e0cd532926cab83c92bb4d79360)
DOES still have the issue.
So I believe it is fixed now.
Thanks

Masanari


On Thu, Dec 10, 2015 at 8:13 AM, Andrew Morton
<akpm@linux-foundation.org> wrote:
> On Wed, 9 Dec 2015 18:05:05 -0500 Johannes Weiner <hannes@cmpxchg.org> wrote:
>
>> On Wed, Dec 09, 2015 at 02:28:36PM -0800, Andrew Morton wrote:
>> > On Wed, 9 Dec 2015 13:58:58 -0500 Johannes Weiner <hannes@cmpxchg.org> wrote:
>> > > The calls to tcp_init_cgroup() appear earlier in the series than "mm:
>> > > memcontrol: hook up vmpressure to socket pressure". However, they get
>> > > moved around a few times so fixing it earlier means respinning the
>> > > series. Andrew, it's up to you whether we take the bisectability hit
>> > > for !CONFIG_INET && CONFIG_MEMCG (how common is this?) or whether you
>> > > want me to resend the series.
>> >
>> > hm, drat, I was suspecting dependency issues here, but a test build
>> > said it was OK.
>> >
>> > Actually, I was expecting this patch series to depend on the linux-next
>> > cgroup2 changes, but that doesn't appear to be the case.  *should* this
>> > series be staged after the cgroup2 code?
>>
>> Code-wise they are independent. My stuff is finishing up the new memcg
>> control knobs, the cgroup2 stuff is changing how and when those knobs
>> are exposed from within the cgroup core. I'm not relying on any recent
>> changes in the cgroup core AFAICS, so the order shouldn't matter here.
>
> OK, thanks.
>
>> > Regarding this particular series: yes, I think we can live with a
>> > bisection hole for !CONFIG_INET && CONFIG_MEMCG users.  But I'm not
>> > sure why we're discussing bisection issues, because Arnd's build
>> > failure occurs with everything applied?
>>
>> Arnd's patches apply to the top of the stack, but they address issues
>> introduced early in the series and the problematic code gets touched a
>> lot in subsequent patches. E.g. the first build breakage is in ("net:
>> tcp_memcontrol: simplify linkage between socket and page counter")
>> when the tcp_init_cgroup() and tcp_destroy_cgroup() function calls get
>> moved around and lose the CONFIG_INET protection.
>
> Yeah, this is a pain.  I think I'll fold Arnd's fix into
> mm-memcontrol-introduce-config_memcg_legacy_kmem.patch (which is staged
> after all the other MM patches and after linux-next) and will pretend I
> didn't know about the issue ;)
>
>> Anyway, if we can live with the bisection caveat then Arnd's fixes on
>> top of the kmem series look good to me. Depending on what Vladimir
>> thinks we might want to replace the CONFIG_SLOB fix with something
>> else later on, but that shouldn't be a problem, either.
>
> I don't have a fix for the CONFIG_SLOB&&CONFIG_MEMCG issue yet.  I
> agree that it would be best to make the combination work correctly
> rather than banning it, but that does require a bit of runtime testing.
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 66+ messages in thread

end of thread, other threads:[~2016-01-22  3:25 UTC | newest]

Thread overview: 66+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-12-08 15:30 [PATCH 00/14] mm: memcontrol: account socket memory in unified hierarchy v4-RESEND Johannes Weiner
2015-12-08 15:30 ` Johannes Weiner
2015-12-08 15:30 ` [PATCH 01/14] mm: memcontrol: export root_mem_cgroup Johannes Weiner
2015-12-08 15:30   ` Johannes Weiner
2015-12-08 15:30 ` [PATCH 02/14] net: tcp_memcontrol: properly detect ancestor socket pressure Johannes Weiner
2015-12-08 15:30   ` Johannes Weiner
2015-12-08 15:30 ` [PATCH 03/14] net: tcp_memcontrol: remove bogus hierarchy pressure propagation Johannes Weiner
2015-12-08 15:30   ` Johannes Weiner
2015-12-08 15:30 ` [PATCH 04/14] net: tcp_memcontrol: protect all tcp_memcontrol calls by jump-label Johannes Weiner
2015-12-08 15:30   ` Johannes Weiner
2015-12-08 15:30 ` [PATCH 05/14] net: tcp_memcontrol: remove dead per-memcg count of allocated sockets Johannes Weiner
2015-12-08 15:30   ` Johannes Weiner
2015-12-08 15:30 ` [PATCH 06/14] net: tcp_memcontrol: simplify the per-memcg limit access Johannes Weiner
2015-12-08 15:30   ` Johannes Weiner
2015-12-08 15:30 ` [PATCH 07/14] net: tcp_memcontrol: sanitize tcp memory accounting callbacks Johannes Weiner
2015-12-08 15:30   ` Johannes Weiner
2015-12-08 15:30 ` [PATCH 08/14] net: tcp_memcontrol: simplify linkage between socket and page counter Johannes Weiner
2015-12-08 15:30   ` Johannes Weiner
2015-12-08 15:30 ` [PATCH 09/14] mm: memcontrol: generalize the socket accounting jump label Johannes Weiner
2015-12-08 15:30   ` Johannes Weiner
2015-12-08 15:30 ` [PATCH 10/14] mm: memcontrol: do not account memory+swap on unified hierarchy Johannes Weiner
2015-12-08 15:30   ` Johannes Weiner
2015-12-08 15:30 ` [PATCH 11/14] mm: memcontrol: move socket code for unified hierarchy accounting Johannes Weiner
2015-12-08 15:30   ` Johannes Weiner
2015-12-08 15:30 ` [PATCH 12/14] mm: memcontrol: account socket memory in unified hierarchy memory controller Johannes Weiner
2015-12-08 15:30   ` Johannes Weiner
2015-12-15 19:50   ` Michal Hocko
2015-12-15 19:50     ` Michal Hocko
2015-12-08 15:30 ` [PATCH 13/14] mm: memcontrol: hook up vmpressure to socket pressure Johannes Weiner
2015-12-08 15:30   ` Johannes Weiner
2015-12-08 15:30 ` [PATCH 14/14] mm: memcontrol: switch to the updated jump-label API Johannes Weiner
2015-12-08 15:30   ` Johannes Weiner
2015-12-08 16:28   ` David Miller
2015-12-08 16:28     ` David Miller
2015-12-08 16:28 ` [PATCH 00/14] mm: memcontrol: account socket memory in unified hierarchy v4-RESEND David Miller
2015-12-08 16:28   ` David Miller
2015-12-09 16:31 ` Arnd Bergmann
2015-12-09 16:31   ` Arnd Bergmann
2015-12-09 16:31   ` Arnd Bergmann
2015-12-09 16:32   ` [PATCH] mm: memcontrol: only manage socket pressure for CONFIG_INET Arnd Bergmann
2015-12-09 16:32     ` Arnd Bergmann
2015-12-09 16:32     ` Arnd Bergmann
2015-12-09 18:58     ` Johannes Weiner
2015-12-09 18:58       ` Johannes Weiner
2015-12-09 22:28       ` Andrew Morton
2015-12-09 22:28         ` Andrew Morton
2015-12-09 22:28         ` Andrew Morton
2015-12-09 23:05         ` Johannes Weiner
2015-12-09 23:05           ` Johannes Weiner
2015-12-09 23:13           ` Andrew Morton
2015-12-09 23:13             ` Andrew Morton
2016-01-22  3:25             ` Masanari Iida
2016-01-22  3:25               ` Masanari Iida
2015-12-09 16:32   ` [PATCH] mm: memcontrol: MEMCG no longer works with SLOB Arnd Bergmann
2015-12-09 16:32     ` Arnd Bergmann
2015-12-09 20:01     ` Johannes Weiner
2015-12-09 20:01       ` Johannes Weiner
2015-12-09 20:01       ` Johannes Weiner
2015-12-09 21:03       ` Arnd Bergmann
2015-12-09 21:03         ` Arnd Bergmann
2015-12-10 11:24       ` Vladimir Davydov
2015-12-10 11:24         ` Vladimir Davydov
2015-12-10 11:24         ` Vladimir Davydov
2015-12-10 11:24         ` Vladimir Davydov
2015-12-09 18:17   ` [PATCH 00/14] mm: memcontrol: account socket memory in unified hierarchy v4-RESEND Johannes Weiner
2015-12-09 18:17     ` Johannes Weiner

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.