* [PATCH] memcg: charge fs_context and legacy_fs_context
@ 2021-07-17 10:20 ` Yutian Yang
0 siblings, 0 replies; 4+ messages in thread
From: Yutian Yang @ 2021-07-17 10:20 UTC (permalink / raw)
To: mhocko, hannes, vdavydov.dev; +Cc: cgroups, linux-mm, shenwenbo, Yutian Yang
This patch adds accounting flags to the fs_context and legacy_fs_context
allocation sites so that the kernel can correctly charge these objects.
We have written a PoC to demonstrate the effect of the missing-charging
bugs. The PoC consumes around 1,200MB of unaccounted memory, while it is
charged for only 362MB of memory usage. We evaluated the PoC on QEMU x86_64
v5.2.90 + Linux kernel v5.10.19 + Debian buster. All limits, including
ulimits and sysctl variables, are left at their defaults. Specifically, the
hard NOFILE limit and nr_open in sysctl are both 1,048,576.
/*------------------------- POC code ----------------------------*/
#define _GNU_SOURCE
#include <sys/types.h>
#include <sys/file.h>
#include <time.h>
#include <sys/wait.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <signal.h>
#include <sched.h>
#include <fcntl.h>
#include <linux/mount.h>
/* Report the failing call via perror() and terminate with EXIT_FAILURE. */
#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \
} while (0)
/* Size in bytes of each entry of thread_stack (8 KiB). */
#define STACK_SIZE (8 * 1024)
/* Fallback syscall number for headers that do not define __NR_fsopen. */
#ifndef __NR_fsopen
#define __NR_fsopen 430
#endif
/*
 * Invoke the fsopen system call through syscall() using __NR_fsopen.
 *
 * fs_name: name of the filesystem type to open a context for.
 * flags:   flag bits passed through unchanged (the PoC uses FSOPEN_CLOEXEC).
 *
 * Returns the syscall() result narrowed to int; the caller in this PoC
 * treats -1 as failure.
 */
static inline int fsopen(const char *fs_name, unsigned int flags)
{
	long ret = syscall(__NR_fsopen, fs_name, flags);

	return (int)ret;
}
/* Stack storage handed to clone(): 512 slots of STACK_SIZE bytes; main() uses only slot 0. */
static char thread_stack[512][STACK_SIZE];
/*
 * Child entry point passed to clone().
 *
 * Calls fsopen("nfs", FSOPEN_CLOEXEC) 800000 times and never closes the
 * returned descriptors. The first failing call is reported through
 * errExit(), which terminates the process. After the loop the function
 * spins forever, so the trailing return is never reached.
 *
 * arg: unused.
 */
int thread_fn(void* arg)
{
	int remaining = 800000;

	while (remaining-- > 0) {
		if (fsopen("nfs", FSOPEN_CLOEXEC) == -1)
			errExit("fsopen");
	}

	/* Spin forever; the process never exits on its own. */
	for (;;) {
	}
	return 0;
}
/*
 * PoC driver: start one child running thread_fn() on slot 0 of
 * thread_stack, then spin forever in the parent.
 *
 * A failed clone() is reported through errExit() instead of being ignored;
 * previously the parent would spin forever with no child and no diagnostic.
 */
int main(int argc, char *argv[]) {
	int thread_pid;

	/* Command-line arguments are not used by the PoC. */
	(void)argc;
	(void)argv;

	for (int i = 0; i < 1; ++i) {
		/* clone() is given the highest address of the slot as the child stack. */
		thread_pid = clone(thread_fn, thread_stack[i] + STACK_SIZE,
				   SIGCHLD, NULL);
		if (thread_pid == -1) {
			errExit("clone");
		}
	}

	/* Spin forever; the parent never exits on its own. */
	while (1)
		;
	return 0;
}
/*-------------------------- end --------------------------------*/
Thanks!
Yutian Yang,
Zhejiang University
Signed-off-by: Yutian Yang <nglaive@gmail.com>
---
fs/fs_context.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/fs_context.c b/fs/fs_context.c
index 2834d1afa..4858645ca 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -231,7 +231,7 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type,
struct fs_context *fc;
int ret = -ENOMEM;
- fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
+ fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL_ACCOUNT);
if (!fc)
return ERR_PTR(-ENOMEM);
@@ -631,7 +631,7 @@ const struct fs_context_operations legacy_fs_context_ops = {
*/
static int legacy_init_fs_context(struct fs_context *fc)
{
- fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL);
+ fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL_ACCOUNT);
if (!fc->fs_private)
return -ENOMEM;
fc->ops = &legacy_fs_context_ops;
--
2.25.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH] memcg: charge fs_context and legacy_fs_context
@ 2021-07-17 10:20 ` Yutian Yang
0 siblings, 0 replies; 4+ messages in thread
From: Yutian Yang @ 2021-07-17 10:20 UTC (permalink / raw)
To: mhocko-DgEjT+Ai2ygdnm+yROfE0A, hannes-druUgvl0LCNAfugRpC6u6w,
vdavydov.dev-Re5JQEeQqe8AvxtiuMwx3w
Cc: cgroups-u79uwXL29TY76Z2rM5mHXA, linux-mm-Bw31MaZKKs3YtjvyW6yDsg,
shenwenbo-Y5EWUtBUdg4nDS1+zs4M5A, Yutian Yang
This patch adds accounting flags to the fs_context and legacy_fs_context
allocation sites so that the kernel can correctly charge these objects.
We have written a PoC to demonstrate the effect of the missing-charging
bugs. The PoC consumes around 1,200MB of unaccounted memory, while it is
charged for only 362MB of memory usage. We evaluated the PoC on QEMU x86_64
v5.2.90 + Linux kernel v5.10.19 + Debian buster. All limits, including
ulimits and sysctl variables, are left at their defaults. Specifically, the
hard NOFILE limit and nr_open in sysctl are both 1,048,576.
/*------------------------- POC code ----------------------------*/
#define _GNU_SOURCE
#include <sys/types.h>
#include <sys/file.h>
#include <time.h>
#include <sys/wait.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <signal.h>
#include <sched.h>
#include <fcntl.h>
#include <linux/mount.h>
/* Report the failing call via perror() and terminate with EXIT_FAILURE. */
#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \
} while (0)
/* Size in bytes of each entry of thread_stack (8 KiB). */
#define STACK_SIZE (8 * 1024)
/* Fallback syscall number for headers that do not define __NR_fsopen. */
#ifndef __NR_fsopen
#define __NR_fsopen 430
#endif
/*
 * Invoke the fsopen system call through syscall() using __NR_fsopen.
 *
 * fs_name: name of the filesystem type to open a context for.
 * flags:   flag bits passed through unchanged (the PoC uses FSOPEN_CLOEXEC).
 *
 * Returns the syscall() result narrowed to int; the caller in this PoC
 * treats -1 as failure.
 */
static inline int fsopen(const char *fs_name, unsigned int flags)
{
	long ret = syscall(__NR_fsopen, fs_name, flags);

	return (int)ret;
}
/* Stack storage handed to clone(): 512 slots of STACK_SIZE bytes; main() uses only slot 0. */
static char thread_stack[512][STACK_SIZE];
/*
 * Child entry point passed to clone().
 *
 * Calls fsopen("nfs", FSOPEN_CLOEXEC) 800000 times and never closes the
 * returned descriptors. The first failing call is reported through
 * errExit(), which terminates the process. After the loop the function
 * spins forever, so the trailing return is never reached.
 *
 * arg: unused.
 */
int thread_fn(void* arg)
{
	int remaining = 800000;

	while (remaining-- > 0) {
		if (fsopen("nfs", FSOPEN_CLOEXEC) == -1)
			errExit("fsopen");
	}

	/* Spin forever; the process never exits on its own. */
	for (;;) {
	}
	return 0;
}
/*
 * PoC driver: start one child running thread_fn() on slot 0 of
 * thread_stack, then spin forever in the parent.
 *
 * A failed clone() is reported through errExit() instead of being ignored;
 * previously the parent would spin forever with no child and no diagnostic.
 */
int main(int argc, char *argv[]) {
	int thread_pid;

	/* Command-line arguments are not used by the PoC. */
	(void)argc;
	(void)argv;

	for (int i = 0; i < 1; ++i) {
		/* clone() is given the highest address of the slot as the child stack. */
		thread_pid = clone(thread_fn, thread_stack[i] + STACK_SIZE,
				   SIGCHLD, NULL);
		if (thread_pid == -1) {
			errExit("clone");
		}
	}

	/* Spin forever; the parent never exits on its own. */
	while (1)
		;
	return 0;
}
/*-------------------------- end --------------------------------*/
Thanks!
Yutian Yang,
Zhejiang University
Signed-off-by: Yutian Yang <nglaive-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
---
fs/fs_context.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/fs_context.c b/fs/fs_context.c
index 2834d1afa..4858645ca 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -231,7 +231,7 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type,
struct fs_context *fc;
int ret = -ENOMEM;
- fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
+ fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL_ACCOUNT);
if (!fc)
return ERR_PTR(-ENOMEM);
@@ -631,7 +631,7 @@ const struct fs_context_operations legacy_fs_context_ops = {
*/
static int legacy_init_fs_context(struct fs_context *fc)
{
- fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL);
+ fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL_ACCOUNT);
if (!fc->fs_private)
return -ENOMEM;
fc->ops = &legacy_fs_context_ops;
--
2.25.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH] memcg: charge fs_context and legacy_fs_context
@ 2021-07-17 16:52 ` Shakeel Butt
0 siblings, 0 replies; 4+ messages in thread
From: Shakeel Butt @ 2021-07-17 16:52 UTC (permalink / raw)
To: Yutian Yang, Andrew Morton
Cc: Michal Hocko, Johannes Weiner, Vladimir Davydov, Cgroups,
Linux MM, shenwenbo
+Andrew Morton
On Sat, Jul 17, 2021 at 3:23 AM Yutian Yang <nglaive@gmail.com> wrote:
>
> This patch adds accounting flags to fs_context and legacy_fs_context
> allocation sites so that kernel could correctly charge these objects.
>
> We have written a PoC to demonstrate the effect of the missing-charging
> bugs. The PoC takes around 1,200MB unaccounted memory, while it is charged
> for only 362MB memory usage. We evaluate the PoC on QEMU x86_64 v5.2.90
> + Linux kernel v5.10.19 + Debian buster. All the limitations including
> ulimits and sysctl variables are set as default. Specifically, the hard
> NOFILE limit and nr_open in sysctl are both 1,048,576.
>
> /*------------------------- POC code ----------------------------*/
>
> #define _GNU_SOURCE
> #include <sys/types.h>
> #include <sys/file.h>
> #include <time.h>
> #include <sys/wait.h>
> #include <stdint.h>
> #include <stdlib.h>
> #include <unistd.h>
> #include <stdio.h>
> #include <signal.h>
> #include <sched.h>
> #include <fcntl.h>
> #include <linux/mount.h>
>
> #define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \
> } while (0)
>
> #define STACK_SIZE (8 * 1024)
> #ifndef __NR_fsopen
> #define __NR_fsopen 430
> #endif
> static inline int fsopen(const char *fs_name, unsigned int flags)
> {
> return syscall(__NR_fsopen, fs_name, flags);
> }
>
> static char thread_stack[512][STACK_SIZE];
>
> int thread_fn(void* arg)
> {
> for (int i = 0; i< 800000; ++i) {
> int fsfd = fsopen("nfs", FSOPEN_CLOEXEC);
> if (fsfd == -1) {
> errExit("fsopen");
> }
> }
> while(1);
> return 0;
> }
>
> int main(int argc, char *argv[]) {
> int thread_pid;
> for (int i = 0; i < 1; ++i) {
> thread_pid = clone(thread_fn, thread_stack[i] + STACK_SIZE, \
> SIGCHLD, NULL);
> }
> while(1);
> return 0;
> }
>
> /*-------------------------- end --------------------------------*/
>
>
> Thanks!
> Yutian Yang,
> Zhejiang University
>
>
> Signed-off-by: Yutian Yang <nglaive@gmail.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
I think this can go through the mm tree.
> ---
> fs/fs_context.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/fs/fs_context.c b/fs/fs_context.c
> index 2834d1afa..4858645ca 100644
> --- a/fs/fs_context.c
> +++ b/fs/fs_context.c
> @@ -231,7 +231,7 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type,
> struct fs_context *fc;
> int ret = -ENOMEM;
>
> - fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
> + fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL_ACCOUNT);
> if (!fc)
> return ERR_PTR(-ENOMEM);
>
> @@ -631,7 +631,7 @@ const struct fs_context_operations legacy_fs_context_ops = {
> */
> static int legacy_init_fs_context(struct fs_context *fc)
> {
> - fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL);
> + fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL_ACCOUNT);
> if (!fc->fs_private)
> return -ENOMEM;
> fc->ops = &legacy_fs_context_ops;
> --
> 2.25.1
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] memcg: charge fs_context and legacy_fs_context
@ 2021-07-17 16:52 ` Shakeel Butt
0 siblings, 0 replies; 4+ messages in thread
From: Shakeel Butt @ 2021-07-17 16:52 UTC (permalink / raw)
To: Yutian Yang, Andrew Morton
Cc: Michal Hocko, Johannes Weiner, Vladimir Davydov, Cgroups,
Linux MM, shenwenbo-Y5EWUtBUdg4nDS1+zs4M5A
+Andrew Morton
On Sat, Jul 17, 2021 at 3:23 AM Yutian Yang <nglaive-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:
>
> This patch adds accounting flags to fs_context and legacy_fs_context
> allocation sites so that kernel could correctly charge these objects.
>
> We have written a PoC to demonstrate the effect of the missing-charging
> bugs. The PoC takes around 1,200MB unaccounted memory, while it is charged
> for only 362MB memory usage. We evaluate the PoC on QEMU x86_64 v5.2.90
> + Linux kernel v5.10.19 + Debian buster. All the limitations including
> ulimits and sysctl variables are set as default. Specifically, the hard
> NOFILE limit and nr_open in sysctl are both 1,048,576.
>
> /*------------------------- POC code ----------------------------*/
>
> #define _GNU_SOURCE
> #include <sys/types.h>
> #include <sys/file.h>
> #include <time.h>
> #include <sys/wait.h>
> #include <stdint.h>
> #include <stdlib.h>
> #include <unistd.h>
> #include <stdio.h>
> #include <signal.h>
> #include <sched.h>
> #include <fcntl.h>
> #include <linux/mount.h>
>
> #define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \
> } while (0)
>
> #define STACK_SIZE (8 * 1024)
> #ifndef __NR_fsopen
> #define __NR_fsopen 430
> #endif
> static inline int fsopen(const char *fs_name, unsigned int flags)
> {
> return syscall(__NR_fsopen, fs_name, flags);
> }
>
> static char thread_stack[512][STACK_SIZE];
>
> int thread_fn(void* arg)
> {
> for (int i = 0; i< 800000; ++i) {
> int fsfd = fsopen("nfs", FSOPEN_CLOEXEC);
> if (fsfd == -1) {
> errExit("fsopen");
> }
> }
> while(1);
> return 0;
> }
>
> int main(int argc, char *argv[]) {
> int thread_pid;
> for (int i = 0; i < 1; ++i) {
> thread_pid = clone(thread_fn, thread_stack[i] + STACK_SIZE, \
> SIGCHLD, NULL);
> }
> while(1);
> return 0;
> }
>
> /*-------------------------- end --------------------------------*/
>
>
> Thanks!
> Yutian Yang,
> Zhejiang University
>
>
> Signed-off-by: Yutian Yang <nglaive-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Reviewed-by: Shakeel Butt <shakeelb-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
I think this can go through the mm tree.
> ---
> fs/fs_context.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/fs/fs_context.c b/fs/fs_context.c
> index 2834d1afa..4858645ca 100644
> --- a/fs/fs_context.c
> +++ b/fs/fs_context.c
> @@ -231,7 +231,7 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type,
> struct fs_context *fc;
> int ret = -ENOMEM;
>
> - fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
> + fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL_ACCOUNT);
> if (!fc)
> return ERR_PTR(-ENOMEM);
>
> @@ -631,7 +631,7 @@ const struct fs_context_operations legacy_fs_context_ops = {
> */
> static int legacy_init_fs_context(struct fs_context *fc)
> {
> - fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL);
> + fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL_ACCOUNT);
> if (!fc->fs_private)
> return -ENOMEM;
> fc->ops = &legacy_fs_context_ops;
> --
> 2.25.1
>
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2021-07-17 16:52 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-17 10:20 [PATCH] memcg: charge fs_context and legacy_fs_context Yutian Yang
2021-07-17 10:20 ` Yutian Yang
2021-07-17 16:52 ` Shakeel Butt
2021-07-17 16:52 ` Shakeel Butt
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.