All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/4] mm/shmem: support deterministic charging of tmpfs
       [not found] <20211110211951.3730787-1-almasrymina@google.com>
  2021-11-10 21:19   ` Mina Almasry
@ 2021-11-10 21:19   ` Mina Almasry
  2021-11-10 21:19   ` Mina Almasry
  2021-11-10 21:19   ` Mina Almasry
  3 siblings, 0 replies; 18+ messages in thread
From: Mina Almasry @ 2021-11-10 21:19 UTC (permalink / raw)
  Cc: Mina Almasry, Michal Hocko, Theodore Ts'o, Greg Thelen,
	Shakeel Butt, Andrew Morton, Hugh Dickins, Roman Gushchin,
	Dave Chinner, Johannes Weiner, Tejun Heo, Vladimir Davydov,
	Muchun Song, riel, linux-mm, linux-fsdevel, cgroups

Add memcg= option to shmem mount.

Users can specify this option at mount time and all data page charges
will be charged to the memcg supplied. Processes are only allowed to
direct tmpfs changes to a cgroup that they themselves can enter and
allocate memory in.

Signed-off-by: Mina Almasry <almasrymina@google.com>

Cc: Michal Hocko <mhocko@suse.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Greg Thelen <gthelen@google.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Hugh Dickins <hughd@google.com>
CC: Roman Gushchin <guro@fb.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: riel@surriel.com
Cc: linux-mm@kvack.org
Cc: linux-fsdevel@vger.kernel.org
Cc: cgroups@vger.kernel.org

---

Changes in v2:
- Fixed Roman's email.
- Added a new wrapper around charge_memcg() instead of __mem_cgroup_charge()
- Merged the permission check into this patch as Roman suggested.
- Instead of checking for a s_memcg_to_charge off the superblock in the
filemap code, I set_active_memcg() before calling into the fs generic
code as Dave suggests.
- I have kept the s_memcg_to_charge in the superblock to keep the
struct address_space pointer small and preserve the remount use case..

---
 fs/super.c                 |   3 +
 include/linux/fs.h         |   5 ++
 include/linux/memcontrol.h |  47 ++++++++++++++
 mm/memcontrol.c            | 130 +++++++++++++++++++++++++++++++++++++
 mm/shmem.c                 |  71 +++++++++++++++++++-
 5 files changed, 254 insertions(+), 2 deletions(-)

diff --git a/fs/super.c b/fs/super.c
index 3bfc0f8fbd5bc..8aafe5e4e6200 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -24,6 +24,7 @@
 #include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/blkdev.h>
+#include <linux/memcontrol.h>
 #include <linux/mount.h>
 #include <linux/security.h>
 #include <linux/writeback.h>		/* for the emergency remount stuff */
@@ -180,6 +181,7 @@ static void destroy_unused_super(struct super_block *s)
 	up_write(&s->s_umount);
 	list_lru_destroy(&s->s_dentry_lru);
 	list_lru_destroy(&s->s_inode_lru);
+	mem_cgroup_set_charge_target(&s->s_memcg_to_charge, NULL);
 	security_sb_free(s);
 	put_user_ns(s->s_user_ns);
 	kfree(s->s_subtype);
@@ -292,6 +294,7 @@ static void __put_super(struct super_block *s)
 		WARN_ON(s->s_dentry_lru.node);
 		WARN_ON(s->s_inode_lru.node);
 		WARN_ON(!list_empty(&s->s_mounts));
+		mem_cgroup_set_charge_target(&s->s_memcg_to_charge, NULL);
 		security_sb_free(s);
 		fscrypt_sb_free(s);
 		put_user_ns(s->s_user_ns);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3afca821df32e..59407b3e7aee3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1567,6 +1567,11 @@ struct super_block {
 	struct workqueue_struct *s_dio_done_wq;
 	struct hlist_head s_pins;

+#ifdef CONFIG_MEMCG
+	/* memcg to charge for pages allocated to this filesystem */
+	struct mem_cgroup *s_memcg_to_charge;
+#endif
+
 	/*
 	 * Owning user namespace and default context in which to
 	 * interpret filesystem uids, gids, quotas, device nodes,
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0c5c403f4be6b..866904afd3563 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -27,6 +27,7 @@ struct obj_cgroup;
 struct page;
 struct mm_struct;
 struct kmem_cache;
+struct super_block;

 /* Cgroup-specific page state, on top of universal node page state */
 enum memcg_stat_item {
@@ -713,6 +714,9 @@ static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm,
 	return __mem_cgroup_charge(folio, mm, gfp);
 }

+int mem_cgroup_charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
+			    gfp_t gfp);
+
 int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm,
 				  gfp_t gfp, swp_entry_t entry);
 void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry);
@@ -923,6 +927,17 @@ static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
 	return !!(memcg->css.flags & CSS_ONLINE);
 }

+struct mem_cgroup *
+mem_cgroup_mapping_get_charge_target(struct address_space *mapping);
+void mem_cgroup_set_charge_target(struct mem_cgroup **target,
+				  struct mem_cgroup *memcg);
+struct mem_cgroup *mem_cgroup_get_from_path(const char *path);
+/**
+ * User is responsible for providing a buffer @buf of length @len and freeing
+ * it.
+ */
+int mem_cgroup_get_name_from_sb(struct super_block *sb, char *buf, size_t len);
+
 void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
 		int zid, int nr_pages);

@@ -1223,6 +1238,38 @@ static inline int mem_cgroup_charge(struct folio *folio,
 	return 0;
 }

+static inline int mem_cgroup_charge_memcg(struct folio *folio,
+					  struct mem_cgroup *memcg,
+					  gfp_t gfp_mask)
+{
+	return 0;
+}
+
+static inline struct mem_cgroup *
+mem_cgroup_mapping_get_charge_target(struct address_space *mapping)
+{
+	return NULL;
+}
+
+static inline void mem_cgroup_set_charge_target(struct mem_cgroup **target,
+						struct mem_cgroup *memcg)
+{
+}
+
+static inline struct mem_cgroup *mem_cgroup_get_from_path(const char *path)
+{
+	return NULL;
+}
+
+static inline int mem_cgroup_get_name_from_sb(struct super_block *sb, char *buf,
+					      size_t len)
+{
+	if (len < 1)
+		return -EINVAL;
+	buf[0] = '\0';
+	return 0;
+}
+
 static inline int mem_cgroup_swapin_charge_page(struct page *page,
 			struct mm_struct *mm, gfp_t gfp, swp_entry_t entry)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 781605e920153..b3d8f52a63d17 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -62,6 +62,7 @@
 #include <linux/tracehook.h>
 #include <linux/psi.h>
 #include <linux/seq_buf.h>
+#include <linux/string.h>
 #include "internal.h"
 #include <net/sock.h>
 #include <net/ip.h>
@@ -2580,6 +2581,126 @@ void mem_cgroup_handle_over_high(void)
 	css_put(&memcg->css);
 }

+/*
+ * Non error return value must eventually be released with css_put().
+ */
+struct mem_cgroup *mem_cgroup_get_from_path(const char *path)
+{
+	static const char procs_filename[] = "/cgroup.procs";
+	struct file *file, *procs;
+	struct cgroup_subsys_state *css;
+	struct mem_cgroup *memcg;
+	char *procs_path =
+		kmalloc(strlen(path) + sizeof(procs_filename), GFP_KERNEL);
+
+	if (procs_path == NULL)
+		return ERR_PTR(-ENOMEM);
+	strcpy(procs_path, path);
+	strcat(procs_path, procs_filename);
+
+	procs = filp_open(procs_path, O_WRONLY, 0);
+	kfree(procs_path);
+
+	/*
+	 * Restrict the capability for tasks to mount with memcg charging to the
+	 * cgroup they could not join. For example, disallow:
+	 *
+	 * mount -t tmpfs -o memcg=root-cgroup nodev <MOUNT_DIR>
+	 *
+	 * if it is a non-root task.
+	 */
+	if (IS_ERR(procs))
+		return (struct mem_cgroup *)procs;
+	fput(procs);
+
+	file = filp_open(path, O_DIRECTORY | O_RDONLY, 0);
+	if (IS_ERR(file))
+		return (struct mem_cgroup *)file;
+
+	css = css_tryget_online_from_dir(file->f_path.dentry,
+					 &memory_cgrp_subsys);
+	if (IS_ERR(css))
+		memcg = (struct mem_cgroup *)css;
+	else
+		memcg = container_of(css, struct mem_cgroup, css);
+
+	fput(file);
+	return memcg;
+}
+
+/*
+ * Get the name of the optional charge target memcg associated with @sb.  This
+ * is the cgroup name, not the cgroup path.
+ */
+int mem_cgroup_get_name_from_sb(struct super_block *sb, char *buf, size_t len)
+{
+	struct mem_cgroup *memcg;
+	int ret = 0;
+
+	buf[0] = '\0';
+
+	rcu_read_lock();
+	memcg = rcu_dereference(sb->s_memcg_to_charge);
+	if (memcg && !css_tryget_online(&memcg->css))
+		memcg = NULL;
+	rcu_read_unlock();
+
+	if (!memcg)
+		return 0;
+
+	ret = cgroup_path(memcg->css.cgroup, buf + len / 2, len / 2);
+	if (ret >= len / 2)
+		strcpy(buf, "?");
+	else {
+		char *p = mangle_path(buf, buf + len / 2, " \t\n\\");
+
+		if (p)
+			*p = '\0';
+		else
+			strcpy(buf, "?");
+	}
+
+	css_put(&memcg->css);
+	return ret < 0 ? ret : 0;
+}
+
+/*
+ * Set or clear (if @memcg is NULL) charge association from file system to
+ * memcg.  If @memcg != NULL, then a css reference must be held by the caller to
+ * ensure that the cgroup is not deleted during this operation.
+ */
+void mem_cgroup_set_charge_target(struct mem_cgroup **target,
+				  struct mem_cgroup *memcg)
+{
+	if (memcg)
+		css_get(&memcg->css);
+	memcg = xchg(target, memcg);
+	if (memcg)
+		css_put(&memcg->css);
+}
+
+/*
+ * Returns the memcg to charge for inode pages.  If non-NULL is returned, caller
+ * must drop reference with css_put().  NULL indicates that the inode does not
+ * have a memcg to charge, so the default process based policy should be used.
+ */
+struct mem_cgroup *
+mem_cgroup_mapping_get_charge_target(struct address_space *mapping)
+{
+	struct mem_cgroup *memcg;
+
+	if (!mapping)
+		return NULL;
+
+	rcu_read_lock();
+	memcg = rcu_dereference(mapping->host->i_sb->s_memcg_to_charge);
+	if (memcg && !css_tryget_online(&memcg->css))
+		memcg = NULL;
+	rcu_read_unlock();
+
+	return memcg;
+}
+
 static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
 			unsigned int nr_pages)
 {
@@ -6678,6 +6799,15 @@ static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
 	return ret;
 }

+int mem_cgroup_charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
+			    gfp_t gfp)
+{
+	if (mem_cgroup_disabled())
+		return 0;
+
+	return charge_memcg(folio, memcg, gfp);
+}
+
 int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp)
 {
 	struct mem_cgroup *memcg;
diff --git a/mm/shmem.c b/mm/shmem.c
index 23c91a8beb781..68f5f45f40cbc 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -115,10 +115,14 @@ struct shmem_options {
 	bool full_inums;
 	int huge;
 	int seen;
+#if CONFIG_MEMCG
+	struct mem_cgroup *memcg;
+#endif
 #define SHMEM_SEEN_BLOCKS 1
 #define SHMEM_SEEN_INODES 2
 #define SHMEM_SEEN_HUGE 4
 #define SHMEM_SEEN_INUMS 8
+#define SHMEM_SEEN_MEMCG 16
 };

 #ifdef CONFIG_TMPFS
@@ -697,6 +701,7 @@ static int shmem_add_to_page_cache(struct page *page,
 	unsigned long i = 0;
 	unsigned long nr = compound_nr(page);
 	int error;
+	struct mem_cgroup *remote_memcg;

 	VM_BUG_ON_PAGE(PageTail(page), page);
 	VM_BUG_ON_PAGE(index != round_down(index, nr), page);
@@ -709,7 +714,14 @@ static int shmem_add_to_page_cache(struct page *page,
 	page->index = index;

 	if (!PageSwapCache(page)) {
-		error = mem_cgroup_charge(page_folio(page), charge_mm, gfp);
+		remote_memcg = mem_cgroup_mapping_get_charge_target(mapping);
+		if (remote_memcg) {
+			error = mem_cgroup_charge_memcg(page_folio(page),
+							remote_memcg, gfp);
+			css_put(&remote_memcg->css);
+		} else
+			error = mem_cgroup_charge(page_folio(page), charge_mm,
+						  gfp);
 		if (error) {
 			if (PageTransHuge(page)) {
 				count_vm_event(THP_FILE_FALLBACK);
@@ -1822,6 +1834,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 	int error;
 	int once = 0;
 	int alloced = 0;
+	struct mem_cgroup *remote_memcg, *old_memcg = NULL;

 	if (index > (MAX_LFS_FILESIZE >> PAGE_SHIFT))
 		return -EFBIG;
@@ -1834,8 +1847,21 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 	sbinfo = SHMEM_SB(inode->i_sb);
 	charge_mm = vma ? vma->vm_mm : NULL;

+	/*
+	 * If we're doing a remote charge here, set the active_memcg as the
+	 * remote memcg, so that eventually if pagecache_get_page() calls into
+	 * filemap_add_folio(), we charge the correct memcg.
+	 */
+	remote_memcg = mem_cgroup_mapping_get_charge_target(mapping);
+	if (remote_memcg)
+		old_memcg = set_active_memcg(remote_memcg);
+
 	page = pagecache_get_page(mapping, index,
 					FGP_ENTRY | FGP_HEAD | FGP_LOCK, 0);
+	if (old_memcg) {
+		set_active_memcg(old_memcg);
+		css_put(&remote_memcg->css);
+	}

 	if (page && vma && userfaultfd_minor(vma)) {
 		if (!xa_is_value(page)) {
@@ -3342,6 +3368,7 @@ static const struct export_operations shmem_export_ops = {
 enum shmem_param {
 	Opt_gid,
 	Opt_huge,
+	Opt_memcg,
 	Opt_mode,
 	Opt_mpol,
 	Opt_nr_blocks,
@@ -3363,6 +3390,7 @@ static const struct constant_table shmem_param_enums_huge[] = {
 const struct fs_parameter_spec shmem_fs_parameters[] = {
 	fsparam_u32   ("gid",		Opt_gid),
 	fsparam_enum  ("huge",		Opt_huge,  shmem_param_enums_huge),
+	fsparam_string("memcg",		Opt_memcg),
 	fsparam_u32oct("mode",		Opt_mode),
 	fsparam_string("mpol",		Opt_mpol),
 	fsparam_string("nr_blocks",	Opt_nr_blocks),
@@ -3379,6 +3407,7 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
 	struct shmem_options *ctx = fc->fs_private;
 	struct fs_parse_result result;
 	unsigned long long size;
+	struct mem_cgroup *memcg;
 	char *rest;
 	int opt;

@@ -3412,6 +3441,17 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
 			goto bad_value;
 		ctx->seen |= SHMEM_SEEN_INODES;
 		break;
+#ifdef CONFIG_MEMCG
+	case Opt_memcg:
+		if (ctx->memcg)
+			css_put(&ctx->memcg->css);
+		memcg = mem_cgroup_get_from_path(param->string);
+		if (IS_ERR(memcg))
+			goto bad_value;
+		ctx->memcg = memcg;
+		ctx->seen |= SHMEM_SEEN_MEMCG;
+		break;
+#endif
 	case Opt_mode:
 		ctx->mode = result.uint_32 & 07777;
 		break;
@@ -3573,6 +3613,14 @@ static int shmem_reconfigure(struct fs_context *fc)
 	}
 	raw_spin_unlock(&sbinfo->stat_lock);
 	mpol_put(mpol);
+#if CONFIG_MEMCG
+	if (ctx->seen & SHMEM_SEEN_MEMCG && ctx->memcg) {
+		mem_cgroup_set_charge_target(&fc->root->d_sb->s_memcg_to_charge,
+					     ctx->memcg);
+		css_put(&ctx->memcg->css);
+		ctx->memcg = NULL;
+	}
+#endif
 	return 0;
 out:
 	raw_spin_unlock(&sbinfo->stat_lock);
@@ -3582,6 +3630,11 @@ static int shmem_reconfigure(struct fs_context *fc)
 static int shmem_show_options(struct seq_file *seq, struct dentry *root)
 {
 	struct shmem_sb_info *sbinfo = SHMEM_SB(root->d_sb);
+	int err;
+	char *buf = __getname();
+
+	if (!buf)
+		return -ENOMEM;

 	if (sbinfo->max_blocks != shmem_default_max_blocks())
 		seq_printf(seq, ",size=%luk",
@@ -3625,7 +3678,13 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
 		seq_printf(seq, ",huge=%s", shmem_format_huge(sbinfo->huge));
 #endif
 	shmem_show_mpol(seq, sbinfo->mpol);
-	return 0;
+	/* Memory cgroup binding: memcg=cgroup_name */
+	err = mem_cgroup_get_name_from_sb(root->d_sb, buf, PATH_MAX);
+	if (!err && buf[0] != '\0')
+		seq_printf(seq, ",memcg=%s", buf);
+
+	__putname(buf);
+	return err;
 }

 #endif /* CONFIG_TMPFS */
@@ -3710,6 +3769,14 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
 	sb->s_flags |= SB_POSIXACL;
 #endif
 	uuid_gen(&sb->s_uuid);
+#if CONFIG_MEMCG
+	if (ctx->memcg) {
+		mem_cgroup_set_charge_target(&sb->s_memcg_to_charge,
+					     ctx->memcg);
+		css_put(&ctx->memcg->css);
+		ctx->memcg = NULL;
+	}
+#endif

 	inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
 	if (!inode)
--
2.34.0.rc0.344.g81b53c2807-goog

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v2 1/4] mm/shmem: support deterministic charging of tmpfs
@ 2021-11-10 21:19   ` Mina Almasry
  0 siblings, 0 replies; 18+ messages in thread
From: Mina Almasry @ 2021-11-10 21:19 UTC (permalink / raw)
  Cc: Mina Almasry, Michal Hocko, Theodore Ts'o, Greg Thelen,
	Shakeel Butt, Andrew Morton, Hugh Dickins, Roman Gushchin,
	Dave Chinner, Johannes Weiner, Tejun Heo, Vladimir Davydov,
	Muchun Song, riel, linux-mm, linux-fsdevel, cgroups

Add memcg= option to shmem mount.

Users can specify this option at mount time and all data page charges
will be charged to the memcg supplied. Processes are only allowed to
direct tmpfs changes to a cgroup that they themselves can enter and
allocate memory in.

Signed-off-by: Mina Almasry <almasrymina@google.com>

Cc: Michal Hocko <mhocko@suse.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Greg Thelen <gthelen@google.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Hugh Dickins <hughd@google.com>
CC: Roman Gushchin <guro@fb.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: riel@surriel.com
Cc: linux-mm@kvack.org
Cc: linux-fsdevel@vger.kernel.org
Cc: cgroups@vger.kernel.org

---

Changes in v2:
- Fixed Roman's email.
- Added a new wrapper around charge_memcg() instead of __mem_cgroup_charge()
- Merged the permission check into this patch as Roman suggested.
- Instead of checking for a s_memcg_to_charge off the superblock in the
filemap code, I set_active_memcg() before calling into the fs generic
code as Dave suggests.
- I have kept the s_memcg_to_charge in the superblock to keep the
struct address_space pointer small and preserve the remount use case..

---
 fs/super.c                 |   3 +
 include/linux/fs.h         |   5 ++
 include/linux/memcontrol.h |  47 ++++++++++++++
 mm/memcontrol.c            | 130 +++++++++++++++++++++++++++++++++++++
 mm/shmem.c                 |  71 +++++++++++++++++++-
 5 files changed, 254 insertions(+), 2 deletions(-)

diff --git a/fs/super.c b/fs/super.c
index 3bfc0f8fbd5bc..8aafe5e4e6200 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -24,6 +24,7 @@
 #include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/blkdev.h>
+#include <linux/memcontrol.h>
 #include <linux/mount.h>
 #include <linux/security.h>
 #include <linux/writeback.h>		/* for the emergency remount stuff */
@@ -180,6 +181,7 @@ static void destroy_unused_super(struct super_block *s)
 	up_write(&s->s_umount);
 	list_lru_destroy(&s->s_dentry_lru);
 	list_lru_destroy(&s->s_inode_lru);
+	mem_cgroup_set_charge_target(&s->s_memcg_to_charge, NULL);
 	security_sb_free(s);
 	put_user_ns(s->s_user_ns);
 	kfree(s->s_subtype);
@@ -292,6 +294,7 @@ static void __put_super(struct super_block *s)
 		WARN_ON(s->s_dentry_lru.node);
 		WARN_ON(s->s_inode_lru.node);
 		WARN_ON(!list_empty(&s->s_mounts));
+		mem_cgroup_set_charge_target(&s->s_memcg_to_charge, NULL);
 		security_sb_free(s);
 		fscrypt_sb_free(s);
 		put_user_ns(s->s_user_ns);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3afca821df32e..59407b3e7aee3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1567,6 +1567,11 @@ struct super_block {
 	struct workqueue_struct *s_dio_done_wq;
 	struct hlist_head s_pins;

+#ifdef CONFIG_MEMCG
+	/* memcg to charge for pages allocated to this filesystem */
+	struct mem_cgroup *s_memcg_to_charge;
+#endif
+
 	/*
 	 * Owning user namespace and default context in which to
 	 * interpret filesystem uids, gids, quotas, device nodes,
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0c5c403f4be6b..866904afd3563 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -27,6 +27,7 @@ struct obj_cgroup;
 struct page;
 struct mm_struct;
 struct kmem_cache;
+struct super_block;

 /* Cgroup-specific page state, on top of universal node page state */
 enum memcg_stat_item {
@@ -713,6 +714,9 @@ static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm,
 	return __mem_cgroup_charge(folio, mm, gfp);
 }

+int mem_cgroup_charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
+			    gfp_t gfp);
+
 int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm,
 				  gfp_t gfp, swp_entry_t entry);
 void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry);
@@ -923,6 +927,17 @@ static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
 	return !!(memcg->css.flags & CSS_ONLINE);
 }

+struct mem_cgroup *
+mem_cgroup_mapping_get_charge_target(struct address_space *mapping);
+void mem_cgroup_set_charge_target(struct mem_cgroup **target,
+				  struct mem_cgroup *memcg);
+struct mem_cgroup *mem_cgroup_get_from_path(const char *path);
+/**
+ * User is responsible for providing a buffer @buf of length @len and freeing
+ * it.
+ */
+int mem_cgroup_get_name_from_sb(struct super_block *sb, char *buf, size_t len);
+
 void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
 		int zid, int nr_pages);

@@ -1223,6 +1238,38 @@ static inline int mem_cgroup_charge(struct folio *folio,
 	return 0;
 }

+static inline int mem_cgroup_charge_memcg(struct folio *folio,
+					  struct mem_cgroup *memcg,
+					  gfp_t gfp_mask)
+{
+	return 0;
+}
+
+static inline struct mem_cgroup *
+mem_cgroup_mapping_get_charge_target(struct address_space *mapping)
+{
+	return NULL;
+}
+
+static inline void mem_cgroup_set_charge_target(struct mem_cgroup **target,
+						struct mem_cgroup *memcg)
+{
+}
+
+static inline struct mem_cgroup *mem_cgroup_get_from_path(const char *path)
+{
+	return NULL;
+}
+
+static inline int mem_cgroup_get_name_from_sb(struct super_block *sb, char *buf,
+					      size_t len)
+{
+	if (len < 1)
+		return -EINVAL;
+	buf[0] = '\0';
+	return 0;
+}
+
 static inline int mem_cgroup_swapin_charge_page(struct page *page,
 			struct mm_struct *mm, gfp_t gfp, swp_entry_t entry)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 781605e920153..b3d8f52a63d17 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -62,6 +62,7 @@
 #include <linux/tracehook.h>
 #include <linux/psi.h>
 #include <linux/seq_buf.h>
+#include <linux/string.h>
 #include "internal.h"
 #include <net/sock.h>
 #include <net/ip.h>
@@ -2580,6 +2581,126 @@ void mem_cgroup_handle_over_high(void)
 	css_put(&memcg->css);
 }

+/*
+ * Non error return value must eventually be released with css_put().
+ */
+struct mem_cgroup *mem_cgroup_get_from_path(const char *path)
+{
+	static const char procs_filename[] = "/cgroup.procs";
+	struct file *file, *procs;
+	struct cgroup_subsys_state *css;
+	struct mem_cgroup *memcg;
+	char *procs_path =
+		kmalloc(strlen(path) + sizeof(procs_filename), GFP_KERNEL);
+
+	if (procs_path == NULL)
+		return ERR_PTR(-ENOMEM);
+	strcpy(procs_path, path);
+	strcat(procs_path, procs_filename);
+
+	procs = filp_open(procs_path, O_WRONLY, 0);
+	kfree(procs_path);
+
+	/*
+	 * Restrict the capability for tasks to mount with memcg charging to the
+	 * cgroup they could not join. For example, disallow:
+	 *
+	 * mount -t tmpfs -o memcg=root-cgroup nodev <MOUNT_DIR>
+	 *
+	 * if it is a non-root task.
+	 */
+	if (IS_ERR(procs))
+		return (struct mem_cgroup *)procs;
+	fput(procs);
+
+	file = filp_open(path, O_DIRECTORY | O_RDONLY, 0);
+	if (IS_ERR(file))
+		return (struct mem_cgroup *)file;
+
+	css = css_tryget_online_from_dir(file->f_path.dentry,
+					 &memory_cgrp_subsys);
+	if (IS_ERR(css))
+		memcg = (struct mem_cgroup *)css;
+	else
+		memcg = container_of(css, struct mem_cgroup, css);
+
+	fput(file);
+	return memcg;
+}
+
+/*
+ * Get the name of the optional charge target memcg associated with @sb.  This
+ * is the cgroup name, not the cgroup path.
+ */
+int mem_cgroup_get_name_from_sb(struct super_block *sb, char *buf, size_t len)
+{
+	struct mem_cgroup *memcg;
+	int ret = 0;
+
+	buf[0] = '\0';
+
+	rcu_read_lock();
+	memcg = rcu_dereference(sb->s_memcg_to_charge);
+	if (memcg && !css_tryget_online(&memcg->css))
+		memcg = NULL;
+	rcu_read_unlock();
+
+	if (!memcg)
+		return 0;
+
+	ret = cgroup_path(memcg->css.cgroup, buf + len / 2, len / 2);
+	if (ret >= len / 2)
+		strcpy(buf, "?");
+	else {
+		char *p = mangle_path(buf, buf + len / 2, " \t\n\\");
+
+		if (p)
+			*p = '\0';
+		else
+			strcpy(buf, "?");
+	}
+
+	css_put(&memcg->css);
+	return ret < 0 ? ret : 0;
+}
+
+/*
+ * Set or clear (if @memcg is NULL) charge association from file system to
+ * memcg.  If @memcg != NULL, then a css reference must be held by the caller to
+ * ensure that the cgroup is not deleted during this operation.
+ */
+void mem_cgroup_set_charge_target(struct mem_cgroup **target,
+				  struct mem_cgroup *memcg)
+{
+	if (memcg)
+		css_get(&memcg->css);
+	memcg = xchg(target, memcg);
+	if (memcg)
+		css_put(&memcg->css);
+}
+
+/*
+ * Returns the memcg to charge for inode pages.  If non-NULL is returned, caller
+ * must drop reference with css_put().  NULL indicates that the inode does not
+ * have a memcg to charge, so the default process based policy should be used.
+ */
+struct mem_cgroup *
+mem_cgroup_mapping_get_charge_target(struct address_space *mapping)
+{
+	struct mem_cgroup *memcg;
+
+	if (!mapping)
+		return NULL;
+
+	rcu_read_lock();
+	memcg = rcu_dereference(mapping->host->i_sb->s_memcg_to_charge);
+	if (memcg && !css_tryget_online(&memcg->css))
+		memcg = NULL;
+	rcu_read_unlock();
+
+	return memcg;
+}
+
 static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
 			unsigned int nr_pages)
 {
@@ -6678,6 +6799,15 @@ static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
 	return ret;
 }

+int mem_cgroup_charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
+			    gfp_t gfp)
+{
+	if (mem_cgroup_disabled())
+		return 0;
+
+	return charge_memcg(folio, memcg, gfp);
+}
+
 int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp)
 {
 	struct mem_cgroup *memcg;
diff --git a/mm/shmem.c b/mm/shmem.c
index 23c91a8beb781..68f5f45f40cbc 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -115,10 +115,14 @@ struct shmem_options {
 	bool full_inums;
 	int huge;
 	int seen;
+#if CONFIG_MEMCG
+	struct mem_cgroup *memcg;
+#endif
 #define SHMEM_SEEN_BLOCKS 1
 #define SHMEM_SEEN_INODES 2
 #define SHMEM_SEEN_HUGE 4
 #define SHMEM_SEEN_INUMS 8
+#define SHMEM_SEEN_MEMCG 16
 };

 #ifdef CONFIG_TMPFS
@@ -697,6 +701,7 @@ static int shmem_add_to_page_cache(struct page *page,
 	unsigned long i = 0;
 	unsigned long nr = compound_nr(page);
 	int error;
+	struct mem_cgroup *remote_memcg;

 	VM_BUG_ON_PAGE(PageTail(page), page);
 	VM_BUG_ON_PAGE(index != round_down(index, nr), page);
@@ -709,7 +714,14 @@ static int shmem_add_to_page_cache(struct page *page,
 	page->index = index;

 	if (!PageSwapCache(page)) {
-		error = mem_cgroup_charge(page_folio(page), charge_mm, gfp);
+		remote_memcg = mem_cgroup_mapping_get_charge_target(mapping);
+		if (remote_memcg) {
+			error = mem_cgroup_charge_memcg(page_folio(page),
+							remote_memcg, gfp);
+			css_put(&remote_memcg->css);
+		} else
+			error = mem_cgroup_charge(page_folio(page), charge_mm,
+						  gfp);
 		if (error) {
 			if (PageTransHuge(page)) {
 				count_vm_event(THP_FILE_FALLBACK);
@@ -1822,6 +1834,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 	int error;
 	int once = 0;
 	int alloced = 0;
+	struct mem_cgroup *remote_memcg, *old_memcg = NULL;

 	if (index > (MAX_LFS_FILESIZE >> PAGE_SHIFT))
 		return -EFBIG;
@@ -1834,8 +1847,21 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 	sbinfo = SHMEM_SB(inode->i_sb);
 	charge_mm = vma ? vma->vm_mm : NULL;

+	/*
+	 * If we're doing a remote charge here, set the active_memcg as the
+	 * remote memcg, so that eventually if pagecache_get_page() calls into
+	 * filemap_add_folio(), we charge the correct memcg.
+	 */
+	remote_memcg = mem_cgroup_mapping_get_charge_target(mapping);
+	if (remote_memcg)
+		old_memcg = set_active_memcg(remote_memcg);
+
 	page = pagecache_get_page(mapping, index,
 					FGP_ENTRY | FGP_HEAD | FGP_LOCK, 0);
+	if (old_memcg) {
+		set_active_memcg(old_memcg);
+		css_put(&remote_memcg->css);
+	}

 	if (page && vma && userfaultfd_minor(vma)) {
 		if (!xa_is_value(page)) {
@@ -3342,6 +3368,7 @@ static const struct export_operations shmem_export_ops = {
 enum shmem_param {
 	Opt_gid,
 	Opt_huge,
+	Opt_memcg,
 	Opt_mode,
 	Opt_mpol,
 	Opt_nr_blocks,
@@ -3363,6 +3390,7 @@ static const struct constant_table shmem_param_enums_huge[] = {
 const struct fs_parameter_spec shmem_fs_parameters[] = {
 	fsparam_u32   ("gid",		Opt_gid),
 	fsparam_enum  ("huge",		Opt_huge,  shmem_param_enums_huge),
+	fsparam_string("memcg",		Opt_memcg),
 	fsparam_u32oct("mode",		Opt_mode),
 	fsparam_string("mpol",		Opt_mpol),
 	fsparam_string("nr_blocks",	Opt_nr_blocks),
@@ -3379,6 +3407,7 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
 	struct shmem_options *ctx = fc->fs_private;
 	struct fs_parse_result result;
 	unsigned long long size;
+	struct mem_cgroup *memcg;
 	char *rest;
 	int opt;

@@ -3412,6 +3441,17 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
 			goto bad_value;
 		ctx->seen |= SHMEM_SEEN_INODES;
 		break;
+#ifdef CONFIG_MEMCG
+	case Opt_memcg:
+		if (ctx->memcg)
+			css_put(&ctx->memcg->css);
+		memcg = mem_cgroup_get_from_path(param->string);
+		if (IS_ERR(memcg))
+			goto bad_value;
+		ctx->memcg = memcg;
+		ctx->seen |= SHMEM_SEEN_MEMCG;
+		break;
+#endif
 	case Opt_mode:
 		ctx->mode = result.uint_32 & 07777;
 		break;
@@ -3573,6 +3613,14 @@ static int shmem_reconfigure(struct fs_context *fc)
 	}
 	raw_spin_unlock(&sbinfo->stat_lock);
 	mpol_put(mpol);
+#if CONFIG_MEMCG
+	if (ctx->seen & SHMEM_SEEN_MEMCG && ctx->memcg) {
+		mem_cgroup_set_charge_target(&fc->root->d_sb->s_memcg_to_charge,
+					     ctx->memcg);
+		css_put(&ctx->memcg->css);
+		ctx->memcg = NULL;
+	}
+#endif
 	return 0;
 out:
 	raw_spin_unlock(&sbinfo->stat_lock);
@@ -3582,6 +3630,11 @@ static int shmem_reconfigure(struct fs_context *fc)
 static int shmem_show_options(struct seq_file *seq, struct dentry *root)
 {
 	struct shmem_sb_info *sbinfo = SHMEM_SB(root->d_sb);
+	int err;
+	char *buf = __getname();
+
+	if (!buf)
+		return -ENOMEM;

 	if (sbinfo->max_blocks != shmem_default_max_blocks())
 		seq_printf(seq, ",size=%luk",
@@ -3625,7 +3678,13 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
 		seq_printf(seq, ",huge=%s", shmem_format_huge(sbinfo->huge));
 #endif
 	shmem_show_mpol(seq, sbinfo->mpol);
-	return 0;
+	/* Memory cgroup binding: memcg=cgroup_name */
+	err = mem_cgroup_get_name_from_sb(root->d_sb, buf, PATH_MAX);
+	if (!err && buf[0] != '\0')
+		seq_printf(seq, ",memcg=%s", buf);
+
+	__putname(buf);
+	return err;
 }

 #endif /* CONFIG_TMPFS */
@@ -3710,6 +3769,14 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
 	sb->s_flags |= SB_POSIXACL;
 #endif
 	uuid_gen(&sb->s_uuid);
+#if CONFIG_MEMCG
+	if (ctx->memcg) {
+		mem_cgroup_set_charge_target(&sb->s_memcg_to_charge,
+					     ctx->memcg);
+		css_put(&ctx->memcg->css);
+		ctx->memcg = NULL;
+	}
+#endif

 	inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
 	if (!inode)
--
2.34.0.rc0.344.g81b53c2807-goog


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v2 1/4] mm/shmem: support deterministic charging of tmpfs
@ 2021-11-10 21:19   ` Mina Almasry
  0 siblings, 0 replies; 18+ messages in thread
From: Mina Almasry @ 2021-11-10 21:19 UTC (permalink / raw)
  Cc: Mina Almasry, Michal Hocko, Theodore Ts'o, Greg Thelen,
	Shakeel Butt, Andrew Morton, Hugh Dickins, Roman Gushchin,
	Dave Chinner, Johannes Weiner, Tejun Heo, Vladimir Davydov,
	Muchun Song, riel-ebMLmSuQjDVBDgjK7y7TUQ,
	linux-mm-Bw31MaZKKs3YtjvyW6yDsg,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	cgroups-u79uwXL29TY76Z2rM5mHXA

Add memcg= option to shmem mount.

Users can specify this option at mount time and all data page charges
will be charged to the memcg supplied. Processes are only allowed to
direct tmpfs changes to a cgroup that they themselves can enter and
allocate memory in.

Signed-off-by: Mina Almasry <almasrymina-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>

Cc: Michal Hocko <mhocko-IBi9RG/b67k@public.gmane.org>
Cc: Theodore Ts'o <tytso-3s7WtUTddSA@public.gmane.org>
Cc: Greg Thelen <gthelen-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
Cc: Shakeel Butt <shakeelb-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
Cc: Andrew Morton <akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>
Cc: Hugh Dickins <hughd-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
CC: Roman Gushchin <guro-b10kYP2dOMg@public.gmane.org>
Cc: Dave Chinner <david-FqsqvQoI3Ljby3iVrkZq2A@public.gmane.org>
Cc: Johannes Weiner <hannes-druUgvl0LCNAfugRpC6u6w@public.gmane.org>
Cc: Hugh Dickins <hughd-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
Cc: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Cc: Vladimir Davydov <vdavydov.dev-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Cc: Muchun Song <songmuchun-EC8Uxl6Npydl57MIdRCFDg@public.gmane.org>
Cc: riel-ebMLmSuQjDVBDgjK7y7TUQ@public.gmane.org
Cc: linux-mm-Bw31MaZKKs3YtjvyW6yDsg@public.gmane.org
Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Cc: cgroups-u79uwXL29TY76Z2rM5mHXA@public.gmane.org

---

Changes in v2:
- Fixed Roman's email.
- Added a new wrapper around charge_memcg() instead of __mem_cgroup_charge()
- Merged the permission check into this patch as Roman suggested.
- Instead of checking for a s_memcg_to_charge off the superblock in the
filemap code, I set_active_memcg() before calling into the fs generic
code as Dave suggests.
- I have kept the s_memcg_to_charge in the superblock to keep the
struct address_space pointer small and preserve the remount use case..

---
 fs/super.c                 |   3 +
 include/linux/fs.h         |   5 ++
 include/linux/memcontrol.h |  47 ++++++++++++++
 mm/memcontrol.c            | 130 +++++++++++++++++++++++++++++++++++++
 mm/shmem.c                 |  71 +++++++++++++++++++-
 5 files changed, 254 insertions(+), 2 deletions(-)

diff --git a/fs/super.c b/fs/super.c
index 3bfc0f8fbd5bc..8aafe5e4e6200 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -24,6 +24,7 @@
 #include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/blkdev.h>
+#include <linux/memcontrol.h>
 #include <linux/mount.h>
 #include <linux/security.h>
 #include <linux/writeback.h>		/* for the emergency remount stuff */
@@ -180,6 +181,7 @@ static void destroy_unused_super(struct super_block *s)
 	up_write(&s->s_umount);
 	list_lru_destroy(&s->s_dentry_lru);
 	list_lru_destroy(&s->s_inode_lru);
+	mem_cgroup_set_charge_target(&s->s_memcg_to_charge, NULL);
 	security_sb_free(s);
 	put_user_ns(s->s_user_ns);
 	kfree(s->s_subtype);
@@ -292,6 +294,7 @@ static void __put_super(struct super_block *s)
 		WARN_ON(s->s_dentry_lru.node);
 		WARN_ON(s->s_inode_lru.node);
 		WARN_ON(!list_empty(&s->s_mounts));
+		mem_cgroup_set_charge_target(&s->s_memcg_to_charge, NULL);
 		security_sb_free(s);
 		fscrypt_sb_free(s);
 		put_user_ns(s->s_user_ns);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3afca821df32e..59407b3e7aee3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1567,6 +1567,11 @@ struct super_block {
 	struct workqueue_struct *s_dio_done_wq;
 	struct hlist_head s_pins;

+#ifdef CONFIG_MEMCG
+	/* memcg to charge for pages allocated to this filesystem */
+	struct mem_cgroup *s_memcg_to_charge;
+#endif
+
 	/*
 	 * Owning user namespace and default context in which to
 	 * interpret filesystem uids, gids, quotas, device nodes,
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0c5c403f4be6b..866904afd3563 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -27,6 +27,7 @@ struct obj_cgroup;
 struct page;
 struct mm_struct;
 struct kmem_cache;
+struct super_block;

 /* Cgroup-specific page state, on top of universal node page state */
 enum memcg_stat_item {
@@ -713,6 +714,9 @@ static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm,
 	return __mem_cgroup_charge(folio, mm, gfp);
 }

+int mem_cgroup_charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
+			    gfp_t gfp);
+
 int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm,
 				  gfp_t gfp, swp_entry_t entry);
 void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry);
@@ -923,6 +927,17 @@ static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
 	return !!(memcg->css.flags & CSS_ONLINE);
 }

+struct mem_cgroup *
+mem_cgroup_mapping_get_charge_target(struct address_space *mapping);
+void mem_cgroup_set_charge_target(struct mem_cgroup **target,
+				  struct mem_cgroup *memcg);
+struct mem_cgroup *mem_cgroup_get_from_path(const char *path);
+/**
+ * User is responsible for providing a buffer @buf of length @len and freeing
+ * it.
+ */
+int mem_cgroup_get_name_from_sb(struct super_block *sb, char *buf, size_t len);
+
 void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
 		int zid, int nr_pages);

@@ -1223,6 +1238,38 @@ static inline int mem_cgroup_charge(struct folio *folio,
 	return 0;
 }

+static inline int mem_cgroup_charge_memcg(struct folio *folio,
+					  struct mem_cgroup *memcg,
+					  gfp_t gfp_mask)
+{
+	return 0;
+}
+
+static inline struct mem_cgroup *
+mem_cgroup_mapping_get_charge_target(struct address_space *mapping)
+{
+	return NULL;
+}
+
+static inline void mem_cgroup_set_charge_target(struct mem_cgroup **target,
+						struct mem_cgroup *memcg)
+{
+}
+
+static inline struct mem_cgroup *mem_cgroup_get_from_path(const char *path)
+{
+	return NULL;
+}
+
+static inline int mem_cgroup_get_name_from_sb(struct super_block *sb, char *buf,
+					      size_t len)
+{
+	if (len < 1)
+		return -EINVAL;
+	buf[0] = '\0';
+	return 0;
+}
+
 static inline int mem_cgroup_swapin_charge_page(struct page *page,
 			struct mm_struct *mm, gfp_t gfp, swp_entry_t entry)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 781605e920153..b3d8f52a63d17 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -62,6 +62,7 @@
 #include <linux/tracehook.h>
 #include <linux/psi.h>
 #include <linux/seq_buf.h>
+#include <linux/string.h>
 #include "internal.h"
 #include <net/sock.h>
 #include <net/ip.h>
@@ -2580,6 +2581,126 @@ void mem_cgroup_handle_over_high(void)
 	css_put(&memcg->css);
 }

+/*
+ * Non error return value must eventually be released with css_put().
+ */
+struct mem_cgroup *mem_cgroup_get_from_path(const char *path)
+{
+	static const char procs_filename[] = "/cgroup.procs";
+	struct file *file, *procs;
+	struct cgroup_subsys_state *css;
+	struct mem_cgroup *memcg;
+	char *procs_path =
+		kmalloc(strlen(path) + sizeof(procs_filename), GFP_KERNEL);
+
+	if (procs_path == NULL)
+		return ERR_PTR(-ENOMEM);
+	strcpy(procs_path, path);
+	strcat(procs_path, procs_filename);
+
+	procs = filp_open(procs_path, O_WRONLY, 0);
+	kfree(procs_path);
+
+	/*
+	 * Restrict the capability for tasks to mount with memcg charging to the
+	 * cgroup they could not join. For example, disallow:
+	 *
+	 * mount -t tmpfs -o memcg=root-cgroup nodev <MOUNT_DIR>
+	 *
+	 * if it is a non-root task.
+	 */
+	if (IS_ERR(procs))
+		return (struct mem_cgroup *)procs;
+	fput(procs);
+
+	file = filp_open(path, O_DIRECTORY | O_RDONLY, 0);
+	if (IS_ERR(file))
+		return (struct mem_cgroup *)file;
+
+	css = css_tryget_online_from_dir(file->f_path.dentry,
+					 &memory_cgrp_subsys);
+	if (IS_ERR(css))
+		memcg = (struct mem_cgroup *)css;
+	else
+		memcg = container_of(css, struct mem_cgroup, css);
+
+	fput(file);
+	return memcg;
+}
+
+/*
+ * Get the name of the optional charge target memcg associated with @sb.  This
+ * is the cgroup name, not the cgroup path.
+ */
+int mem_cgroup_get_name_from_sb(struct super_block *sb, char *buf, size_t len)
+{
+	struct mem_cgroup *memcg;
+	int ret = 0;
+
+	buf[0] = '\0';
+
+	rcu_read_lock();
+	memcg = rcu_dereference(sb->s_memcg_to_charge);
+	if (memcg && !css_tryget_online(&memcg->css))
+		memcg = NULL;
+	rcu_read_unlock();
+
+	if (!memcg)
+		return 0;
+
+	ret = cgroup_path(memcg->css.cgroup, buf + len / 2, len / 2);
+	if (ret >= len / 2)
+		strcpy(buf, "?");
+	else {
+		char *p = mangle_path(buf, buf + len / 2, " \t\n\\");
+
+		if (p)
+			*p = '\0';
+		else
+			strcpy(buf, "?");
+	}
+
+	css_put(&memcg->css);
+	return ret < 0 ? ret : 0;
+}
+
+/*
+ * Set or clear (if @memcg is NULL) charge association from file system to
+ * memcg.  If @memcg != NULL, then a css reference must be held by the caller to
+ * ensure that the cgroup is not deleted during this operation.
+ */
+void mem_cgroup_set_charge_target(struct mem_cgroup **target,
+				  struct mem_cgroup *memcg)
+{
+	if (memcg)
+		css_get(&memcg->css);
+	memcg = xchg(target, memcg);
+	if (memcg)
+		css_put(&memcg->css);
+}
+
+/*
+ * Returns the memcg to charge for inode pages.  If non-NULL is returned, caller
+ * must drop reference with css_put().  NULL indicates that the inode does not
+ * have a memcg to charge, so the default process based policy should be used.
+ */
+struct mem_cgroup *
+mem_cgroup_mapping_get_charge_target(struct address_space *mapping)
+{
+	struct mem_cgroup *memcg;
+
+	if (!mapping)
+		return NULL;
+
+	rcu_read_lock();
+	memcg = rcu_dereference(mapping->host->i_sb->s_memcg_to_charge);
+	if (memcg && !css_tryget_online(&memcg->css))
+		memcg = NULL;
+	rcu_read_unlock();
+
+	return memcg;
+}
+
 static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
 			unsigned int nr_pages)
 {
@@ -6678,6 +6799,15 @@ static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
 	return ret;
 }

+int mem_cgroup_charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
+			    gfp_t gfp)
+{
+	if (mem_cgroup_disabled())
+		return 0;
+
+	return charge_memcg(folio, memcg, gfp);
+}
+
 int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp)
 {
 	struct mem_cgroup *memcg;
diff --git a/mm/shmem.c b/mm/shmem.c
index 23c91a8beb781..68f5f45f40cbc 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -115,10 +115,14 @@ struct shmem_options {
 	bool full_inums;
 	int huge;
 	int seen;
+#if CONFIG_MEMCG
+	struct mem_cgroup *memcg;
+#endif
 #define SHMEM_SEEN_BLOCKS 1
 #define SHMEM_SEEN_INODES 2
 #define SHMEM_SEEN_HUGE 4
 #define SHMEM_SEEN_INUMS 8
+#define SHMEM_SEEN_MEMCG 16
 };

 #ifdef CONFIG_TMPFS
@@ -697,6 +701,7 @@ static int shmem_add_to_page_cache(struct page *page,
 	unsigned long i = 0;
 	unsigned long nr = compound_nr(page);
 	int error;
+	struct mem_cgroup *remote_memcg;

 	VM_BUG_ON_PAGE(PageTail(page), page);
 	VM_BUG_ON_PAGE(index != round_down(index, nr), page);
@@ -709,7 +714,14 @@ static int shmem_add_to_page_cache(struct page *page,
 	page->index = index;

 	if (!PageSwapCache(page)) {
-		error = mem_cgroup_charge(page_folio(page), charge_mm, gfp);
+		remote_memcg = mem_cgroup_mapping_get_charge_target(mapping);
+		if (remote_memcg) {
+			error = mem_cgroup_charge_memcg(page_folio(page),
+							remote_memcg, gfp);
+			css_put(&remote_memcg->css);
+		} else
+			error = mem_cgroup_charge(page_folio(page), charge_mm,
+						  gfp);
 		if (error) {
 			if (PageTransHuge(page)) {
 				count_vm_event(THP_FILE_FALLBACK);
@@ -1822,6 +1834,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 	int error;
 	int once = 0;
 	int alloced = 0;
+	struct mem_cgroup *remote_memcg, *old_memcg = NULL;

 	if (index > (MAX_LFS_FILESIZE >> PAGE_SHIFT))
 		return -EFBIG;
@@ -1834,8 +1847,21 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 	sbinfo = SHMEM_SB(inode->i_sb);
 	charge_mm = vma ? vma->vm_mm : NULL;

+	/*
+	 * If we're doing a remote charge here, set the active_memcg as the
+	 * remote memcg, so that eventually if pagecache_get_page() calls into
+	 * filemap_add_folio(), we charge the correct memcg.
+	 */
+	remote_memcg = mem_cgroup_mapping_get_charge_target(mapping);
+	if (remote_memcg)
+		old_memcg = set_active_memcg(remote_memcg);
+
 	page = pagecache_get_page(mapping, index,
 					FGP_ENTRY | FGP_HEAD | FGP_LOCK, 0);
+	if (old_memcg) {
+		set_active_memcg(old_memcg);
+		css_put(&remote_memcg->css);
+	}

 	if (page && vma && userfaultfd_minor(vma)) {
 		if (!xa_is_value(page)) {
@@ -3342,6 +3368,7 @@ static const struct export_operations shmem_export_ops = {
 enum shmem_param {
 	Opt_gid,
 	Opt_huge,
+	Opt_memcg,
 	Opt_mode,
 	Opt_mpol,
 	Opt_nr_blocks,
@@ -3363,6 +3390,7 @@ static const struct constant_table shmem_param_enums_huge[] = {
 const struct fs_parameter_spec shmem_fs_parameters[] = {
 	fsparam_u32   ("gid",		Opt_gid),
 	fsparam_enum  ("huge",		Opt_huge,  shmem_param_enums_huge),
+	fsparam_string("memcg",		Opt_memcg),
 	fsparam_u32oct("mode",		Opt_mode),
 	fsparam_string("mpol",		Opt_mpol),
 	fsparam_string("nr_blocks",	Opt_nr_blocks),
@@ -3379,6 +3407,7 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
 	struct shmem_options *ctx = fc->fs_private;
 	struct fs_parse_result result;
 	unsigned long long size;
+	struct mem_cgroup *memcg;
 	char *rest;
 	int opt;

@@ -3412,6 +3441,17 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
 			goto bad_value;
 		ctx->seen |= SHMEM_SEEN_INODES;
 		break;
+#ifdef CONFIG_MEMCG
+	case Opt_memcg:
+		if (ctx->memcg)
+			css_put(&ctx->memcg->css);
+		memcg = mem_cgroup_get_from_path(param->string);
+		if (IS_ERR(memcg))
+			goto bad_value;
+		ctx->memcg = memcg;
+		ctx->seen |= SHMEM_SEEN_MEMCG;
+		break;
+#endif
 	case Opt_mode:
 		ctx->mode = result.uint_32 & 07777;
 		break;
@@ -3573,6 +3613,14 @@ static int shmem_reconfigure(struct fs_context *fc)
 	}
 	raw_spin_unlock(&sbinfo->stat_lock);
 	mpol_put(mpol);
+#if CONFIG_MEMCG
+	if (ctx->seen & SHMEM_SEEN_MEMCG && ctx->memcg) {
+		mem_cgroup_set_charge_target(&fc->root->d_sb->s_memcg_to_charge,
+					     ctx->memcg);
+		css_put(&ctx->memcg->css);
+		ctx->memcg = NULL;
+	}
+#endif
 	return 0;
 out:
 	raw_spin_unlock(&sbinfo->stat_lock);
@@ -3582,6 +3630,11 @@ static int shmem_reconfigure(struct fs_context *fc)
 static int shmem_show_options(struct seq_file *seq, struct dentry *root)
 {
 	struct shmem_sb_info *sbinfo = SHMEM_SB(root->d_sb);
+	int err;
+	char *buf = __getname();
+
+	if (!buf)
+		return -ENOMEM;

 	if (sbinfo->max_blocks != shmem_default_max_blocks())
 		seq_printf(seq, ",size=%luk",
@@ -3625,7 +3678,13 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
 		seq_printf(seq, ",huge=%s", shmem_format_huge(sbinfo->huge));
 #endif
 	shmem_show_mpol(seq, sbinfo->mpol);
-	return 0;
+	/* Memory cgroup binding: memcg=cgroup_name */
+	err = mem_cgroup_get_name_from_sb(root->d_sb, buf, PATH_MAX);
+	if (!err && buf[0] != '\0')
+		seq_printf(seq, ",memcg=%s", buf);
+
+	__putname(buf);
+	return err;
 }

 #endif /* CONFIG_TMPFS */
@@ -3710,6 +3769,14 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
 	sb->s_flags |= SB_POSIXACL;
 #endif
 	uuid_gen(&sb->s_uuid);
+#if CONFIG_MEMCG
+	if (ctx->memcg) {
+		mem_cgroup_set_charge_target(&sb->s_memcg_to_charge,
+					     ctx->memcg);
+		css_put(&ctx->memcg->css);
+		ctx->memcg = NULL;
+	}
+#endif

 	inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
 	if (!inode)

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v2 2/4] mm/oom: handle remote ooms
       [not found] <20211110211951.3730787-1-almasrymina@google.com>
  2021-11-10 21:19   ` Mina Almasry
@ 2021-11-10 21:19   ` Mina Almasry
  2021-11-10 21:19   ` Mina Almasry
  2021-11-10 21:19   ` Mina Almasry
  3 siblings, 0 replies; 18+ messages in thread
From: Mina Almasry @ 2021-11-10 21:19 UTC (permalink / raw)
  Cc: Mina Almasry, Michal Hocko, Theodore Ts'o, Greg Thelen,
	Shakeel Butt, Andrew Morton, Hugh Dickins, Roman Gushchin,
	Dave Chinner, Johannes Weiner, Tejun Heo, Vladimir Davydov,
	Muchun Song, riel, linux-mm, linux-fsdevel, cgroups

On remote ooms (OOMs due to remote charging), the oom-killer will attempt
to find a task to kill in the memcg under oom, if the oom-killer
is unable to find one, the oom-killer should simply return ENOMEM to the
allocating process.

If we're in pagefault path and we're unable to return ENOMEM to the
allocating process, we instead kill the allocating process.

Signed-off-by: Mina Almasry <almasrymina@google.com>

Cc: Michal Hocko <mhocko@suse.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Greg Thelen <gthelen@google.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Hugh Dickins <hughd@google.com>
CC: Roman Gushchin <guro@fb.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: riel@surriel.com
Cc: linux-mm@kvack.org
Cc: linux-fsdevel@vger.kernel.org
Cc: cgroups@vger.kernel.org

---

Changes in v2:
- Moved the remote oom handling as Roman requested.
- Used mem_cgroup_from_task(current) instead of grabbing the memcg from
current->mm

---
 include/linux/memcontrol.h |  6 ++++++
 mm/memcontrol.c            | 29 +++++++++++++++++++++++++++++
 mm/oom_kill.c              | 22 ++++++++++++++++++++++
 3 files changed, 57 insertions(+)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 866904afd3563..ae4686abd4d32 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -937,6 +937,7 @@ struct mem_cgroup *mem_cgroup_get_from_path(const char *path);
  * it.
  */
 int mem_cgroup_get_name_from_sb(struct super_block *sb, char *buf, size_t len);
+bool is_remote_oom(struct mem_cgroup *memcg_under_oom);

 void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
 		int zid, int nr_pages);
@@ -1270,6 +1271,11 @@ static inline int mem_cgroup_get_name_from_sb(struct super_block *sb, char *buf,
 	return 0;
 }

+static inline bool is_remote_oom(struct mem_cgroup *memcg_under_oom)
+{
+	return false;
+}
+
 static inline int mem_cgroup_swapin_charge_page(struct page *page,
 			struct mm_struct *mm, gfp_t gfp, swp_entry_t entry)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b3d8f52a63d17..8019c396bfdd9 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2664,6 +2664,35 @@ int mem_cgroup_get_name_from_sb(struct super_block *sb, char *buf, size_t len)
 	return ret < 0 ? ret : 0;
 }

+/*
+ * Returns true if current's mm is a descendant of the memcg_under_oom (or
+ * equal to it). False otherwise. This is used by the oom-killer to detect
+ * ooms due to remote charging.
+ */
+bool is_remote_oom(struct mem_cgroup *memcg_under_oom)
+{
+	struct mem_cgroup *current_memcg;
+	bool is_remote_oom;
+
+	if (!memcg_under_oom)
+		return false;
+
+	rcu_read_lock();
+	current_memcg = mem_cgroup_from_task(current);
+	if (current_memcg && !css_tryget_online(&current_memcg->css))
+		current_memcg = NULL;
+	rcu_read_unlock();
+
+	if (!current_memcg)
+		return false;
+
+	is_remote_oom =
+		!mem_cgroup_is_descendant(current_memcg, memcg_under_oom);
+	css_put(&current_memcg->css);
+
+	return is_remote_oom;
+}
+
 /*
  * Set or clear (if @memcg is NULL) charge association from file system to
  * memcg.  If @memcg != NULL, then a css reference must be held by the caller to
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 0a7e16b16b8c3..0e0097a0aed45 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -1108,6 +1108,28 @@ bool out_of_memory(struct oom_control *oc)
 	select_bad_process(oc);
 	/* Found nothing?!?! */
 	if (!oc->chosen) {
+		if (is_remote_oom(oc->memcg)) {
+			/*
+			 * For remote ooms in userfaults, we have no choice but
+			 * to kill the allocating process.
+			 */
+			if (current->in_user_fault &&
+			    !oom_unkillable_task(current)) {
+				get_task_struct(current);
+				oc->chosen = current;
+				oom_kill_process(
+					oc,
+					"Out of memory (Killing remote allocating task)");
+				return true;
+			}
+
+			/*
+			 * For remote ooms in non-userfaults, simply return
+			 * ENOMEM to the caller.
+			 */
+			return false;
+		}
+
 		dump_header(oc, NULL);
 		pr_warn("Out of memory and no killable processes...\n");
 		/*
--
2.34.0.rc0.344.g81b53c2807-goog

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v2 2/4] mm/oom: handle remote ooms
@ 2021-11-10 21:19   ` Mina Almasry
  0 siblings, 0 replies; 18+ messages in thread
From: Mina Almasry @ 2021-11-10 21:19 UTC (permalink / raw)
  Cc: Mina Almasry, Michal Hocko, Theodore Ts'o, Greg Thelen,
	Shakeel Butt, Andrew Morton, Hugh Dickins, Roman Gushchin,
	Dave Chinner, Johannes Weiner, Tejun Heo, Vladimir Davydov,
	Muchun Song, riel, linux-mm, linux-fsdevel, cgroups

On remote ooms (OOMs due to remote charging), the oom-killer will attempt
to find a task to kill in the memcg under oom, if the oom-killer
is unable to find one, the oom-killer should simply return ENOMEM to the
allocating process.

If we're in pagefault path and we're unable to return ENOMEM to the
allocating process, we instead kill the allocating process.

Signed-off-by: Mina Almasry <almasrymina@google.com>

Cc: Michal Hocko <mhocko@suse.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Greg Thelen <gthelen@google.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Hugh Dickins <hughd@google.com>
CC: Roman Gushchin <guro@fb.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: riel@surriel.com
Cc: linux-mm@kvack.org
Cc: linux-fsdevel@vger.kernel.org
Cc: cgroups@vger.kernel.org

---

Changes in v2:
- Moved the remote oom handling as Roman requested.
- Used mem_cgroup_from_task(current) instead of grabbing the memcg from
current->mm

---
 include/linux/memcontrol.h |  6 ++++++
 mm/memcontrol.c            | 29 +++++++++++++++++++++++++++++
 mm/oom_kill.c              | 22 ++++++++++++++++++++++
 3 files changed, 57 insertions(+)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 866904afd3563..ae4686abd4d32 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -937,6 +937,7 @@ struct mem_cgroup *mem_cgroup_get_from_path(const char *path);
  * it.
  */
 int mem_cgroup_get_name_from_sb(struct super_block *sb, char *buf, size_t len);
+bool is_remote_oom(struct mem_cgroup *memcg_under_oom);

 void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
 		int zid, int nr_pages);
@@ -1270,6 +1271,11 @@ static inline int mem_cgroup_get_name_from_sb(struct super_block *sb, char *buf,
 	return 0;
 }

+static inline bool is_remote_oom(struct mem_cgroup *memcg_under_oom)
+{
+	return false;
+}
+
 static inline int mem_cgroup_swapin_charge_page(struct page *page,
 			struct mm_struct *mm, gfp_t gfp, swp_entry_t entry)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b3d8f52a63d17..8019c396bfdd9 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2664,6 +2664,35 @@ int mem_cgroup_get_name_from_sb(struct super_block *sb, char *buf, size_t len)
 	return ret < 0 ? ret : 0;
 }

+/*
+ * Returns true if current's mm is a descendant of the memcg_under_oom (or
+ * equal to it). False otherwise. This is used by the oom-killer to detect
+ * ooms due to remote charging.
+ */
+bool is_remote_oom(struct mem_cgroup *memcg_under_oom)
+{
+	struct mem_cgroup *current_memcg;
+	bool is_remote_oom;
+
+	if (!memcg_under_oom)
+		return false;
+
+	rcu_read_lock();
+	current_memcg = mem_cgroup_from_task(current);
+	if (current_memcg && !css_tryget_online(&current_memcg->css))
+		current_memcg = NULL;
+	rcu_read_unlock();
+
+	if (!current_memcg)
+		return false;
+
+	is_remote_oom =
+		!mem_cgroup_is_descendant(current_memcg, memcg_under_oom);
+	css_put(&current_memcg->css);
+
+	return is_remote_oom;
+}
+
 /*
  * Set or clear (if @memcg is NULL) charge association from file system to
  * memcg.  If @memcg != NULL, then a css reference must be held by the caller to
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 0a7e16b16b8c3..0e0097a0aed45 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -1108,6 +1108,28 @@ bool out_of_memory(struct oom_control *oc)
 	select_bad_process(oc);
 	/* Found nothing?!?! */
 	if (!oc->chosen) {
+		if (is_remote_oom(oc->memcg)) {
+			/*
+			 * For remote ooms in userfaults, we have no choice but
+			 * to kill the allocating process.
+			 */
+			if (current->in_user_fault &&
+			    !oom_unkillable_task(current)) {
+				get_task_struct(current);
+				oc->chosen = current;
+				oom_kill_process(
+					oc,
+					"Out of memory (Killing remote allocating task)");
+				return true;
+			}
+
+			/*
+			 * For remote ooms in non-userfaults, simply return
+			 * ENOMEM to the caller.
+			 */
+			return false;
+		}
+
 		dump_header(oc, NULL);
 		pr_warn("Out of memory and no killable processes...\n");
 		/*
--
2.34.0.rc0.344.g81b53c2807-goog


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v2 2/4] mm/oom: handle remote ooms
@ 2021-11-10 21:19   ` Mina Almasry
  0 siblings, 0 replies; 18+ messages in thread
From: Mina Almasry @ 2021-11-10 21:19 UTC (permalink / raw)
  Cc: Mina Almasry, Michal Hocko, Theodore Ts'o, Greg Thelen,
	Shakeel Butt, Andrew Morton, Hugh Dickins, Roman Gushchin,
	Dave Chinner, Johannes Weiner, Tejun Heo, Vladimir Davydov,
	Muchun Song, riel-ebMLmSuQjDVBDgjK7y7TUQ,
	linux-mm-Bw31MaZKKs3YtjvyW6yDsg,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	cgroups-u79uwXL29TY76Z2rM5mHXA

On remote ooms (OOMs due to remote charging), the oom-killer will attempt
to find a task to kill in the memcg under oom, if the oom-killer
is unable to find one, the oom-killer should simply return ENOMEM to the
allocating process.

If we're in pagefault path and we're unable to return ENOMEM to the
allocating process, we instead kill the allocating process.

Signed-off-by: Mina Almasry <almasrymina-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>

Cc: Michal Hocko <mhocko-IBi9RG/b67k@public.gmane.org>
Cc: Theodore Ts'o <tytso-3s7WtUTddSA@public.gmane.org>
Cc: Greg Thelen <gthelen-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
Cc: Shakeel Butt <shakeelb-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
Cc: Andrew Morton <akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>
Cc: Hugh Dickins <hughd-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
CC: Roman Gushchin <guro-b10kYP2dOMg@public.gmane.org>
Cc: Dave Chinner <david-FqsqvQoI3Ljby3iVrkZq2A@public.gmane.org>
Cc: Johannes Weiner <hannes-druUgvl0LCNAfugRpC6u6w@public.gmane.org>
Cc: Hugh Dickins <hughd-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
Cc: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Cc: Vladimir Davydov <vdavydov.dev-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Cc: Muchun Song <songmuchun-EC8Uxl6Npydl57MIdRCFDg@public.gmane.org>
Cc: riel-ebMLmSuQjDVBDgjK7y7TUQ@public.gmane.org
Cc: linux-mm-Bw31MaZKKs3YtjvyW6yDsg@public.gmane.org
Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Cc: cgroups-u79uwXL29TY76Z2rM5mHXA@public.gmane.org

---

Changes in v2:
- Moved the remote oom handling as Roman requested.
- Used mem_cgroup_from_task(current) instead of grabbing the memcg from
current->mm

---
 include/linux/memcontrol.h |  6 ++++++
 mm/memcontrol.c            | 29 +++++++++++++++++++++++++++++
 mm/oom_kill.c              | 22 ++++++++++++++++++++++
 3 files changed, 57 insertions(+)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 866904afd3563..ae4686abd4d32 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -937,6 +937,7 @@ struct mem_cgroup *mem_cgroup_get_from_path(const char *path);
  * it.
  */
 int mem_cgroup_get_name_from_sb(struct super_block *sb, char *buf, size_t len);
+bool is_remote_oom(struct mem_cgroup *memcg_under_oom);

 void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
 		int zid, int nr_pages);
@@ -1270,6 +1271,11 @@ static inline int mem_cgroup_get_name_from_sb(struct super_block *sb, char *buf,
 	return 0;
 }

+static inline bool is_remote_oom(struct mem_cgroup *memcg_under_oom)
+{
+	return false;
+}
+
 static inline int mem_cgroup_swapin_charge_page(struct page *page,
 			struct mm_struct *mm, gfp_t gfp, swp_entry_t entry)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b3d8f52a63d17..8019c396bfdd9 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2664,6 +2664,35 @@ int mem_cgroup_get_name_from_sb(struct super_block *sb, char *buf, size_t len)
 	return ret < 0 ? ret : 0;
 }

+/*
+ * Returns true if current's mm is a descendant of the memcg_under_oom (or
+ * equal to it). False otherwise. This is used by the oom-killer to detect
+ * ooms due to remote charging.
+ */
+bool is_remote_oom(struct mem_cgroup *memcg_under_oom)
+{
+	struct mem_cgroup *current_memcg;
+	bool is_remote_oom;
+
+	if (!memcg_under_oom)
+		return false;
+
+	rcu_read_lock();
+	current_memcg = mem_cgroup_from_task(current);
+	if (current_memcg && !css_tryget_online(&current_memcg->css))
+		current_memcg = NULL;
+	rcu_read_unlock();
+
+	if (!current_memcg)
+		return false;
+
+	is_remote_oom =
+		!mem_cgroup_is_descendant(current_memcg, memcg_under_oom);
+	css_put(&current_memcg->css);
+
+	return is_remote_oom;
+}
+
 /*
  * Set or clear (if @memcg is NULL) charge association from file system to
  * memcg.  If @memcg != NULL, then a css reference must be held by the caller to
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 0a7e16b16b8c3..0e0097a0aed45 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -1108,6 +1108,28 @@ bool out_of_memory(struct oom_control *oc)
 	select_bad_process(oc);
 	/* Found nothing?!?! */
 	if (!oc->chosen) {
+		if (is_remote_oom(oc->memcg)) {
+			/*
+			 * For remote ooms in userfaults, we have no choice but
+			 * to kill the allocating process.
+			 */
+			if (current->in_user_fault &&
+			    !oom_unkillable_task(current)) {
+				get_task_struct(current);
+				oc->chosen = current;
+				oom_kill_process(
+					oc,
+					"Out of memory (Killing remote allocating task)");
+				return true;
+			}
+
+			/*
+			 * For remote ooms in non-userfaults, simply return
+			 * ENOMEM to the caller.
+			 */
+			return false;
+		}
+
 		dump_header(oc, NULL);
 		pr_warn("Out of memory and no killable processes...\n");
 		/*

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v2 3/4] mm, shmem: add tmpfs memcg= option documentation
       [not found] <20211110211951.3730787-1-almasrymina@google.com>
  2021-11-10 21:19   ` Mina Almasry
@ 2021-11-10 21:19   ` Mina Almasry
  2021-11-10 21:19   ` Mina Almasry
  2021-11-10 21:19   ` Mina Almasry
  3 siblings, 0 replies; 18+ messages in thread
From: Mina Almasry @ 2021-11-10 21:19 UTC (permalink / raw)
  Cc: Mina Almasry, Michal Hocko, Theodore Ts'o, Greg Thelen,
	Shakeel Butt, Andrew Morton, Hugh Dickins, Roman Gushchin,
	Dave Chinner, Johannes Weiner, Tejun Heo, Vladimir Davydov,
	Muchun Song, riel, linux-mm, linux-fsdevel, cgroups

Signed-off-by: Mina Almasry <almasrymina@google.com>

Cc: Michal Hocko <mhocko@suse.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Greg Thelen <gthelen@google.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: riel@surriel.com
Cc: linux-mm@kvack.org
Cc: linux-fsdevel@vger.kernel.org
Cc: cgroups@vger.kernel.org

---
 Documentation/filesystems/tmpfs.rst | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/Documentation/filesystems/tmpfs.rst b/Documentation/filesystems/tmpfs.rst
index 0408c245785e3..1ab04e8fa9222 100644
--- a/Documentation/filesystems/tmpfs.rst
+++ b/Documentation/filesystems/tmpfs.rst
@@ -137,6 +137,23 @@ mount options.  It can be added later, when the tmpfs is already mounted
 on MountPoint, by 'mount -o remount,mpol=Policy:NodeList MountPoint'.


+If CONFIG_MEMCG is enabled, tmpfs has a mount option to specify the memory
+cgroup to be charged for page allocations.
+
+memcg=/sys/fs/cgroup/unified/test/: data page allocations are charged to
+cgroup /sys/fs/cgroup/unified/test/.
+
+When charging memory to the remote memcg (memcg specified with memcg=) and
+hitting the limit, the oom-killer will be invoked and will attempt to kill
+a process in the remote memcg. If no such processes are found, the remote
+charging process gets an ENOMEM. If the remote charging process is in the
+pagefault path, it gets killed.
+
+Only processes that have access to /sys/fs/cgroup/unified/test/cgroup.procs can
+mount a tmpfs with memcg=/sys/fs/cgroup/unified/test. Thus, a process is able
+to charge memory to a cgroup only if it itself is able to enter that cgroup.
+
+
 To specify the initial root directory you can use the following mount
 options:

--
2.34.0.rc0.344.g81b53c2807-goog

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v2 3/4] mm, shmem: add tmpfs memcg= option documentation
@ 2021-11-10 21:19   ` Mina Almasry
  0 siblings, 0 replies; 18+ messages in thread
From: Mina Almasry @ 2021-11-10 21:19 UTC (permalink / raw)
  Cc: Mina Almasry, Michal Hocko, Theodore Ts'o, Greg Thelen,
	Shakeel Butt, Andrew Morton, Hugh Dickins, Roman Gushchin,
	Dave Chinner, Johannes Weiner, Tejun Heo, Vladimir Davydov,
	Muchun Song, riel, linux-mm, linux-fsdevel, cgroups

Signed-off-by: Mina Almasry <almasrymina@google.com>

Cc: Michal Hocko <mhocko@suse.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Greg Thelen <gthelen@google.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: riel@surriel.com
Cc: linux-mm@kvack.org
Cc: linux-fsdevel@vger.kernel.org
Cc: cgroups@vger.kernel.org

---
 Documentation/filesystems/tmpfs.rst | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/Documentation/filesystems/tmpfs.rst b/Documentation/filesystems/tmpfs.rst
index 0408c245785e3..1ab04e8fa9222 100644
--- a/Documentation/filesystems/tmpfs.rst
+++ b/Documentation/filesystems/tmpfs.rst
@@ -137,6 +137,23 @@ mount options.  It can be added later, when the tmpfs is already mounted
 on MountPoint, by 'mount -o remount,mpol=Policy:NodeList MountPoint'.


+If CONFIG_MEMCG is enabled, tmpfs has a mount option to specify the memory
+cgroup to be charged for page allocations.
+
+memcg=/sys/fs/cgroup/unified/test/: data page allocations are charged to
+cgroup /sys/fs/cgroup/unified/test/.
+
+When charging memory to the remote memcg (memcg specified with memcg=) and
+hitting the limit, the oom-killer will be invoked and will attempt to kill
+a process in the remote memcg. If no such processes are found, the remote
+charging process gets an ENOMEM. If the remote charging process is in the
+pagefault path, it gets killed.
+
+Only processes that have access to /sys/fs/cgroup/unified/test/cgroup.procs can
+mount a tmpfs with memcg=/sys/fs/cgroup/unified/test. Thus, a process is able
+to charge memory to a cgroup only if it itself is able to enter that cgroup.
+
+
 To specify the initial root directory you can use the following mount
 options:

--
2.34.0.rc0.344.g81b53c2807-goog


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v2 3/4] mm, shmem: add tmpfs memcg= option documentation
@ 2021-11-10 21:19   ` Mina Almasry
  0 siblings, 0 replies; 18+ messages in thread
From: Mina Almasry @ 2021-11-10 21:19 UTC (permalink / raw)
  Cc: Mina Almasry, Michal Hocko, Theodore Ts'o, Greg Thelen,
	Shakeel Butt, Andrew Morton, Hugh Dickins, Roman Gushchin,
	Dave Chinner, Johannes Weiner, Tejun Heo, Vladimir Davydov,
	Muchun Song, riel-ebMLmSuQjDVBDgjK7y7TUQ,
	linux-mm-Bw31MaZKKs3YtjvyW6yDsg,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	cgroups-u79uwXL29TY76Z2rM5mHXA

Signed-off-by: Mina Almasry <almasrymina-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>

Cc: Michal Hocko <mhocko-IBi9RG/b67k@public.gmane.org>
Cc: Theodore Ts'o <tytso-3s7WtUTddSA@public.gmane.org>
Cc: Greg Thelen <gthelen-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
Cc: Shakeel Butt <shakeelb-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
Cc: Andrew Morton <akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>
Cc: Hugh Dickins <hughd-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
Cc: Roman Gushchin <guro-b10kYP2dOMg@public.gmane.org>
Cc: Dave Chinner <david-FqsqvQoI3Ljby3iVrkZq2A@public.gmane.org>
Cc: Johannes Weiner <hannes-druUgvl0LCNAfugRpC6u6w@public.gmane.org>
Cc: Hugh Dickins <hughd-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
Cc: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Cc: Vladimir Davydov <vdavydov.dev-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Cc: Muchun Song <songmuchun-EC8Uxl6Npydl57MIdRCFDg@public.gmane.org>
Cc: riel-ebMLmSuQjDVBDgjK7y7TUQ@public.gmane.org
Cc: linux-mm-Bw31MaZKKs3YtjvyW6yDsg@public.gmane.org
Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Cc: cgroups-u79uwXL29TY76Z2rM5mHXA@public.gmane.org

---
 Documentation/filesystems/tmpfs.rst | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/Documentation/filesystems/tmpfs.rst b/Documentation/filesystems/tmpfs.rst
index 0408c245785e3..1ab04e8fa9222 100644
--- a/Documentation/filesystems/tmpfs.rst
+++ b/Documentation/filesystems/tmpfs.rst
@@ -137,6 +137,23 @@ mount options.  It can be added later, when the tmpfs is already mounted
 on MountPoint, by 'mount -o remount,mpol=Policy:NodeList MountPoint'.


+If CONFIG_MEMCG is enabled, tmpfs has a mount option to specify the memory
+cgroup to be charged for page allocations.
+
+memcg=/sys/fs/cgroup/unified/test/: data page allocations are charged to
+cgroup /sys/fs/cgroup/unified/test/.
+
+When charging memory to the remote memcg (memcg specified with memcg=) and
+hitting the limit, the oom-killer will be invoked and will attempt to kill
+a process in the remote memcg. If no such processes are found, the remote
+charging process gets an ENOMEM. If the remote charging process is in the
+pagefault path, it gets killed.
+
+Only processes that have access to /sys/fs/cgroup/unified/test/cgroup.procs can
+mount a tmpfs with memcg=/sys/fs/cgroup/unified/test. Thus, a process is able
+to charge memory to a cgroup only if it itself is able to enter that cgroup.
+
+
 To specify the initial root directory you can use the following mount
 options:

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v2 4/4] mm, shmem, selftests: add tmpfs memcg= mount option tests
       [not found] <20211110211951.3730787-1-almasrymina@google.com>
  2021-11-10 21:19   ` Mina Almasry
@ 2021-11-10 21:19   ` Mina Almasry
  2021-11-10 21:19   ` Mina Almasry
  2021-11-10 21:19   ` Mina Almasry
  3 siblings, 0 replies; 18+ messages in thread
From: Mina Almasry @ 2021-11-10 21:19 UTC (permalink / raw)
  Cc: Mina Almasry, Michal Hocko, Theodore Ts'o, Greg Thelen,
	Shakeel Butt, Andrew Morton, Hugh Dickins, Roman Gushchin,
	Dave Chinner, Johannes Weiner, Tejun Heo, Vladimir Davydov,
	Muchun Song, riel, linux-mm, linux-fsdevel, cgroups

Signed-off-by: Mina Almasry <almasrymina@google.com>

Cc: Michal Hocko <mhocko@suse.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Greg Thelen <gthelen@google.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: riel@surriel.com
Cc: linux-mm@kvack.org
Cc: linux-fsdevel@vger.kernel.org
Cc: cgroups@vger.kernel.org

---
 tools/testing/selftests/vm/.gitignore     |   1 +
 tools/testing/selftests/vm/mmap_write.c   | 103 ++++++++++++++++++++++
 tools/testing/selftests/vm/tmpfs-memcg.sh |  87 ++++++++++++++++++
 3 files changed, 191 insertions(+)
 create mode 100644 tools/testing/selftests/vm/mmap_write.c
 create mode 100755 tools/testing/selftests/vm/tmpfs-memcg.sh

diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index 2e7e86e852828..cb229974c5f15 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -19,6 +19,7 @@ madv_populate
 userfaultfd
 mlock-intersect-test
 mlock-random-test
+mmap_write
 virtual_address_range
 gup_test
 va_128TBswitch
diff --git a/tools/testing/selftests/vm/mmap_write.c b/tools/testing/selftests/vm/mmap_write.c
new file mode 100644
index 0000000000000..88a8468f2128c
--- /dev/null
+++ b/tools/testing/selftests/vm/mmap_write.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This program faults memory in tmpfs
+ */
+
+#include <err.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/shm.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+/* Global definitions. */
+
+/* Global variables. */
+static const char *self;
+static char *shmaddr;
+static int shmid;
+
+/*
+ * Show usage and exit.
+ */
+static void exit_usage(void)
+{
+	printf("Usage: %s -p <path to tmpfs file> -s <size to map>\n", self);
+	exit(EXIT_FAILURE);
+}
+
+int main(int argc, char **argv)
+{
+	int fd = 0;
+	int key = 0;
+	int *ptr = NULL;
+	int c = 0;
+	int size = 0;
+	char path[256] = "";
+	int want_sleep = 0, private = 0;
+	int populate = 0;
+	int write = 0;
+	int reserve = 1;
+
+	/* Parse command-line arguments. */
+	setvbuf(stdout, NULL, _IONBF, 0);
+	self = argv[0];
+
+	while ((c = getopt(argc, argv, ":s:p:")) != -1) {
+		switch (c) {
+		case 's':
+			size = atoi(optarg);
+			break;
+		case 'p':
+			strncpy(path, optarg, sizeof(path));
+			break;
+		default:
+			errno = EINVAL;
+			perror("Invalid arg");
+			exit_usage();
+		}
+	}
+
+	printf("%s\n", path);
+	if (strncmp(path, "", sizeof(path)) != 0) {
+		printf("Writing to this path: %s\n", path);
+	} else {
+		errno = EINVAL;
+		perror("path not found");
+		exit_usage();
+	}
+
+	if (size != 0) {
+		printf("Writing this size: %d\n", size);
+	} else {
+		errno = EINVAL;
+		perror("size not found");
+		exit_usage();
+	}
+
+	fd = open(path, O_CREAT | O_RDWR, 0777);
+	if (fd == -1)
+		err(1, "Failed to open file.");
+
+	if (ftruncate(fd, size))
+		err(1, "failed to ftruncate %s", path);
+
+	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	if (ptr == MAP_FAILED) {
+		close(fd);
+		err(1, "Error mapping the file");
+	}
+
+	printf("Writing to memory.\n");
+	memset(ptr, 1, size);
+	printf("Done writing to memory.\n");
+	close(fd);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/vm/tmpfs-memcg.sh b/tools/testing/selftests/vm/tmpfs-memcg.sh
new file mode 100755
index 0000000000000..eb584ddcbae5f
--- /dev/null
+++ b/tools/testing/selftests/vm/tmpfs-memcg.sh
@@ -0,0 +1,87 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+CGROUP_PATH=/dev/cgroup/memory/tmpfs-memcg-test
+
+function cleanup() {
+  rm -rf /mnt/tmpfs/*
+  umount /mnt/tmpfs
+  rm -rf /mnt/tmpfs
+
+  rmdir $CGROUP_PATH
+
+  echo CLEANUP DONE
+}
+
+function setup() {
+  mkdir -p $CGROUP_PATH
+  echo $((10 * 1024 * 1024)) > $CGROUP_PATH/memory.limit_in_bytes
+  echo 0 > $CGROUP_PATH/cpuset.cpus
+  echo 0 > $CGROUP_PATH/cpuset.mems
+
+  mkdir -p /mnt/tmpfs
+
+  echo SETUP DONE
+}
+
+function expect_equal() {
+  local expected="$1"
+  local actual="$2"
+  local error="$3"
+
+  if [[ "$actual" != "$expected" ]]; then
+    echo "expected ($expected) != actual ($actual): $3" >&2
+    cleanup
+    exit 1
+  fi
+}
+
+function expect_ge() {
+  local expected="$1"
+  local actual="$2"
+  local error="$3"
+
+  if [[ "$actual" -lt "$expected" ]]; then
+    echo "expected ($expected) < actual ($actual): $3" >&2
+    cleanup
+    exit 1
+  fi
+}
+
+cleanup
+setup
+
+mount -t tmpfs -o memcg=$CGROUP_PATH tmpfs /mnt/tmpfs
+
+TARGET_MEMCG_USAGE=$(cat $CGROUP_PATH/memory.usage_in_bytes)
+expect_equal 0 "$TARGET_MEMCG_USAGE" "Before echo, memcg usage should be 0"
+
+# Echo to allocate a page in the tmpfs
+echo hello > /mnt/tmpfs/test
+TARGET_MEMCG_USAGE=$(cat $CGROUP_PATH/memory.usage_in_bytes)
+expect_ge 4096 "$TARGET_MEMCG_USAGE" "After echo, memcg usage should be greater than 4096"
+echo "Echo test succeeded"
+
+tools/testing/selftests/vm/mmap_write -p /mnt/tmpfs/test -s $((1 * 1024 * 1024))
+TARGET_MEMCG_USAGE=$(cat $CGROUP_PATH/memory.usage_in_bytes)
+expect_ge $((1 * 1024 * 1024)) "$TARGET_MEMCG_USAGE" "After echo, memcg usage should greater than 1MB"
+echo "Write succeeded"
+
+# OOM the remote container on pagefault.
+echo
+echo
+echo "OOMing the remote container using pagefault."
+echo "This will take a long time because the kernel goes through reclaim retries,"
+echo "but should eventually be OOM-killed by 'Out of memory (Killing remote allocating task)'"
+#tools/testing/selftests/vm/mmap_write -p /mnt/tmpfs/test -s $((11 * 1024 * 1024))
+
+# OOM the remote container on non pagefault.
+echo
+echo
+echo "OOMing the remote container using cat (non-pagefault)"
+echo "This will take a long time because the kernel goes through reclaim retries,"
+echo "but should eventually the cat command should receive an ENOMEM"
+cat /dev/random > /mnt/tmpfs/random
+
+cleanup
+echo SUCCESS
--
2.34.0.rc0.344.g81b53c2807-goog

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v2 4/4] mm, shmem, selftests: add tmpfs memcg= mount option tests
@ 2021-11-10 21:19   ` Mina Almasry
  0 siblings, 0 replies; 18+ messages in thread
From: Mina Almasry @ 2021-11-10 21:19 UTC (permalink / raw)
  Cc: Mina Almasry, Michal Hocko, Theodore Ts'o, Greg Thelen,
	Shakeel Butt, Andrew Morton, Hugh Dickins, Roman Gushchin,
	Dave Chinner, Johannes Weiner, Tejun Heo, Vladimir Davydov,
	Muchun Song, riel, linux-mm, linux-fsdevel, cgroups

Signed-off-by: Mina Almasry <almasrymina@google.com>

Cc: Michal Hocko <mhocko@suse.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Greg Thelen <gthelen@google.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: riel@surriel.com
Cc: linux-mm@kvack.org
Cc: linux-fsdevel@vger.kernel.org
Cc: cgroups@vger.kernel.org

---
 tools/testing/selftests/vm/.gitignore     |   1 +
 tools/testing/selftests/vm/mmap_write.c   | 103 ++++++++++++++++++++++
 tools/testing/selftests/vm/tmpfs-memcg.sh |  87 ++++++++++++++++++
 3 files changed, 191 insertions(+)
 create mode 100644 tools/testing/selftests/vm/mmap_write.c
 create mode 100755 tools/testing/selftests/vm/tmpfs-memcg.sh

diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index 2e7e86e852828..cb229974c5f15 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -19,6 +19,7 @@ madv_populate
 userfaultfd
 mlock-intersect-test
 mlock-random-test
+mmap_write
 virtual_address_range
 gup_test
 va_128TBswitch
diff --git a/tools/testing/selftests/vm/mmap_write.c b/tools/testing/selftests/vm/mmap_write.c
new file mode 100644
index 0000000000000..88a8468f2128c
--- /dev/null
+++ b/tools/testing/selftests/vm/mmap_write.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This program faults memory in tmpfs
+ */
+
+#include <err.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/shm.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+/* Global definitions. */
+
+/* Global variables. */
+static const char *self;
+static char *shmaddr;
+static int shmid;
+
+/*
+ * Show usage and exit.
+ */
+static void exit_usage(void)
+{
+	printf("Usage: %s -p <path to tmpfs file> -s <size to map>\n", self);
+	exit(EXIT_FAILURE);
+}
+
+int main(int argc, char **argv)
+{
+	int fd = 0;
+	int key = 0;
+	int *ptr = NULL;
+	int c = 0;
+	int size = 0;
+	char path[256] = "";
+	int want_sleep = 0, private = 0;
+	int populate = 0;
+	int write = 0;
+	int reserve = 1;
+
+	/* Parse command-line arguments. */
+	setvbuf(stdout, NULL, _IONBF, 0);
+	self = argv[0];
+
+	while ((c = getopt(argc, argv, ":s:p:")) != -1) {
+		switch (c) {
+		case 's':
+			size = atoi(optarg);
+			break;
+		case 'p':
+			strncpy(path, optarg, sizeof(path));
+			break;
+		default:
+			errno = EINVAL;
+			perror("Invalid arg");
+			exit_usage();
+		}
+	}
+
+	printf("%s\n", path);
+	if (strncmp(path, "", sizeof(path)) != 0) {
+		printf("Writing to this path: %s\n", path);
+	} else {
+		errno = EINVAL;
+		perror("path not found");
+		exit_usage();
+	}
+
+	if (size != 0) {
+		printf("Writing this size: %d\n", size);
+	} else {
+		errno = EINVAL;
+		perror("size not found");
+		exit_usage();
+	}
+
+	fd = open(path, O_CREAT | O_RDWR, 0777);
+	if (fd == -1)
+		err(1, "Failed to open file.");
+
+	if (ftruncate(fd, size))
+		err(1, "failed to ftruncate %s", path);
+
+	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	if (ptr == MAP_FAILED) {
+		close(fd);
+		err(1, "Error mapping the file");
+	}
+
+	printf("Writing to memory.\n");
+	memset(ptr, 1, size);
+	printf("Done writing to memory.\n");
+	close(fd);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/vm/tmpfs-memcg.sh b/tools/testing/selftests/vm/tmpfs-memcg.sh
new file mode 100755
index 0000000000000..eb584ddcbae5f
--- /dev/null
+++ b/tools/testing/selftests/vm/tmpfs-memcg.sh
@@ -0,0 +1,87 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+CGROUP_PATH=/dev/cgroup/memory/tmpfs-memcg-test
+
+function cleanup() {
+  rm -rf /mnt/tmpfs/*
+  umount /mnt/tmpfs
+  rm -rf /mnt/tmpfs
+
+  rmdir $CGROUP_PATH
+
+  echo CLEANUP DONE
+}
+
+function setup() {
+  mkdir -p $CGROUP_PATH
+  echo $((10 * 1024 * 1024)) > $CGROUP_PATH/memory.limit_in_bytes
+  echo 0 > $CGROUP_PATH/cpuset.cpus
+  echo 0 > $CGROUP_PATH/cpuset.mems
+
+  mkdir -p /mnt/tmpfs
+
+  echo SETUP DONE
+}
+
+function expect_equal() {
+  local expected="$1"
+  local actual="$2"
+  local error="$3"
+
+  if [[ "$actual" != "$expected" ]]; then
+    echo "expected ($expected) != actual ($actual): $3" >&2
+    cleanup
+    exit 1
+  fi
+}
+
+function expect_ge() {
+  local expected="$1"
+  local actual="$2"
+  local error="$3"
+
+  if [[ "$actual" -lt "$expected" ]]; then
+    echo "expected ($expected) < actual ($actual): $3" >&2
+    cleanup
+    exit 1
+  fi
+}
+
+cleanup
+setup
+
+mount -t tmpfs -o memcg=$CGROUP_PATH tmpfs /mnt/tmpfs
+
+TARGET_MEMCG_USAGE=$(cat $CGROUP_PATH/memory.usage_in_bytes)
+expect_equal 0 "$TARGET_MEMCG_USAGE" "Before echo, memcg usage should be 0"
+
+# Echo to allocate a page in the tmpfs
+echo hello > /mnt/tmpfs/test
+TARGET_MEMCG_USAGE=$(cat $CGROUP_PATH/memory.usage_in_bytes)
+expect_ge 4096 "$TARGET_MEMCG_USAGE" "After echo, memcg usage should be greater than 4096"
+echo "Echo test succeeded"
+
+tools/testing/selftests/vm/mmap_write -p /mnt/tmpfs/test -s $((1 * 1024 * 1024))
+TARGET_MEMCG_USAGE=$(cat $CGROUP_PATH/memory.usage_in_bytes)
+expect_ge $((1 * 1024 * 1024)) "$TARGET_MEMCG_USAGE" "After echo, memcg usage should greater than 1MB"
+echo "Write succeeded"
+
+# OOM the remote container on pagefault.
+echo
+echo
+echo "OOMing the remote container using pagefault."
+echo "This will take a long time because the kernel goes through reclaim retries,"
+echo "but should eventually be OOM-killed by 'Out of memory (Killing remote allocating task)'"
+#tools/testing/selftests/vm/mmap_write -p /mnt/tmpfs/test -s $((11 * 1024 * 1024))
+
+# OOM the remote container on non pagefault.
+echo
+echo
+echo "OOMing the remote container using cat (non-pagefault)"
+echo "This will take a long time because the kernel goes through reclaim retries,"
+echo "but should eventually the cat command should receive an ENOMEM"
+cat /dev/random > /mnt/tmpfs/random
+
+cleanup
+echo SUCCESS
--
2.34.0.rc0.344.g81b53c2807-goog


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v2 4/4] mm, shmem, selftests: add tmpfs memcg= mount option tests
@ 2021-11-10 21:19   ` Mina Almasry
  0 siblings, 0 replies; 18+ messages in thread
From: Mina Almasry @ 2021-11-10 21:19 UTC (permalink / raw)
  Cc: Mina Almasry, Michal Hocko, Theodore Ts'o, Greg Thelen,
	Shakeel Butt, Andrew Morton, Hugh Dickins, Roman Gushchin,
	Dave Chinner, Johannes Weiner, Tejun Heo, Vladimir Davydov,
	Muchun Song, riel-ebMLmSuQjDVBDgjK7y7TUQ,
	linux-mm-Bw31MaZKKs3YtjvyW6yDsg,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
	cgroups-u79uwXL29TY76Z2rM5mHXA

Signed-off-by: Mina Almasry <almasrymina-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>

Cc: Michal Hocko <mhocko-IBi9RG/b67k@public.gmane.org>
Cc: Theodore Ts'o <tytso-3s7WtUTddSA@public.gmane.org>
Cc: Greg Thelen <gthelen-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
Cc: Shakeel Butt <shakeelb-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
Cc: Andrew Morton <akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>
Cc: Hugh Dickins <hughd-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
Cc: Roman Gushchin <guro-b10kYP2dOMg@public.gmane.org>
Cc: Dave Chinner <david-FqsqvQoI3Ljby3iVrkZq2A@public.gmane.org>
Cc: Johannes Weiner <hannes-druUgvl0LCNAfugRpC6u6w@public.gmane.org>
Cc: Hugh Dickins <hughd-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
Cc: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Cc: Vladimir Davydov <vdavydov.dev-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Cc: Muchun Song <songmuchun-EC8Uxl6Npydl57MIdRCFDg@public.gmane.org>
Cc: riel-ebMLmSuQjDVBDgjK7y7TUQ@public.gmane.org
Cc: linux-mm-Bw31MaZKKs3YtjvyW6yDsg@public.gmane.org
Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Cc: cgroups-u79uwXL29TY76Z2rM5mHXA@public.gmane.org

---
 tools/testing/selftests/vm/.gitignore     |   1 +
 tools/testing/selftests/vm/mmap_write.c   | 103 ++++++++++++++++++++++
 tools/testing/selftests/vm/tmpfs-memcg.sh |  87 ++++++++++++++++++
 3 files changed, 191 insertions(+)
 create mode 100644 tools/testing/selftests/vm/mmap_write.c
 create mode 100755 tools/testing/selftests/vm/tmpfs-memcg.sh

diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index 2e7e86e852828..cb229974c5f15 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -19,6 +19,7 @@ madv_populate
 userfaultfd
 mlock-intersect-test
 mlock-random-test
+mmap_write
 virtual_address_range
 gup_test
 va_128TBswitch
diff --git a/tools/testing/selftests/vm/mmap_write.c b/tools/testing/selftests/vm/mmap_write.c
new file mode 100644
index 0000000000000..88a8468f2128c
--- /dev/null
+++ b/tools/testing/selftests/vm/mmap_write.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This program faults memory in tmpfs
+ */
+
+#include <err.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/shm.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+/* Global definitions. */
+
+/* Global variables. */
+static const char *self;
+static char *shmaddr;
+static int shmid;
+
+/*
+ * Show usage and exit.
+ */
+static void exit_usage(void)
+{
+	printf("Usage: %s -p <path to tmpfs file> -s <size to map>\n", self);
+	exit(EXIT_FAILURE);
+}
+
+int main(int argc, char **argv)
+{
+	int fd = 0;
+	int key = 0;
+	int *ptr = NULL;
+	int c = 0;
+	int size = 0;
+	char path[256] = "";
+	int want_sleep = 0, private = 0;
+	int populate = 0;
+	int write = 0;
+	int reserve = 1;
+
+	/* Parse command-line arguments. */
+	setvbuf(stdout, NULL, _IONBF, 0);
+	self = argv[0];
+
+	while ((c = getopt(argc, argv, ":s:p:")) != -1) {
+		switch (c) {
+		case 's':
+			size = atoi(optarg);
+			break;
+		case 'p':
+			strncpy(path, optarg, sizeof(path));
+			break;
+		default:
+			errno = EINVAL;
+			perror("Invalid arg");
+			exit_usage();
+		}
+	}
+
+	printf("%s\n", path);
+	if (strncmp(path, "", sizeof(path)) != 0) {
+		printf("Writing to this path: %s\n", path);
+	} else {
+		errno = EINVAL;
+		perror("path not found");
+		exit_usage();
+	}
+
+	if (size != 0) {
+		printf("Writing this size: %d\n", size);
+	} else {
+		errno = EINVAL;
+		perror("size not found");
+		exit_usage();
+	}
+
+	fd = open(path, O_CREAT | O_RDWR, 0777);
+	if (fd == -1)
+		err(1, "Failed to open file.");
+
+	if (ftruncate(fd, size))
+		err(1, "failed to ftruncate %s", path);
+
+	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	if (ptr == MAP_FAILED) {
+		close(fd);
+		err(1, "Error mapping the file");
+	}
+
+	printf("Writing to memory.\n");
+	memset(ptr, 1, size);
+	printf("Done writing to memory.\n");
+	close(fd);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/vm/tmpfs-memcg.sh b/tools/testing/selftests/vm/tmpfs-memcg.sh
new file mode 100755
index 0000000000000..eb584ddcbae5f
--- /dev/null
+++ b/tools/testing/selftests/vm/tmpfs-memcg.sh
@@ -0,0 +1,87 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+CGROUP_PATH=/dev/cgroup/memory/tmpfs-memcg-test
+
+function cleanup() {
+  rm -rf /mnt/tmpfs/*
+  umount /mnt/tmpfs
+  rm -rf /mnt/tmpfs
+
+  rmdir $CGROUP_PATH
+
+  echo CLEANUP DONE
+}
+
+function setup() {
+  mkdir -p $CGROUP_PATH
+  echo $((10 * 1024 * 1024)) > $CGROUP_PATH/memory.limit_in_bytes
+  echo 0 > $CGROUP_PATH/cpuset.cpus
+  echo 0 > $CGROUP_PATH/cpuset.mems
+
+  mkdir -p /mnt/tmpfs
+
+  echo SETUP DONE
+}
+
+function expect_equal() {
+  local expected="$1"
+  local actual="$2"
+  local error="$3"
+
+  if [[ "$actual" != "$expected" ]]; then
+    echo "expected ($expected) != actual ($actual): $3" >&2
+    cleanup
+    exit 1
+  fi
+}
+
+function expect_ge() {
+  local expected="$1"
+  local actual="$2"
+  local error="$3"
+
+  if [[ "$actual" -lt "$expected" ]]; then
+    echo "expected ($expected) < actual ($actual): $3" >&2
+    cleanup
+    exit 1
+  fi
+}
+
+cleanup
+setup
+
+mount -t tmpfs -o memcg=$CGROUP_PATH tmpfs /mnt/tmpfs
+
+TARGET_MEMCG_USAGE=$(cat $CGROUP_PATH/memory.usage_in_bytes)
+expect_equal 0 "$TARGET_MEMCG_USAGE" "Before echo, memcg usage should be 0"
+
+# Echo to allocate a page in the tmpfs
+echo hello > /mnt/tmpfs/test
+TARGET_MEMCG_USAGE=$(cat $CGROUP_PATH/memory.usage_in_bytes)
+expect_ge 4096 "$TARGET_MEMCG_USAGE" "After echo, memcg usage should be greater than 4096"
+echo "Echo test succeeded"
+
+tools/testing/selftests/vm/mmap_write -p /mnt/tmpfs/test -s $((1 * 1024 * 1024))
+TARGET_MEMCG_USAGE=$(cat $CGROUP_PATH/memory.usage_in_bytes)
+expect_ge $((1 * 1024 * 1024)) "$TARGET_MEMCG_USAGE" "After echo, memcg usage should greater than 1MB"
+echo "Write succeeded"
+
+# OOM the remote container on pagefault.
+echo
+echo
+echo "OOMing the remote container using pagefault."
+echo "This will take a long time because the kernel goes through reclaim retries,"
+echo "but should eventually be OOM-killed by 'Out of memory (Killing remote allocating task)'"
+#tools/testing/selftests/vm/mmap_write -p /mnt/tmpfs/test -s $((11 * 1024 * 1024))
+
+# OOM the remote container on non pagefault.
+echo
+echo
+echo "OOMing the remote container using cat (non-pagefault)"
+echo "This will take a long time because the kernel goes through reclaim retries,"
+echo "but should eventually the cat command should receive an ENOMEM"
+cat /dev/random > /mnt/tmpfs/random
+
+cleanup
+echo SUCCESS

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 1/4] mm/shmem: support deterministic charging of tmpfs
  2021-11-10 21:19   ` Mina Almasry
@ 2021-11-11 16:53     ` kernel test robot
  -1 siblings, 0 replies; 18+ messages in thread
From: kernel test robot @ 2021-11-11 16:53 UTC (permalink / raw)
  To: Mina Almasry
  Cc: kbuild-all, Mina Almasry, Michal Hocko, Theodore Ts'o,
	Greg Thelen, Shakeel Butt, Andrew Morton,
	Linux Memory Management List, Hugh Dickins, Roman Gushchin,
	Dave Chinner

[-- Attachment #1: Type: text/plain, Size: 5198 bytes --]

Hi Mina,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on hnaz-mm/master]

url:    https://github.com/0day-ci/linux/commits/Mina-Almasry/mm-shmem-support-deterministic-charging-of-tmpfs/20211111-062122
base:   https://github.com/hnaz/linux-mm master
config: um-i386_defconfig (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
        # https://github.com/0day-ci/linux/commit/aaf0e3e0832977b6eb257e1ed27747cf97c479d4
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Mina-Almasry/mm-shmem-support-deterministic-charging-of-tmpfs/20211111-062122
        git checkout aaf0e3e0832977b6eb257e1ed27747cf97c479d4
        # save the attached .config to linux build tree
        mkdir build_dir
        make W=1 O=build_dir ARCH=um SUBARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   mm/shmem.c:118:5: warning: "CONFIG_MEMCG" is not defined, evaluates to 0 [-Wundef]
     118 | #if CONFIG_MEMCG
         |     ^~~~~~~~~~~~
   mm/shmem.c: In function 'shmem_add_to_page_cache':
>> mm/shmem.c:721:25: error: dereferencing pointer to incomplete type 'struct mem_cgroup'
     721 |    css_put(&remote_memcg->css);
         |                         ^~
   mm/shmem.c: In function 'shmem_parse_one':
   mm/shmem.c:3410:21: warning: unused variable 'memcg' [-Wunused-variable]
    3410 |  struct mem_cgroup *memcg;
         |                     ^~~~~
   mm/shmem.c: In function 'shmem_reconfigure':
   mm/shmem.c:3616:5: warning: "CONFIG_MEMCG" is not defined, evaluates to 0 [-Wundef]
    3616 | #if CONFIG_MEMCG
         |     ^~~~~~~~~~~~
   mm/shmem.c: In function 'shmem_fill_super':
   mm/shmem.c:3772:5: warning: "CONFIG_MEMCG" is not defined, evaluates to 0 [-Wundef]
    3772 | #if CONFIG_MEMCG
         |     ^~~~~~~~~~~~
--
   fs/super.c: In function 'destroy_unused_super':
>> fs/super.c:184:33: error: 'struct super_block' has no member named 's_memcg_to_charge'
     184 |  mem_cgroup_set_charge_target(&s->s_memcg_to_charge, NULL);
         |                                 ^~
   fs/super.c: In function '__put_super':
   fs/super.c:297:34: error: 'struct super_block' has no member named 's_memcg_to_charge'
     297 |   mem_cgroup_set_charge_target(&s->s_memcg_to_charge, NULL);
         |                                  ^~


vim +721 mm/shmem.c

   691	
   692	/*
   693	 * Like add_to_page_cache_locked, but error if expected item has gone.
   694	 */
   695	static int shmem_add_to_page_cache(struct page *page,
   696					   struct address_space *mapping,
   697					   pgoff_t index, void *expected, gfp_t gfp,
   698					   struct mm_struct *charge_mm)
   699	{
   700		XA_STATE_ORDER(xas, &mapping->i_pages, index, compound_order(page));
   701		unsigned long i = 0;
   702		unsigned long nr = compound_nr(page);
   703		int error;
   704		struct mem_cgroup *remote_memcg;
   705	
   706		VM_BUG_ON_PAGE(PageTail(page), page);
   707		VM_BUG_ON_PAGE(index != round_down(index, nr), page);
   708		VM_BUG_ON_PAGE(!PageLocked(page), page);
   709		VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
   710		VM_BUG_ON(expected && PageTransHuge(page));
   711	
   712		page_ref_add(page, nr);
   713		page->mapping = mapping;
   714		page->index = index;
   715	
   716		if (!PageSwapCache(page)) {
   717			remote_memcg = mem_cgroup_mapping_get_charge_target(mapping);
   718			if (remote_memcg) {
   719				error = mem_cgroup_charge_memcg(page_folio(page),
   720								remote_memcg, gfp);
 > 721				css_put(&remote_memcg->css);
   722			} else
   723				error = mem_cgroup_charge(page_folio(page), charge_mm,
   724							  gfp);
   725			if (error) {
   726				if (PageTransHuge(page)) {
   727					count_vm_event(THP_FILE_FALLBACK);
   728					count_vm_event(THP_FILE_FALLBACK_CHARGE);
   729				}
   730				goto error;
   731			}
   732		}
   733		cgroup_throttle_swaprate(page, gfp);
   734	
   735		do {
   736			void *entry;
   737			xas_lock_irq(&xas);
   738			entry = xas_find_conflict(&xas);
   739			if (entry != expected)
   740				xas_set_err(&xas, -EEXIST);
   741			xas_create_range(&xas);
   742			if (xas_error(&xas))
   743				goto unlock;
   744	next:
   745			xas_store(&xas, page);
   746			if (++i < nr) {
   747				xas_next(&xas);
   748				goto next;
   749			}
   750			if (PageTransHuge(page)) {
   751				count_vm_event(THP_FILE_ALLOC);
   752				__mod_lruvec_page_state(page, NR_SHMEM_THPS, nr);
   753			}
   754			mapping->nrpages += nr;
   755			__mod_lruvec_page_state(page, NR_FILE_PAGES, nr);
   756			__mod_lruvec_page_state(page, NR_SHMEM, nr);
   757	unlock:
   758			xas_unlock_irq(&xas);
   759		} while (xas_nomem(&xas, gfp));
   760	
   761		if (xas_error(&xas)) {
   762			error = xas_error(&xas);
   763			goto error;
   764		}
   765	
   766		return 0;
   767	error:
   768		page->mapping = NULL;
   769		page_ref_sub(page, nr);
   770		return error;
   771	}
   772	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 9939 bytes --]

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 1/4] mm/shmem: support deterministic charging of tmpfs
@ 2021-11-11 16:53     ` kernel test robot
  0 siblings, 0 replies; 18+ messages in thread
From: kernel test robot @ 2021-11-11 16:53 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 5342 bytes --]

Hi Mina,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on hnaz-mm/master]

url:    https://github.com/0day-ci/linux/commits/Mina-Almasry/mm-shmem-support-deterministic-charging-of-tmpfs/20211111-062122
base:   https://github.com/hnaz/linux-mm master
config: um-i386_defconfig (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
        # https://github.com/0day-ci/linux/commit/aaf0e3e0832977b6eb257e1ed27747cf97c479d4
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Mina-Almasry/mm-shmem-support-deterministic-charging-of-tmpfs/20211111-062122
        git checkout aaf0e3e0832977b6eb257e1ed27747cf97c479d4
        # save the attached .config to linux build tree
        mkdir build_dir
        make W=1 O=build_dir ARCH=um SUBARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   mm/shmem.c:118:5: warning: "CONFIG_MEMCG" is not defined, evaluates to 0 [-Wundef]
     118 | #if CONFIG_MEMCG
         |     ^~~~~~~~~~~~
   mm/shmem.c: In function 'shmem_add_to_page_cache':
>> mm/shmem.c:721:25: error: dereferencing pointer to incomplete type 'struct mem_cgroup'
     721 |    css_put(&remote_memcg->css);
         |                         ^~
   mm/shmem.c: In function 'shmem_parse_one':
   mm/shmem.c:3410:21: warning: unused variable 'memcg' [-Wunused-variable]
    3410 |  struct mem_cgroup *memcg;
         |                     ^~~~~
   mm/shmem.c: In function 'shmem_reconfigure':
   mm/shmem.c:3616:5: warning: "CONFIG_MEMCG" is not defined, evaluates to 0 [-Wundef]
    3616 | #if CONFIG_MEMCG
         |     ^~~~~~~~~~~~
   mm/shmem.c: In function 'shmem_fill_super':
   mm/shmem.c:3772:5: warning: "CONFIG_MEMCG" is not defined, evaluates to 0 [-Wundef]
    3772 | #if CONFIG_MEMCG
         |     ^~~~~~~~~~~~
--
   fs/super.c: In function 'destroy_unused_super':
>> fs/super.c:184:33: error: 'struct super_block' has no member named 's_memcg_to_charge'
     184 |  mem_cgroup_set_charge_target(&s->s_memcg_to_charge, NULL);
         |                                 ^~
   fs/super.c: In function '__put_super':
   fs/super.c:297:34: error: 'struct super_block' has no member named 's_memcg_to_charge'
     297 |   mem_cgroup_set_charge_target(&s->s_memcg_to_charge, NULL);
         |                                  ^~


vim +721 mm/shmem.c

   691	
   692	/*
   693	 * Like add_to_page_cache_locked, but error if expected item has gone.
   694	 */
   695	static int shmem_add_to_page_cache(struct page *page,
   696					   struct address_space *mapping,
   697					   pgoff_t index, void *expected, gfp_t gfp,
   698					   struct mm_struct *charge_mm)
   699	{
   700		XA_STATE_ORDER(xas, &mapping->i_pages, index, compound_order(page));
   701		unsigned long i = 0;
   702		unsigned long nr = compound_nr(page);
   703		int error;
   704		struct mem_cgroup *remote_memcg;
   705	
   706		VM_BUG_ON_PAGE(PageTail(page), page);
   707		VM_BUG_ON_PAGE(index != round_down(index, nr), page);
   708		VM_BUG_ON_PAGE(!PageLocked(page), page);
   709		VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
   710		VM_BUG_ON(expected && PageTransHuge(page));
   711	
   712		page_ref_add(page, nr);
   713		page->mapping = mapping;
   714		page->index = index;
   715	
   716		if (!PageSwapCache(page)) {
   717			remote_memcg = mem_cgroup_mapping_get_charge_target(mapping);
   718			if (remote_memcg) {
   719				error = mem_cgroup_charge_memcg(page_folio(page),
   720								remote_memcg, gfp);
 > 721				css_put(&remote_memcg->css);
   722			} else
   723				error = mem_cgroup_charge(page_folio(page), charge_mm,
   724							  gfp);
   725			if (error) {
   726				if (PageTransHuge(page)) {
   727					count_vm_event(THP_FILE_FALLBACK);
   728					count_vm_event(THP_FILE_FALLBACK_CHARGE);
   729				}
   730				goto error;
   731			}
   732		}
   733		cgroup_throttle_swaprate(page, gfp);
   734	
   735		do {
   736			void *entry;
   737			xas_lock_irq(&xas);
   738			entry = xas_find_conflict(&xas);
   739			if (entry != expected)
   740				xas_set_err(&xas, -EEXIST);
   741			xas_create_range(&xas);
   742			if (xas_error(&xas))
   743				goto unlock;
   744	next:
   745			xas_store(&xas, page);
   746			if (++i < nr) {
   747				xas_next(&xas);
   748				goto next;
   749			}
   750			if (PageTransHuge(page)) {
   751				count_vm_event(THP_FILE_ALLOC);
   752				__mod_lruvec_page_state(page, NR_SHMEM_THPS, nr);
   753			}
   754			mapping->nrpages += nr;
   755			__mod_lruvec_page_state(page, NR_FILE_PAGES, nr);
   756			__mod_lruvec_page_state(page, NR_SHMEM, nr);
   757	unlock:
   758			xas_unlock_irq(&xas);
   759		} while (xas_nomem(&xas, gfp));
   760	
   761		if (xas_error(&xas)) {
   762			error = xas_error(&xas);
   763			goto error;
   764		}
   765	
   766		return 0;
   767	error:
   768		page->mapping = NULL;
   769		page_ref_sub(page, nr);
   770		return error;
   771	}
   772	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 9939 bytes --]

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 2/4] mm/oom: handle remote ooms
  2021-11-10 21:19   ` Mina Almasry
@ 2021-11-11 17:15     ` kernel test robot
  -1 siblings, 0 replies; 18+ messages in thread
From: kernel test robot @ 2021-11-11 17:15 UTC (permalink / raw)
  To: Mina Almasry
  Cc: kbuild-all, Mina Almasry, Michal Hocko, Theodore Ts'o,
	Greg Thelen, Shakeel Butt, Andrew Morton,
	Linux Memory Management List, Hugh Dickins, Roman Gushchin,
	Dave Chinner

[-- Attachment #1: Type: text/plain, Size: 5221 bytes --]

Hi Mina,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on hnaz-mm/master]

url:    https://github.com/0day-ci/linux/commits/Mina-Almasry/mm-shmem-support-deterministic-charging-of-tmpfs/20211111-062122
base:   https://github.com/hnaz/linux-mm master
config: um-i386_defconfig (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
        # https://github.com/0day-ci/linux/commit/452a4d110272eb39892e4be30526411c7a5cb2e3
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Mina-Almasry/mm-shmem-support-deterministic-charging-of-tmpfs/20211111-062122
        git checkout 452a4d110272eb39892e4be30526411c7a5cb2e3
        # save the attached .config to linux build tree
        mkdir build_dir
        make W=1 O=build_dir ARCH=um SUBARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   mm/oom_kill.c: In function 'out_of_memory':
>> mm/oom_kill.c:1116:15: error: 'struct task_struct' has no member named 'in_user_fault'
    1116 |    if (current->in_user_fault &&
         |               ^~


vim +1116 mm/oom_kill.c

  1044	
  1045	/**
  1046	 * out_of_memory - kill the "best" process when we run out of memory
  1047	 * @oc: pointer to struct oom_control
  1048	 *
  1049	 * If we run out of memory, we have the choice between either
  1050	 * killing a random task (bad), letting the system crash (worse)
  1051	 * OR try to be smart about which process to kill. Note that we
  1052	 * don't have to be perfect here, we just have to be good.
  1053	 */
  1054	bool out_of_memory(struct oom_control *oc)
  1055	{
  1056		unsigned long freed = 0;
  1057	
  1058		if (oom_killer_disabled)
  1059			return false;
  1060	
  1061		if (!is_memcg_oom(oc)) {
  1062			blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
  1063			if (freed > 0)
  1064				/* Got some memory back in the last second. */
  1065				return true;
  1066		}
  1067	
  1068		/*
  1069		 * If current has a pending SIGKILL or is exiting, then automatically
  1070		 * select it.  The goal is to allow it to allocate so that it may
  1071		 * quickly exit and free its memory.
  1072		 */
  1073		if (task_will_free_mem(current)) {
  1074			mark_oom_victim(current);
  1075			wake_oom_reaper(current);
  1076			return true;
  1077		}
  1078	
  1079		/*
  1080		 * The OOM killer does not compensate for IO-less reclaim.
  1081		 * pagefault_out_of_memory lost its gfp context so we have to
  1082		 * make sure exclude 0 mask - all other users should have at least
  1083		 * ___GFP_DIRECT_RECLAIM to get here. But mem_cgroup_oom() has to
  1084		 * invoke the OOM killer even if it is a GFP_NOFS allocation.
  1085		 */
  1086		if (oc->gfp_mask && !(oc->gfp_mask & __GFP_FS) && !is_memcg_oom(oc))
  1087			return true;
  1088	
  1089		/*
  1090		 * Check if there were limitations on the allocation (only relevant for
  1091		 * NUMA and memcg) that may require different handling.
  1092		 */
  1093		oc->constraint = constrained_alloc(oc);
  1094		if (oc->constraint != CONSTRAINT_MEMORY_POLICY)
  1095			oc->nodemask = NULL;
  1096		check_panic_on_oom(oc);
  1097	
  1098		if (!is_memcg_oom(oc) && sysctl_oom_kill_allocating_task &&
  1099		    current->mm && !oom_unkillable_task(current) &&
  1100		    oom_cpuset_eligible(current, oc) &&
  1101		    current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
  1102			get_task_struct(current);
  1103			oc->chosen = current;
  1104			oom_kill_process(oc, "Out of memory (oom_kill_allocating_task)");
  1105			return true;
  1106		}
  1107	
  1108		select_bad_process(oc);
  1109		/* Found nothing?!?! */
  1110		if (!oc->chosen) {
  1111			if (is_remote_oom(oc->memcg)) {
  1112				/*
  1113				 * For remote ooms in userfaults, we have no choice but
  1114				 * to kill the allocating process.
  1115				 */
> 1116				if (current->in_user_fault &&
  1117				    !oom_unkillable_task(current)) {
  1118					get_task_struct(current);
  1119					oc->chosen = current;
  1120					oom_kill_process(
  1121						oc,
  1122						"Out of memory (Killing remote allocating task)");
  1123					return true;
  1124				}
  1125	
  1126				/*
  1127				 * For remote ooms in non-userfaults, simply return
  1128				 * ENOMEM to the caller.
  1129				 */
  1130				return false;
  1131			}
  1132	
  1133			dump_header(oc, NULL);
  1134			pr_warn("Out of memory and no killable processes...\n");
  1135			/*
  1136			 * If we got here due to an actual allocation at the
  1137			 * system level, we cannot survive this and will enter
  1138			 * an endless loop in the allocator. Bail out now.
  1139			 */
  1140			if (!is_sysrq_oom(oc) && !is_memcg_oom(oc))
  1141				panic("System is deadlocked on memory\n");
  1142		}
  1143		if (oc->chosen && oc->chosen != (void *)-1UL)
  1144			oom_kill_process(oc, !is_memcg_oom(oc) ? "Out of memory" :
  1145					 "Memory cgroup out of memory");
  1146		return !!oc->chosen;
  1147	}
  1148	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 9939 bytes --]

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 2/4] mm/oom: handle remote ooms
@ 2021-11-11 17:15     ` kernel test robot
  0 siblings, 0 replies; 18+ messages in thread
From: kernel test robot @ 2021-11-11 17:15 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 5364 bytes --]

Hi Mina,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on hnaz-mm/master]

url:    https://github.com/0day-ci/linux/commits/Mina-Almasry/mm-shmem-support-deterministic-charging-of-tmpfs/20211111-062122
base:   https://github.com/hnaz/linux-mm master
config: um-i386_defconfig (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
        # https://github.com/0day-ci/linux/commit/452a4d110272eb39892e4be30526411c7a5cb2e3
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Mina-Almasry/mm-shmem-support-deterministic-charging-of-tmpfs/20211111-062122
        git checkout 452a4d110272eb39892e4be30526411c7a5cb2e3
        # save the attached .config to linux build tree
        mkdir build_dir
        make W=1 O=build_dir ARCH=um SUBARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   mm/oom_kill.c: In function 'out_of_memory':
>> mm/oom_kill.c:1116:15: error: 'struct task_struct' has no member named 'in_user_fault'
    1116 |    if (current->in_user_fault &&
         |               ^~


vim +1116 mm/oom_kill.c

  1044	
  1045	/**
  1046	 * out_of_memory - kill the "best" process when we run out of memory
  1047	 * @oc: pointer to struct oom_control
  1048	 *
  1049	 * If we run out of memory, we have the choice between either
  1050	 * killing a random task (bad), letting the system crash (worse)
  1051	 * OR try to be smart about which process to kill. Note that we
  1052	 * don't have to be perfect here, we just have to be good.
  1053	 */
  1054	bool out_of_memory(struct oom_control *oc)
  1055	{
  1056		unsigned long freed = 0;
  1057	
  1058		if (oom_killer_disabled)
  1059			return false;
  1060	
  1061		if (!is_memcg_oom(oc)) {
  1062			blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
  1063			if (freed > 0)
  1064				/* Got some memory back in the last second. */
  1065				return true;
  1066		}
  1067	
  1068		/*
  1069		 * If current has a pending SIGKILL or is exiting, then automatically
  1070		 * select it.  The goal is to allow it to allocate so that it may
  1071		 * quickly exit and free its memory.
  1072		 */
  1073		if (task_will_free_mem(current)) {
  1074			mark_oom_victim(current);
  1075			wake_oom_reaper(current);
  1076			return true;
  1077		}
  1078	
  1079		/*
  1080		 * The OOM killer does not compensate for IO-less reclaim.
  1081		 * pagefault_out_of_memory lost its gfp context so we have to
  1082		 * make sure exclude 0 mask - all other users should have at least
  1083		 * ___GFP_DIRECT_RECLAIM to get here. But mem_cgroup_oom() has to
  1084		 * invoke the OOM killer even if it is a GFP_NOFS allocation.
  1085		 */
  1086		if (oc->gfp_mask && !(oc->gfp_mask & __GFP_FS) && !is_memcg_oom(oc))
  1087			return true;
  1088	
  1089		/*
  1090		 * Check if there were limitations on the allocation (only relevant for
  1091		 * NUMA and memcg) that may require different handling.
  1092		 */
  1093		oc->constraint = constrained_alloc(oc);
  1094		if (oc->constraint != CONSTRAINT_MEMORY_POLICY)
  1095			oc->nodemask = NULL;
  1096		check_panic_on_oom(oc);
  1097	
  1098		if (!is_memcg_oom(oc) && sysctl_oom_kill_allocating_task &&
  1099		    current->mm && !oom_unkillable_task(current) &&
  1100		    oom_cpuset_eligible(current, oc) &&
  1101		    current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
  1102			get_task_struct(current);
  1103			oc->chosen = current;
  1104			oom_kill_process(oc, "Out of memory (oom_kill_allocating_task)");
  1105			return true;
  1106		}
  1107	
  1108		select_bad_process(oc);
  1109		/* Found nothing?!?! */
  1110		if (!oc->chosen) {
  1111			if (is_remote_oom(oc->memcg)) {
  1112				/*
  1113				 * For remote ooms in userfaults, we have no choice but
  1114				 * to kill the allocating process.
  1115				 */
> 1116				if (current->in_user_fault &&
  1117				    !oom_unkillable_task(current)) {
  1118					get_task_struct(current);
  1119					oc->chosen = current;
  1120					oom_kill_process(
  1121						oc,
  1122						"Out of memory (Killing remote allocating task)");
  1123					return true;
  1124				}
  1125	
  1126				/*
  1127				 * For remote ooms in non-userfaults, simply return
  1128				 * ENOMEM to the caller.
  1129				 */
  1130				return false;
  1131			}
  1132	
  1133			dump_header(oc, NULL);
  1134			pr_warn("Out of memory and no killable processes...\n");
  1135			/*
  1136			 * If we got here due to an actual allocation at the
  1137			 * system level, we cannot survive this and will enter
  1138			 * an endless loop in the allocator. Bail out now.
  1139			 */
  1140			if (!is_sysrq_oom(oc) && !is_memcg_oom(oc))
  1141				panic("System is deadlocked on memory\n");
  1142		}
  1143		if (oc->chosen && oc->chosen != (void *)-1UL)
  1144			oom_kill_process(oc, !is_memcg_oom(oc) ? "Out of memory" :
  1145					 "Memory cgroup out of memory");
  1146		return !!oc->chosen;
  1147	}
  1148	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 9939 bytes --]

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 1/4] mm/shmem: support deterministic charging of tmpfs
  2021-11-10 21:19   ` Mina Almasry
@ 2021-11-11 17:16     ` kernel test robot
  -1 siblings, 0 replies; 18+ messages in thread
From: kernel test robot @ 2021-11-11 17:16 UTC (permalink / raw)
  To: Mina Almasry
  Cc: kbuild-all, Mina Almasry, Michal Hocko, Theodore Ts'o,
	Greg Thelen, Shakeel Butt, Andrew Morton,
	Linux Memory Management List, Hugh Dickins, Roman Gushchin,
	Dave Chinner

[-- Attachment #1: Type: text/plain, Size: 5145 bytes --]

Hi Mina,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on hnaz-mm/master]

url:    https://github.com/0day-ci/linux/commits/Mina-Almasry/mm-shmem-support-deterministic-charging-of-tmpfs/20211111-062122
base:   https://github.com/hnaz/linux-mm master
config: ia64-defconfig (attached as .config)
compiler: ia64-linux-gcc (GCC) 11.2.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/aaf0e3e0832977b6eb257e1ed27747cf97c479d4
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Mina-Almasry/mm-shmem-support-deterministic-charging-of-tmpfs/20211111-062122
        git checkout aaf0e3e0832977b6eb257e1ed27747cf97c479d4
        # save the attached .config to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross O=build_dir ARCH=ia64 SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   mm/shmem.c:118:5: warning: "CONFIG_MEMCG" is not defined, evaluates to 0 [-Wundef]
     118 | #if CONFIG_MEMCG
         |     ^~~~~~~~~~~~
   mm/shmem.c: In function 'shmem_add_to_page_cache':
>> mm/shmem.c:721:46: error: invalid use of undefined type 'struct mem_cgroup'
     721 |                         css_put(&remote_memcg->css);
         |                                              ^~
   mm/shmem.c: In function 'shmem_getpage_gfp':
   mm/shmem.c:1863:38: error: invalid use of undefined type 'struct mem_cgroup'
    1863 |                 css_put(&remote_memcg->css);
         |                                      ^~
   mm/shmem.c: In function 'shmem_parse_one':
   mm/shmem.c:3410:28: warning: unused variable 'memcg' [-Wunused-variable]
    3410 |         struct mem_cgroup *memcg;
         |                            ^~~~~
   mm/shmem.c: In function 'shmem_reconfigure':
   mm/shmem.c:3616:5: warning: "CONFIG_MEMCG" is not defined, evaluates to 0 [-Wundef]
    3616 | #if CONFIG_MEMCG
         |     ^~~~~~~~~~~~
   mm/shmem.c: In function 'shmem_fill_super':
   mm/shmem.c:3772:5: warning: "CONFIG_MEMCG" is not defined, evaluates to 0 [-Wundef]
    3772 | #if CONFIG_MEMCG
         |     ^~~~~~~~~~~~


vim +721 mm/shmem.c

   691	
   692	/*
   693	 * Like add_to_page_cache_locked, but error if expected item has gone.
   694	 */
   695	static int shmem_add_to_page_cache(struct page *page,
   696					   struct address_space *mapping,
   697					   pgoff_t index, void *expected, gfp_t gfp,
   698					   struct mm_struct *charge_mm)
   699	{
   700		XA_STATE_ORDER(xas, &mapping->i_pages, index, compound_order(page));
   701		unsigned long i = 0;
   702		unsigned long nr = compound_nr(page);
   703		int error;
   704		struct mem_cgroup *remote_memcg;
   705	
   706		VM_BUG_ON_PAGE(PageTail(page), page);
   707		VM_BUG_ON_PAGE(index != round_down(index, nr), page);
   708		VM_BUG_ON_PAGE(!PageLocked(page), page);
   709		VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
   710		VM_BUG_ON(expected && PageTransHuge(page));
   711	
   712		page_ref_add(page, nr);
   713		page->mapping = mapping;
   714		page->index = index;
   715	
   716		if (!PageSwapCache(page)) {
   717			remote_memcg = mem_cgroup_mapping_get_charge_target(mapping);
   718			if (remote_memcg) {
   719				error = mem_cgroup_charge_memcg(page_folio(page),
   720								remote_memcg, gfp);
 > 721				css_put(&remote_memcg->css);
   722			} else
   723				error = mem_cgroup_charge(page_folio(page), charge_mm,
   724							  gfp);
   725			if (error) {
   726				if (PageTransHuge(page)) {
   727					count_vm_event(THP_FILE_FALLBACK);
   728					count_vm_event(THP_FILE_FALLBACK_CHARGE);
   729				}
   730				goto error;
   731			}
   732		}
   733		cgroup_throttle_swaprate(page, gfp);
   734	
   735		do {
   736			void *entry;
   737			xas_lock_irq(&xas);
   738			entry = xas_find_conflict(&xas);
   739			if (entry != expected)
   740				xas_set_err(&xas, -EEXIST);
   741			xas_create_range(&xas);
   742			if (xas_error(&xas))
   743				goto unlock;
   744	next:
   745			xas_store(&xas, page);
   746			if (++i < nr) {
   747				xas_next(&xas);
   748				goto next;
   749			}
   750			if (PageTransHuge(page)) {
   751				count_vm_event(THP_FILE_ALLOC);
   752				__mod_lruvec_page_state(page, NR_SHMEM_THPS, nr);
   753			}
   754			mapping->nrpages += nr;
   755			__mod_lruvec_page_state(page, NR_FILE_PAGES, nr);
   756			__mod_lruvec_page_state(page, NR_SHMEM, nr);
   757	unlock:
   758			xas_unlock_irq(&xas);
   759		} while (xas_nomem(&xas, gfp));
   760	
   761		if (xas_error(&xas)) {
   762			error = xas_error(&xas);
   763			goto error;
   764		}
   765	
   766		return 0;
   767	error:
   768		page->mapping = NULL;
   769		page_ref_sub(page, nr);
   770		return error;
   771	}
   772	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 20057 bytes --]

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 1/4] mm/shmem: support deterministic charging of tmpfs
@ 2021-11-11 17:16     ` kernel test robot
  0 siblings, 0 replies; 18+ messages in thread
From: kernel test robot @ 2021-11-11 17:16 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 5286 bytes --]

Hi Mina,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on hnaz-mm/master]

url:    https://github.com/0day-ci/linux/commits/Mina-Almasry/mm-shmem-support-deterministic-charging-of-tmpfs/20211111-062122
base:   https://github.com/hnaz/linux-mm master
config: ia64-defconfig (attached as .config)
compiler: ia64-linux-gcc (GCC) 11.2.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/aaf0e3e0832977b6eb257e1ed27747cf97c479d4
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Mina-Almasry/mm-shmem-support-deterministic-charging-of-tmpfs/20211111-062122
        git checkout aaf0e3e0832977b6eb257e1ed27747cf97c479d4
        # save the attached .config to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross O=build_dir ARCH=ia64 SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   mm/shmem.c:118:5: warning: "CONFIG_MEMCG" is not defined, evaluates to 0 [-Wundef]
     118 | #if CONFIG_MEMCG
         |     ^~~~~~~~~~~~
   mm/shmem.c: In function 'shmem_add_to_page_cache':
>> mm/shmem.c:721:46: error: invalid use of undefined type 'struct mem_cgroup'
     721 |                         css_put(&remote_memcg->css);
         |                                              ^~
   mm/shmem.c: In function 'shmem_getpage_gfp':
   mm/shmem.c:1863:38: error: invalid use of undefined type 'struct mem_cgroup'
    1863 |                 css_put(&remote_memcg->css);
         |                                      ^~
   mm/shmem.c: In function 'shmem_parse_one':
   mm/shmem.c:3410:28: warning: unused variable 'memcg' [-Wunused-variable]
    3410 |         struct mem_cgroup *memcg;
         |                            ^~~~~
   mm/shmem.c: In function 'shmem_reconfigure':
   mm/shmem.c:3616:5: warning: "CONFIG_MEMCG" is not defined, evaluates to 0 [-Wundef]
    3616 | #if CONFIG_MEMCG
         |     ^~~~~~~~~~~~
   mm/shmem.c: In function 'shmem_fill_super':
   mm/shmem.c:3772:5: warning: "CONFIG_MEMCG" is not defined, evaluates to 0 [-Wundef]
    3772 | #if CONFIG_MEMCG
         |     ^~~~~~~~~~~~


vim +721 mm/shmem.c

   691	
   692	/*
   693	 * Like add_to_page_cache_locked, but error if expected item has gone.
   694	 */
   695	static int shmem_add_to_page_cache(struct page *page,
   696					   struct address_space *mapping,
   697					   pgoff_t index, void *expected, gfp_t gfp,
   698					   struct mm_struct *charge_mm)
   699	{
   700		XA_STATE_ORDER(xas, &mapping->i_pages, index, compound_order(page));
   701		unsigned long i = 0;
   702		unsigned long nr = compound_nr(page);
   703		int error;
   704		struct mem_cgroup *remote_memcg;
   705	
   706		VM_BUG_ON_PAGE(PageTail(page), page);
   707		VM_BUG_ON_PAGE(index != round_down(index, nr), page);
   708		VM_BUG_ON_PAGE(!PageLocked(page), page);
   709		VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
   710		VM_BUG_ON(expected && PageTransHuge(page));
   711	
   712		page_ref_add(page, nr);
   713		page->mapping = mapping;
   714		page->index = index;
   715	
   716		if (!PageSwapCache(page)) {
   717			remote_memcg = mem_cgroup_mapping_get_charge_target(mapping);
   718			if (remote_memcg) {
   719				error = mem_cgroup_charge_memcg(page_folio(page),
   720								remote_memcg, gfp);
 > 721				css_put(&remote_memcg->css);
   722			} else
   723				error = mem_cgroup_charge(page_folio(page), charge_mm,
   724							  gfp);
   725			if (error) {
   726				if (PageTransHuge(page)) {
   727					count_vm_event(THP_FILE_FALLBACK);
   728					count_vm_event(THP_FILE_FALLBACK_CHARGE);
   729				}
   730				goto error;
   731			}
   732		}
   733		cgroup_throttle_swaprate(page, gfp);
   734	
   735		do {
   736			void *entry;
   737			xas_lock_irq(&xas);
   738			entry = xas_find_conflict(&xas);
   739			if (entry != expected)
   740				xas_set_err(&xas, -EEXIST);
   741			xas_create_range(&xas);
   742			if (xas_error(&xas))
   743				goto unlock;
   744	next:
   745			xas_store(&xas, page);
   746			if (++i < nr) {
   747				xas_next(&xas);
   748				goto next;
   749			}
   750			if (PageTransHuge(page)) {
   751				count_vm_event(THP_FILE_ALLOC);
   752				__mod_lruvec_page_state(page, NR_SHMEM_THPS, nr);
   753			}
   754			mapping->nrpages += nr;
   755			__mod_lruvec_page_state(page, NR_FILE_PAGES, nr);
   756			__mod_lruvec_page_state(page, NR_SHMEM, nr);
   757	unlock:
   758			xas_unlock_irq(&xas);
   759		} while (xas_nomem(&xas, gfp));
   760	
   761		if (xas_error(&xas)) {
   762			error = xas_error(&xas);
   763			goto error;
   764		}
   765	
   766		return 0;
   767	error:
   768		page->mapping = NULL;
   769		page_ref_sub(page, nr);
   770		return error;
   771	}
   772	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 20057 bytes --]

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2021-11-11 17:17 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20211110211951.3730787-1-almasrymina@google.com>
2021-11-10 21:19 ` [PATCH v2 1/4] mm/shmem: support deterministic charging of tmpfs Mina Almasry
2021-11-10 21:19   ` Mina Almasry
2021-11-10 21:19   ` Mina Almasry
2021-11-11 16:53   ` kernel test robot
2021-11-11 16:53     ` kernel test robot
2021-11-11 17:16   ` kernel test robot
2021-11-11 17:16     ` kernel test robot
2021-11-10 21:19 ` [PATCH v2 2/4] mm/oom: handle remote ooms Mina Almasry
2021-11-10 21:19   ` Mina Almasry
2021-11-10 21:19   ` Mina Almasry
2021-11-11 17:15   ` kernel test robot
2021-11-11 17:15     ` kernel test robot
2021-11-10 21:19 ` [PATCH v2 3/4] mm, shmem: add tmpfs memcg= option documentation Mina Almasry
2021-11-10 21:19   ` Mina Almasry
2021-11-10 21:19   ` Mina Almasry
2021-11-10 21:19 ` [PATCH v2 4/4] mm, shmem, selftests: add tmpfs memcg= mount option tests Mina Almasry
2021-11-10 21:19   ` Mina Almasry
2021-11-10 21:19   ` Mina Almasry

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.