mm-commits.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Andrew Morton <akpm@linux-foundation.org>
To: akpm@linux-foundation.org, bigeasy@linutronix.de,
	linux-mm@kvack.org, minchan@kernel.org,
	mm-commits@vger.kernel.org, peterz@infradead.org,
	senozhatsky@chromium.org, tglx@linutronix.de,
	torvalds@linux-foundation.org, umgwanakikbuti@gmail.com
Subject: [patch 49/69] zsmalloc: remove zspage isolation for migration
Date: Fri, 21 Jan 2022 22:14:07 -0800	[thread overview]
Message-ID: <20220122061407.GUBINEueN%akpm@linux-foundation.org> (raw)
In-Reply-To: <20220121221021.60533b009c357d660791476e@linux-foundation.org>

From: Minchan Kim <minchan@kernel.org>
Subject: zsmalloc: remove zspage isolation for migration

zspage isolation for migration introduced additional exceptions to be
dealt with, since the zspage was removed from the class list.  The reason
I isolated the zspage from the class list was to prevent a race between
obj_malloc and page migration, by blocking further object allocation from
the zspage.  However, it couldn't prevent objects from being freed from
the zspage, so corner-case handling was still needed.

This patch removes the whole mess.  We are now fine, since class->lock
and zspage->lock together prevent the race.

Link: https://lkml.kernel.org/r/20211115185909.3949505-7-minchan@kernel.org
Signed-off-by: Minchan Kim <minchan@kernel.org>
Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 mm/zsmalloc.c |  157 ++----------------------------------------------
 1 file changed, 8 insertions(+), 149 deletions(-)

--- a/mm/zsmalloc.c~zsmalloc-remove-zspage-isolation-for-migration
+++ a/mm/zsmalloc.c
@@ -254,10 +254,6 @@ struct zs_pool {
 #ifdef CONFIG_COMPACTION
 	struct inode *inode;
 	struct work_struct free_work;
-	/* A wait queue for when migration races with async_free_zspage() */
-	struct wait_queue_head migration_wait;
-	atomic_long_t isolated_pages;
-	bool destroying;
 #endif
 };
 
@@ -454,11 +450,6 @@ MODULE_ALIAS("zpool-zsmalloc");
 /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
 static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
 
-static bool is_zspage_isolated(struct zspage *zspage)
-{
-	return zspage->isolated;
-}
-
 static __maybe_unused int is_first_page(struct page *page)
 {
 	return PagePrivate(page);
@@ -744,7 +735,6 @@ static void remove_zspage(struct size_cl
 				enum fullness_group fullness)
 {
 	VM_BUG_ON(list_empty(&class->fullness_list[fullness]));
-	VM_BUG_ON(is_zspage_isolated(zspage));
 
 	list_del_init(&zspage->list);
 	class_stat_dec(class, fullness, 1);
@@ -770,13 +760,9 @@ static enum fullness_group fix_fullness_
 	if (newfg == currfg)
 		goto out;
 
-	if (!is_zspage_isolated(zspage)) {
-		remove_zspage(class, zspage, currfg);
-		insert_zspage(class, zspage, newfg);
-	}
-
+	remove_zspage(class, zspage, currfg);
+	insert_zspage(class, zspage, newfg);
 	set_zspage_mapping(zspage, class_idx, newfg);
-
 out:
 	return newfg;
 }
@@ -1511,7 +1497,6 @@ void zs_free(struct zs_pool *pool, unsig
 	unsigned long obj;
 	struct size_class *class;
 	enum fullness_group fullness;
-	bool isolated;
 
 	if (unlikely(!handle))
 		return;
@@ -1533,11 +1518,9 @@ void zs_free(struct zs_pool *pool, unsig
 		goto out;
 	}
 
-	isolated = is_zspage_isolated(zspage);
 	migrate_read_unlock(zspage);
 	/* If zspage is isolated, zs_page_putback will free the zspage */
-	if (likely(!isolated))
-		free_zspage(pool, class, zspage);
+	free_zspage(pool, class, zspage);
 out:
 
 	spin_unlock(&class->lock);
@@ -1718,7 +1701,6 @@ static struct zspage *isolate_zspage(str
 		zspage = list_first_entry_or_null(&class->fullness_list[fg[i]],
 							struct zspage, list);
 		if (zspage) {
-			VM_BUG_ON(is_zspage_isolated(zspage));
 			remove_zspage(class, zspage, fg[i]);
 			return zspage;
 		}
@@ -1739,8 +1721,6 @@ static enum fullness_group putback_zspag
 {
 	enum fullness_group fullness;
 
-	VM_BUG_ON(is_zspage_isolated(zspage));
-
 	fullness = get_fullness_group(class, zspage);
 	insert_zspage(class, zspage, fullness);
 	set_zspage_mapping(zspage, class->index, fullness);
@@ -1822,35 +1802,10 @@ static void inc_zspage_isolation(struct
 
 static void dec_zspage_isolation(struct zspage *zspage)
 {
+	VM_BUG_ON(zspage->isolated == 0);
 	zspage->isolated--;
 }
 
-static void putback_zspage_deferred(struct zs_pool *pool,
-				    struct size_class *class,
-				    struct zspage *zspage)
-{
-	enum fullness_group fg;
-
-	fg = putback_zspage(class, zspage);
-	if (fg == ZS_EMPTY)
-		schedule_work(&pool->free_work);
-
-}
-
-static inline void zs_pool_dec_isolated(struct zs_pool *pool)
-{
-	VM_BUG_ON(atomic_long_read(&pool->isolated_pages) <= 0);
-	atomic_long_dec(&pool->isolated_pages);
-	/*
-	 * Checking pool->destroying must happen after atomic_long_dec()
-	 * for pool->isolated_pages above. Paired with the smp_mb() in
-	 * zs_unregister_migration().
-	 */
-	smp_mb__after_atomic();
-	if (atomic_long_read(&pool->isolated_pages) == 0 && pool->destroying)
-		wake_up_all(&pool->migration_wait);
-}
-
 static void replace_sub_page(struct size_class *class, struct zspage *zspage,
 				struct page *newpage, struct page *oldpage)
 {
@@ -1876,10 +1831,7 @@ static void replace_sub_page(struct size
 
 static bool zs_page_isolate(struct page *page, isolate_mode_t mode)
 {
-	struct zs_pool *pool;
-	struct size_class *class;
 	struct zspage *zspage;
-	struct address_space *mapping;
 
 	/*
 	 * Page is locked so zspage couldn't be destroyed. For detail, look at
@@ -1889,39 +1841,9 @@ static bool zs_page_isolate(struct page
 	VM_BUG_ON_PAGE(PageIsolated(page), page);
 
 	zspage = get_zspage(page);
-
-	mapping = page_mapping(page);
-	pool = mapping->private_data;
-
-	class = zspage_class(pool, zspage);
-
-	spin_lock(&class->lock);
-	if (get_zspage_inuse(zspage) == 0) {
-		spin_unlock(&class->lock);
-		return false;
-	}
-
-	/* zspage is isolated for object migration */
-	if (list_empty(&zspage->list) && !is_zspage_isolated(zspage)) {
-		spin_unlock(&class->lock);
-		return false;
-	}
-
-	/*
-	 * If this is first time isolation for the zspage, isolate zspage from
-	 * size_class to prevent further object allocation from the zspage.
-	 */
-	if (!list_empty(&zspage->list) && !is_zspage_isolated(zspage)) {
-		enum fullness_group fullness;
-		unsigned int class_idx;
-
-		get_zspage_mapping(zspage, &class_idx, &fullness);
-		atomic_long_inc(&pool->isolated_pages);
-		remove_zspage(class, zspage, fullness);
-	}
-
+	migrate_write_lock(zspage);
 	inc_zspage_isolation(zspage);
-	spin_unlock(&class->lock);
+	migrate_write_unlock(zspage);
 
 	return true;
 }
@@ -2004,21 +1926,6 @@ static int zs_page_migrate(struct addres
 
 	dec_zspage_isolation(zspage);
 
-	/*
-	 * Page migration is done so let's putback isolated zspage to
-	 * the list if @page is final isolated subpage in the zspage.
-	 */
-	if (!is_zspage_isolated(zspage)) {
-		/*
-		 * We cannot race with zs_destroy_pool() here because we wait
-		 * for isolation to hit zero before we start destroying.
-		 * Also, we ensure that everyone can see pool->destroying before
-		 * we start waiting.
-		 */
-		putback_zspage_deferred(pool, class, zspage);
-		zs_pool_dec_isolated(pool);
-	}
-
 	if (page_zone(newpage) != page_zone(page)) {
 		dec_zone_page_state(page, NR_ZSPAGES);
 		inc_zone_page_state(newpage, NR_ZSPAGES);
@@ -2046,30 +1953,15 @@ unpin_objects:
 
 static void zs_page_putback(struct page *page)
 {
-	struct zs_pool *pool;
-	struct size_class *class;
-	struct address_space *mapping;
 	struct zspage *zspage;
 
 	VM_BUG_ON_PAGE(!PageMovable(page), page);
 	VM_BUG_ON_PAGE(!PageIsolated(page), page);
 
 	zspage = get_zspage(page);
-	mapping = page_mapping(page);
-	pool = mapping->private_data;
-	class = zspage_class(pool, zspage);
-
-	spin_lock(&class->lock);
+	migrate_write_lock(zspage);
 	dec_zspage_isolation(zspage);
-	if (!is_zspage_isolated(zspage)) {
-		/*
-		 * Due to page_lock, we cannot free zspage immediately
-		 * so let's defer.
-		 */
-		putback_zspage_deferred(pool, class, zspage);
-		zs_pool_dec_isolated(pool);
-	}
-	spin_unlock(&class->lock);
+	migrate_write_unlock(zspage);
 }
 
 static const struct address_space_operations zsmalloc_aops = {
@@ -2091,36 +1983,8 @@ static int zs_register_migration(struct
 	return 0;
 }
 
-static bool pool_isolated_are_drained(struct zs_pool *pool)
-{
-	return atomic_long_read(&pool->isolated_pages) == 0;
-}
-
-/* Function for resolving migration */
-static void wait_for_isolated_drain(struct zs_pool *pool)
-{
-
-	/*
-	 * We're in the process of destroying the pool, so there are no
-	 * active allocations. zs_page_isolate() fails for completely free
-	 * zspages, so we need only wait for the zs_pool's isolated
-	 * count to hit zero.
-	 */
-	wait_event(pool->migration_wait,
-		   pool_isolated_are_drained(pool));
-}
-
 static void zs_unregister_migration(struct zs_pool *pool)
 {
-	pool->destroying = true;
-	/*
-	 * We need a memory barrier here to ensure global visibility of
-	 * pool->destroying. Thus pool->isolated pages will either be 0 in which
-	 * case we don't care, or it will be > 0 and pool->destroying will
-	 * ensure that we wake up once isolation hits 0.
-	 */
-	smp_mb();
-	wait_for_isolated_drain(pool); /* This can block */
 	flush_work(&pool->free_work);
 	iput(pool->inode);
 }
@@ -2150,7 +2014,6 @@ static void async_free_zspage(struct wor
 		spin_unlock(&class->lock);
 	}
 
-
 	list_for_each_entry_safe(zspage, tmp, &free_pages, list) {
 		list_del(&zspage->list);
 		lock_zspage(zspage);
@@ -2363,10 +2226,6 @@ struct zs_pool *zs_create_pool(const cha
 	if (!pool->name)
 		goto err;
 
-#ifdef CONFIG_COMPACTION
-	init_waitqueue_head(&pool->migration_wait);
-#endif
-
 	if (create_cache(pool))
 		goto err;
 
_

  parent reply	other threads:[~2022-01-22  6:14 UTC|newest]

Thread overview: 81+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-01-22  6:10 incoming Andrew Morton
2022-01-22  6:10 ` [patch 01/69] mm/migrate.c: rework migration_entry_wait() to not take a pageref Andrew Morton
2022-01-22  6:10 ` [patch 02/69] sysctl: add a new register_sysctl_init() interface Andrew Morton
2022-01-22  6:10 ` [patch 03/69] sysctl: move some boundary constants from sysctl.c to sysctl_vals Andrew Morton
2022-01-22  6:11 ` [patch 04/69] hung_task: move hung_task sysctl interface to hung_task.c Andrew Morton
2022-01-22  6:11 ` [patch 05/69] watchdog: move watchdog sysctl interface to watchdog.c Andrew Morton
2022-01-22  6:11 ` [patch 06/69] sysctl: make ngroups_max const Andrew Morton
2022-01-22  6:11 ` [patch 07/69] sysctl: use const for typically used max/min proc sysctls Andrew Morton
2022-01-22  6:11 ` [patch 08/69] sysctl: use SYSCTL_ZERO to replace some static int zero uses Andrew Morton
2022-01-22  6:11 ` [patch 09/69] aio: move aio sysctl to aio.c Andrew Morton
2022-01-22  6:11 ` [patch 10/69] dnotify: move dnotify sysctl to dnotify.c Andrew Morton
2022-01-22  6:11 ` [patch 11/69] hpet: simplify subdirectory registration with register_sysctl() Andrew Morton
2022-01-22  6:11 ` [patch 12/69] i915: " Andrew Morton
2022-01-22  6:11 ` [patch 13/69] macintosh/mac_hid.c: " Andrew Morton
2022-01-22  6:11 ` [patch 14/69] ocfs2: " Andrew Morton
2022-01-22  6:11 ` [patch 15/69] test_sysctl: " Andrew Morton
2022-01-22  6:11 ` [patch 16/69] inotify: " Andrew Morton
2022-01-22  6:12 ` [patch 17/69] cdrom: " Andrew Morton
2022-01-22  6:12 ` [patch 18/69] eventpoll: simplify sysctl declaration " Andrew Morton
2022-01-22  6:12 ` [patch 19/69] firmware_loader: move firmware sysctl to its own files Andrew Morton
2022-01-22  6:12 ` [patch 20/69] random: move the random sysctl declarations to its own file Andrew Morton
2022-01-22  6:12 ` [patch 21/69] sysctl: add helper to register a sysctl mount point Andrew Morton
2022-01-22  6:12 ` [patch 22/69] fs: move binfmt_misc sysctl to its own file Andrew Morton
2022-02-07 13:27   ` [PATCH] Fix regression due to "fs: move binfmt_misc sysctl to its own file" Domenico Andreoli
2022-02-07 21:46     ` Luis Chamberlain
2022-02-07 22:53       ` Tong Zhang
2022-02-08 17:20         ` Luis Chamberlain
2022-02-09  7:31           ` Domenico Andreoli
2022-02-09  7:49           ` [PATCH v2] " Domenico Andreoli
2022-02-09  7:55             ` Tong Zhang
2022-02-13 15:34             ` Ido Schimmel
2022-02-13 21:10               ` Tong Zhang
2022-02-14  7:47                 ` Ido Schimmel
2022-02-08  6:46     ` [PATCH] " Thorsten Leemhuis
2022-01-22  6:12 ` [patch 23/69] printk: move printk sysctl to printk/sysctl.c Andrew Morton
2022-01-22  6:12 ` [patch 24/69] scsi/sg: move sg-big-buff sysctl to scsi/sg.c Andrew Morton
2022-01-22  6:12 ` [patch 25/69] stackleak: move stack_erasing sysctl to stackleak.c Andrew Morton
2022-01-22  6:12 ` [patch 26/69] sysctl: share unsigned long const values Andrew Morton
2022-01-22  6:12 ` [patch 27/69] fs: move inode sysctls to its own file Andrew Morton
2022-01-22  6:12 ` [patch 28/69] fs: move fs stat sysctls to file_table.c Andrew Morton
2022-01-22  6:12 ` [patch 29/69] fs: move dcache sysctls to its own file Andrew Morton
2022-01-22  6:13 ` [patch 30/69] sysctl: move maxolduid as a sysctl specific const Andrew Morton
2022-01-22  6:13 ` [patch 31/69] fs: move shared sysctls to fs/sysctls.c Andrew Morton
2022-01-22  6:13 ` [patch 32/69] fs: move locking sysctls where they are used Andrew Morton
2022-01-22  6:13 ` [patch 33/69] fs: move namei sysctls to its own file Andrew Morton
2022-01-22  6:13 ` [patch 34/69] fs: move fs/exec.c sysctls into " Andrew Morton
2022-01-22  6:13 ` [patch 35/69] fs: move pipe sysctls to is " Andrew Morton
2022-01-22  6:13 ` [patch 36/69] sysctl: add and use base directory declarer and registration helper Andrew Morton
2022-01-22  6:13 ` [patch 37/69] fs: move namespace sysctls and declare fs base directory Andrew Morton
2022-01-22  6:13 ` [patch 38/69] kernel/sysctl.c: rename sysctl_init() to sysctl_init_bases() Andrew Morton
2022-01-22  6:13 ` [patch 39/69] printk: fix build warning when CONFIG_PRINTK=n Andrew Morton
2022-01-22  6:13 ` [patch 40/69] fs/coredump: move coredump sysctls into its own file Andrew Morton
2022-01-22  6:13 ` [patch 41/69] kprobe: move sysctl_kprobes_optimization to kprobes.c Andrew Morton
2022-01-22  6:13 ` [patch 42/69] kernel/sysctl.c: remove unused variable ten_thousand Andrew Morton
2022-01-22  6:13 ` [patch 43/69] sysctl: returns -EINVAL when a negative value is passed to proc_doulongvec_minmax Andrew Morton
2022-01-22  6:13 ` [patch 44/69] zsmalloc: introduce some helper functions Andrew Morton
2022-01-22  6:13 ` [patch 45/69] zsmalloc: rename zs_stat_type to class_stat_type Andrew Morton
2022-01-22  6:13 ` [patch 46/69] zsmalloc: decouple class actions from zspage works Andrew Morton
2022-01-22  6:14 ` [patch 47/69] zsmalloc: introduce obj_allocated Andrew Morton
2022-01-22  6:14 ` [patch 48/69] zsmalloc: move huge compressed obj from page to zspage Andrew Morton
2022-01-22  6:14 ` Andrew Morton [this message]
2022-01-22  6:14 ` [patch 50/69] locking/rwlocks: introduce write_lock_nested Andrew Morton
2022-01-22  6:14 ` [patch 51/69] zsmalloc: replace per zpage lock with pool->migrate_lock Andrew Morton
2022-01-22  6:14 ` [patch 52/69] zsmalloc: replace get_cpu_var with local_lock Andrew Morton
2022-01-22  6:14 ` [patch 53/69] fs: proc: store PDE()->data into inode->i_private Andrew Morton
2022-01-22  6:14 ` [patch 54/69] proc: remove PDE_DATA() completely Andrew Morton
2022-01-22  6:14 ` [patch 55/69] lib/stackdepot: allow optional init and stack_table allocation by kvmalloc() Andrew Morton
2022-01-22  6:14 ` [patch 56/69] lib/stackdepot: always do filter_irq_stacks() in stack_depot_save() Andrew Morton
2022-01-22  6:14 ` [patch 57/69] mm: remove cleancache Andrew Morton
2022-01-22  6:14 ` [patch 58/69] frontswap: remove frontswap_writethrough Andrew Morton
2022-01-22  6:14 ` [patch 59/69] frontswap: remove frontswap_tmem_exclusive_gets Andrew Morton
2022-01-22  6:14 ` [patch 60/69] frontswap: remove frontswap_shrink Andrew Morton
2022-01-22  6:14 ` [patch 61/69] frontswap: remove frontswap_curr_pages Andrew Morton
2022-01-22  6:14 ` [patch 62/69] frontswap: simplify frontswap_init Andrew Morton
2022-01-22  6:14 ` [patch 63/69] frontswap: remove the frontswap exports Andrew Morton
2022-01-22  6:14 ` [patch 64/69] mm: simplify try_to_unuse Andrew Morton
2022-01-22  6:15 ` [patch 65/69] frontswap: remove frontswap_test Andrew Morton
2022-01-22  6:15 ` [patch 66/69] frontswap: simplify frontswap_register_ops Andrew Morton
2022-01-22  6:15 ` [patch 67/69] mm: mark swap_lock and swap_active_head static Andrew Morton
2022-01-22  6:15 ` [patch 68/69] frontswap: remove support for multiple ops Andrew Morton
2022-01-22  6:15 ` [patch 69/69] mm: hide the FRONTSWAP Kconfig symbol Andrew Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220122061407.GUBINEueN%akpm@linux-foundation.org \
    --to=akpm@linux-foundation.org \
    --cc=bigeasy@linutronix.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=minchan@kernel.org \
    --cc=mm-commits@vger.kernel.org \
    --cc=peterz@infradead.org \
    --cc=senozhatsky@chromium.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=umgwanakikbuti@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).