All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] f2fs: fix to avoid race during access gc_thread pointer
@ 2018-04-24  2:42 ` Chao Yu
  0 siblings, 0 replies; 16+ messages in thread
From: Chao Yu @ 2018-04-24  2:42 UTC (permalink / raw)
  To: jaegeuk; +Cc: linux-f2fs-devel, linux-kernel, chao, Chao Yu

Thread A			Thread B		Thread C
- f2fs_remount
 - stop_gc_thread
				- f2fs_sbi_store
							- issue_discard_thread
   sbi->gc_thread = NULL;
				  sbi->gc_thread->gc_wake = 1
							  access sbi->gc_thread->gc_urgent

Previously, we allocate memory for sbi->gc_thread based on background
gc thread mount option, the memory can be released if we turn off
that mount option, but still there are several places access gc_thread
pointer without considering race condition, result in NULL point
dereference.

In order to fix this issue, keep gc_thread structure valid in sbi all
the time instead of alloc/free it dynamically.

In addition, add a rw semaphore to exclude rw operation in fields of
gc_thread.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
---
v2:
- add a rw semaphore to exclude rw operation suggested by Jaegeuk.
 fs/f2fs/debug.c   |  3 +--
 fs/f2fs/f2fs.h    |  9 ++++++-
 fs/f2fs/gc.c      | 78 ++++++++++++++++++++++++++++++++++++-------------------
 fs/f2fs/gc.h      |  1 +
 fs/f2fs/segment.c | 10 +++++--
 fs/f2fs/super.c   | 26 +++++++++++++------
 fs/f2fs/sysfs.c   | 26 ++++++++++++++-----
 7 files changed, 107 insertions(+), 46 deletions(-)

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index a66107b5cfff..0fbd674c66fb 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -221,8 +221,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
 	si->cache_mem = 0;
 
 	/* build gc */
-	if (sbi->gc_thread)
-		si->cache_mem += sizeof(struct f2fs_gc_kthread);
+	si->cache_mem += sizeof(struct f2fs_gc_kthread);
 
 	/* build merge flush thread */
 	if (SM_I(sbi)->fcc_info)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 8f3ad9662d13..75d3b4875429 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1412,6 +1412,11 @@ static inline struct sit_info *SIT_I(struct f2fs_sb_info *sbi)
 	return (struct sit_info *)(SM_I(sbi)->sit_info);
 }
 
+static inline struct f2fs_gc_kthread *GC_I(struct f2fs_sb_info *sbi)
+{
+	return (struct f2fs_gc_kthread *)(sbi->gc_thread);
+}
+
 static inline struct free_segmap_info *FREE_I(struct f2fs_sb_info *sbi)
 {
 	return (struct free_segmap_info *)(SM_I(sbi)->free_info);
@@ -2954,8 +2959,10 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len);
 /*
  * gc.c
  */
+int init_gc_context(struct f2fs_sb_info *sbi);
+void destroy_gc_context(struct f2fs_sb_info * sbi);
 int start_gc_thread(struct f2fs_sb_info *sbi);
-void stop_gc_thread(struct f2fs_sb_info *sbi);
+bool stop_gc_thread(struct f2fs_sb_info *sbi);
 block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
 int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background,
 			unsigned int segno);
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 70418b34c5f6..2febb84b2fd6 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -26,8 +26,8 @@
 static int gc_thread_func(void *data)
 {
 	struct f2fs_sb_info *sbi = data;
-	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
-	wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
+	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
+	wait_queue_head_t *wq = &gc_th->gc_wait_queue_head;
 	unsigned int wait_ms;
 
 	wait_ms = gc_th->min_sleep_time;
@@ -114,17 +114,15 @@ static int gc_thread_func(void *data)
 	return 0;
 }
 
-int start_gc_thread(struct f2fs_sb_info *sbi)
+int init_gc_context(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_gc_kthread *gc_th;
-	dev_t dev = sbi->sb->s_bdev->bd_dev;
-	int err = 0;
 
 	gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
-	if (!gc_th) {
-		err = -ENOMEM;
-		goto out;
-	}
+	if (!gc_th)
+		return -ENOMEM;
+
+	gc_th->f2fs_gc_task = NULL;
 
 	gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
 	gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
@@ -135,35 +133,59 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
 	gc_th->gc_urgent = 0;
 	gc_th->gc_wake= 0;
 
+	init_rwsem(&gc_th->gc_rwsem);
+
 	sbi->gc_thread = gc_th;
-	init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
-	sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
+
+	return 0;
+}
+
+void destroy_gc_context(struct f2fs_sb_info *sbi)
+{
+	kfree(GC_I(sbi));
+	sbi->gc_thread = NULL;
+}
+
+int start_gc_thread(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
+	dev_t dev = sbi->sb->s_bdev->bd_dev;
+	int err = 0;
+
+	init_waitqueue_head(&gc_th->gc_wait_queue_head);
+	gc_th->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
 			"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
 	if (IS_ERR(gc_th->f2fs_gc_task)) {
 		err = PTR_ERR(gc_th->f2fs_gc_task);
-		kfree(gc_th);
-		sbi->gc_thread = NULL;
+		gc_th->f2fs_gc_task = NULL;
 	}
-out:
+
 	return err;
 }
 
-void stop_gc_thread(struct f2fs_sb_info *sbi)
+bool stop_gc_thread(struct f2fs_sb_info *sbi)
 {
-	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
-	if (!gc_th)
-		return;
-	kthread_stop(gc_th->f2fs_gc_task);
-	kfree(gc_th);
-	sbi->gc_thread = NULL;
+	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
+	bool stopped = false;
+
+	down_write(&gc_th->gc_rwsem);
+	if (gc_th->f2fs_gc_task) {
+		kthread_stop(gc_th->f2fs_gc_task);
+		gc_th->f2fs_gc_task = NULL;
+		stopped = true;
+	}
+	up_write(&gc_th->gc_rwsem);
+
+	return stopped;
 }
 
 static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
 {
 	int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
 
-	if (!gc_th)
-		return gc_mode;
+	down_read(&gc_th->gc_rwsem);
+	if (!gc_th->f2fs_gc_task)
+		goto out;
 
 	if (gc_th->gc_idle) {
 		if (gc_th->gc_idle == 1)
@@ -173,6 +195,8 @@ static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
 	}
 	if (gc_th->gc_urgent)
 		gc_mode = GC_GREEDY;
+out:
+	up_read(&gc_th->gc_rwsem);
 	return gc_mode;
 }
 
@@ -187,17 +211,19 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
 		p->max_search = dirty_i->nr_dirty[type];
 		p->ofs_unit = 1;
 	} else {
-		p->gc_mode = select_gc_type(sbi->gc_thread, gc_type);
+		p->gc_mode = select_gc_type(GC_I(sbi), gc_type);
 		p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
 		p->max_search = dirty_i->nr_dirty[DIRTY];
 		p->ofs_unit = sbi->segs_per_sec;
 	}
 
 	/* we need to check every dirty segments in the FG_GC case */
-	if (gc_type != FG_GC &&
-			(sbi->gc_thread && !sbi->gc_thread->gc_urgent) &&
+	down_read(&GC_I(sbi)->gc_rwsem);
+	if (gc_type != FG_GC && GC_I(sbi)->f2fs_gc_task &&
+			!GC_I(sbi)->gc_urgent &&
 			p->max_search > sbi->max_victim_search)
 		p->max_search = sbi->max_victim_search;
+	up_read(&GC_I(sbi)->gc_rwsem);
 
 	/* let's select beginning hot/small space first in no_heap mode*/
 	if (test_opt(sbi, NOHEAP) &&
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index b0045d4c8d1e..9a5b273328c2 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -26,6 +26,7 @@
 #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */
 
 struct f2fs_gc_kthread {
+	struct rw_semaphore gc_rwsem;		/* semaphore for f2fs_gc_task */
 	struct task_struct *f2fs_gc_task;
 	wait_queue_head_t gc_wait_queue_head;
 
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 5960768d26df..f787eea1b2f6 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -177,8 +177,12 @@ bool need_SSR(struct f2fs_sb_info *sbi)
 
 	if (test_opt(sbi, LFS))
 		return false;
-	if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
+	down_read(&GC_I(sbi)->gc_rwsem);
+	if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent) {
+		down_read(&GC_I(sbi)->gc_rwsem);
 		return true;
+	}
+	up_read(&GC_I(sbi)->gc_rwsem);
 
 	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
 			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
@@ -1425,8 +1429,10 @@ static int issue_discard_thread(void *data)
 		if (dcc->discard_wake)
 			dcc->discard_wake = 0;
 
-		if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
+		down_read(&GC_I(sbi)->gc_rwsem);
+		if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent)
 			init_discard_policy(&dpolicy, DPOLICY_FORCE, 1);
+		up_read(&GC_I(sbi)->gc_rwsem);
 
 		sb_start_intwrite(sbi->sb);
 
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index dedb8e50b440..199f29dce86d 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1044,6 +1044,7 @@ static void f2fs_put_super(struct super_block *sb)
 	kfree(sbi->raw_super);
 
 	destroy_device_list(sbi);
+	destroy_gc_context(sbi);
 	mempool_destroy(sbi->write_io_dummy);
 #ifdef CONFIG_QUOTA
 	for (i = 0; i < MAXQUOTAS; i++)
@@ -1476,15 +1477,18 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 	 * option. Also sync the filesystem.
 	 */
 	if ((*flags & SB_RDONLY) || !test_opt(sbi, BG_GC)) {
-		if (sbi->gc_thread) {
-			stop_gc_thread(sbi);
-			need_restart_gc = true;
+		need_restart_gc = stop_gc_thread(sbi);
+	} else {
+		down_write(&GC_I(sbi)->gc_rwsem);
+		if (!GC_I(sbi)->f2fs_gc_task) {
+			err = start_gc_thread(sbi);
+			if (err) {
+				up_write(&GC_I(sbi)->gc_rwsem);
+				goto restore_opts;
+			}
+			need_stop_gc = true;
 		}
-	} else if (!sbi->gc_thread) {
-		err = start_gc_thread(sbi);
-		if (err)
-			goto restore_opts;
-		need_stop_gc = true;
+		up_write(&GC_I(sbi)->gc_rwsem);
 	}
 
 	if (*flags & SB_RDONLY ||
@@ -2771,6 +2775,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 		goto free_meta_inode;
 	}
 
+	err = init_gc_context(sbi);
+	if (err)
+		goto free_checkpoint;
+
 	/* Initialize device list */
 	err = f2fs_scan_devices(sbi);
 	if (err) {
@@ -2981,6 +2989,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	destroy_segment_manager(sbi);
 free_devices:
 	destroy_device_list(sbi);
+	destroy_gc_context(sbi);
+free_checkpoint:
 	kfree(sbi->ckpt);
 free_meta_inode:
 	make_bad_inode(sbi->meta_inode);
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 2c53de9251be..b8d09935e43f 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -46,7 +46,7 @@ struct f2fs_attr {
 static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
 {
 	if (struct_type == GC_THREAD)
-		return (unsigned char *)sbi->gc_thread;
+		return (unsigned char *)GC_I(sbi);
 	else if (struct_type == SM_INFO)
 		return (unsigned char *)SM_I(sbi);
 	else if (struct_type == DCC_INFO)
@@ -248,16 +248,28 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
 	if (!strcmp(a->attr.name, "trim_sections"))
 		return -EINVAL;
 
-	*ui = t;
-
-	if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0)
+	if (!strcmp(a->attr.name, "iostat_enable") && t == 0)
 		f2fs_reset_iostat(sbi);
-	if (!strcmp(a->attr.name, "gc_urgent") && t == 1 && sbi->gc_thread) {
-		sbi->gc_thread->gc_wake = 1;
-		wake_up_interruptible_all(&sbi->gc_thread->gc_wait_queue_head);
+
+	if (a->struct_type == GC_THREAD) {
+		down_write(&GC_I(sbi)->gc_rwsem);
+		if (!GC_I(sbi)->f2fs_gc_task) {
+			up_write(&GC_I(sbi)->gc_rwsem);
+			return -EINVAL;
+		}
+	}
+
+	if (!strcmp(a->attr.name, "gc_urgent") && t == 1) {
+		GC_I(sbi)->gc_wake = 1;
+		wake_up_interruptible_all(&GC_I(sbi)->gc_wait_queue_head);
 		wake_up_discard_thread(sbi, true);
 	}
 
+	*ui = t;
+
+	if (a->struct_type == GC_THREAD)
+		up_write(&GC_I(sbi)->gc_rwsem);
+
 	return count;
 }
 
-- 
2.15.0.55.gc2ece9dc4de6

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v2] f2fs: fix to avoid race during access gc_thread pointer
@ 2018-04-24  2:42 ` Chao Yu
  0 siblings, 0 replies; 16+ messages in thread
From: Chao Yu @ 2018-04-24  2:42 UTC (permalink / raw)
  To: jaegeuk; +Cc: linux-f2fs-devel, linux-kernel, chao, Chao Yu

Thread A			Thread B		Thread C
- f2fs_remount
 - stop_gc_thread
				- f2fs_sbi_store
							- issue_discard_thread
   sbi->gc_thread = NULL;
				  sbi->gc_thread->gc_wake = 1
							  access sbi->gc_thread->gc_urgent

Previously, we allocate memory for sbi->gc_thread based on background
gc thread mount option, the memory can be released if we turn off
that mount option, but still there are several places access gc_thread
pointer without considering race condition, result in NULL point
dereference.

In order to fix this issue, keep gc_thread structure valid in sbi all
the time instead of alloc/free it dynamically.

In addition, add a rw semaphore to exclude rw operation in fields of
gc_thread.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
---
v2:
- add a rw semaphore to exclude rw operation suggested by Jaegeuk.
 fs/f2fs/debug.c   |  3 +--
 fs/f2fs/f2fs.h    |  9 ++++++-
 fs/f2fs/gc.c      | 78 ++++++++++++++++++++++++++++++++++++-------------------
 fs/f2fs/gc.h      |  1 +
 fs/f2fs/segment.c | 10 +++++--
 fs/f2fs/super.c   | 26 +++++++++++++------
 fs/f2fs/sysfs.c   | 26 ++++++++++++++-----
 7 files changed, 107 insertions(+), 46 deletions(-)

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index a66107b5cfff..0fbd674c66fb 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -221,8 +221,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
 	si->cache_mem = 0;
 
 	/* build gc */
-	if (sbi->gc_thread)
-		si->cache_mem += sizeof(struct f2fs_gc_kthread);
+	si->cache_mem += sizeof(struct f2fs_gc_kthread);
 
 	/* build merge flush thread */
 	if (SM_I(sbi)->fcc_info)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 8f3ad9662d13..75d3b4875429 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1412,6 +1412,11 @@ static inline struct sit_info *SIT_I(struct f2fs_sb_info *sbi)
 	return (struct sit_info *)(SM_I(sbi)->sit_info);
 }
 
+static inline struct f2fs_gc_kthread *GC_I(struct f2fs_sb_info *sbi)
+{
+	return (struct f2fs_gc_kthread *)(sbi->gc_thread);
+}
+
 static inline struct free_segmap_info *FREE_I(struct f2fs_sb_info *sbi)
 {
 	return (struct free_segmap_info *)(SM_I(sbi)->free_info);
@@ -2954,8 +2959,10 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len);
 /*
  * gc.c
  */
+int init_gc_context(struct f2fs_sb_info *sbi);
+void destroy_gc_context(struct f2fs_sb_info * sbi);
 int start_gc_thread(struct f2fs_sb_info *sbi);
-void stop_gc_thread(struct f2fs_sb_info *sbi);
+bool stop_gc_thread(struct f2fs_sb_info *sbi);
 block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
 int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background,
 			unsigned int segno);
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 70418b34c5f6..2febb84b2fd6 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -26,8 +26,8 @@
 static int gc_thread_func(void *data)
 {
 	struct f2fs_sb_info *sbi = data;
-	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
-	wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
+	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
+	wait_queue_head_t *wq = &gc_th->gc_wait_queue_head;
 	unsigned int wait_ms;
 
 	wait_ms = gc_th->min_sleep_time;
@@ -114,17 +114,15 @@ static int gc_thread_func(void *data)
 	return 0;
 }
 
-int start_gc_thread(struct f2fs_sb_info *sbi)
+int init_gc_context(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_gc_kthread *gc_th;
-	dev_t dev = sbi->sb->s_bdev->bd_dev;
-	int err = 0;
 
 	gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
-	if (!gc_th) {
-		err = -ENOMEM;
-		goto out;
-	}
+	if (!gc_th)
+		return -ENOMEM;
+
+	gc_th->f2fs_gc_task = NULL;
 
 	gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
 	gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
@@ -135,35 +133,59 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
 	gc_th->gc_urgent = 0;
 	gc_th->gc_wake= 0;
 
+	init_rwsem(&gc_th->gc_rwsem);
+
 	sbi->gc_thread = gc_th;
-	init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
-	sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
+
+	return 0;
+}
+
+void destroy_gc_context(struct f2fs_sb_info *sbi)
+{
+	kfree(GC_I(sbi));
+	sbi->gc_thread = NULL;
+}
+
+int start_gc_thread(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
+	dev_t dev = sbi->sb->s_bdev->bd_dev;
+	int err = 0;
+
+	init_waitqueue_head(&gc_th->gc_wait_queue_head);
+	gc_th->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
 			"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
 	if (IS_ERR(gc_th->f2fs_gc_task)) {
 		err = PTR_ERR(gc_th->f2fs_gc_task);
-		kfree(gc_th);
-		sbi->gc_thread = NULL;
+		gc_th->f2fs_gc_task = NULL;
 	}
-out:
+
 	return err;
 }
 
-void stop_gc_thread(struct f2fs_sb_info *sbi)
+bool stop_gc_thread(struct f2fs_sb_info *sbi)
 {
-	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
-	if (!gc_th)
-		return;
-	kthread_stop(gc_th->f2fs_gc_task);
-	kfree(gc_th);
-	sbi->gc_thread = NULL;
+	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
+	bool stopped = false;
+
+	down_write(&gc_th->gc_rwsem);
+	if (gc_th->f2fs_gc_task) {
+		kthread_stop(gc_th->f2fs_gc_task);
+		gc_th->f2fs_gc_task = NULL;
+		stopped = true;
+	}
+	up_write(&gc_th->gc_rwsem);
+
+	return stopped;
 }
 
 static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
 {
 	int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
 
-	if (!gc_th)
-		return gc_mode;
+	down_read(&gc_th->gc_rwsem);
+	if (!gc_th->f2fs_gc_task)
+		goto out;
 
 	if (gc_th->gc_idle) {
 		if (gc_th->gc_idle == 1)
@@ -173,6 +195,8 @@ static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
 	}
 	if (gc_th->gc_urgent)
 		gc_mode = GC_GREEDY;
+out:
+	up_read(&gc_th->gc_rwsem);
 	return gc_mode;
 }
 
@@ -187,17 +211,19 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
 		p->max_search = dirty_i->nr_dirty[type];
 		p->ofs_unit = 1;
 	} else {
-		p->gc_mode = select_gc_type(sbi->gc_thread, gc_type);
+		p->gc_mode = select_gc_type(GC_I(sbi), gc_type);
 		p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
 		p->max_search = dirty_i->nr_dirty[DIRTY];
 		p->ofs_unit = sbi->segs_per_sec;
 	}
 
 	/* we need to check every dirty segments in the FG_GC case */
-	if (gc_type != FG_GC &&
-			(sbi->gc_thread && !sbi->gc_thread->gc_urgent) &&
+	down_read(&GC_I(sbi)->gc_rwsem);
+	if (gc_type != FG_GC && GC_I(sbi)->f2fs_gc_task &&
+			!GC_I(sbi)->gc_urgent &&
 			p->max_search > sbi->max_victim_search)
 		p->max_search = sbi->max_victim_search;
+	up_read(&GC_I(sbi)->gc_rwsem);
 
 	/* let's select beginning hot/small space first in no_heap mode*/
 	if (test_opt(sbi, NOHEAP) &&
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index b0045d4c8d1e..9a5b273328c2 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -26,6 +26,7 @@
 #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */
 
 struct f2fs_gc_kthread {
+	struct rw_semaphore gc_rwsem;		/* semaphore for f2fs_gc_task */
 	struct task_struct *f2fs_gc_task;
 	wait_queue_head_t gc_wait_queue_head;
 
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 5960768d26df..f787eea1b2f6 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -177,8 +177,12 @@ bool need_SSR(struct f2fs_sb_info *sbi)
 
 	if (test_opt(sbi, LFS))
 		return false;
-	if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
+	down_read(&GC_I(sbi)->gc_rwsem);
+	if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent) {
+		down_read(&GC_I(sbi)->gc_rwsem);
 		return true;
+	}
+	up_read(&GC_I(sbi)->gc_rwsem);
 
 	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
 			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
@@ -1425,8 +1429,10 @@ static int issue_discard_thread(void *data)
 		if (dcc->discard_wake)
 			dcc->discard_wake = 0;
 
-		if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
+		down_read(&GC_I(sbi)->gc_rwsem);
+		if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent)
 			init_discard_policy(&dpolicy, DPOLICY_FORCE, 1);
+		up_read(&GC_I(sbi)->gc_rwsem);
 
 		sb_start_intwrite(sbi->sb);
 
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index dedb8e50b440..199f29dce86d 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1044,6 +1044,7 @@ static void f2fs_put_super(struct super_block *sb)
 	kfree(sbi->raw_super);
 
 	destroy_device_list(sbi);
+	destroy_gc_context(sbi);
 	mempool_destroy(sbi->write_io_dummy);
 #ifdef CONFIG_QUOTA
 	for (i = 0; i < MAXQUOTAS; i++)
@@ -1476,15 +1477,18 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 	 * option. Also sync the filesystem.
 	 */
 	if ((*flags & SB_RDONLY) || !test_opt(sbi, BG_GC)) {
-		if (sbi->gc_thread) {
-			stop_gc_thread(sbi);
-			need_restart_gc = true;
+		need_restart_gc = stop_gc_thread(sbi);
+	} else {
+		down_write(&GC_I(sbi)->gc_rwsem);
+		if (!GC_I(sbi)->f2fs_gc_task) {
+			err = start_gc_thread(sbi);
+			if (err) {
+				up_write(&GC_I(sbi)->gc_rwsem);
+				goto restore_opts;
+			}
+			need_stop_gc = true;
 		}
-	} else if (!sbi->gc_thread) {
-		err = start_gc_thread(sbi);
-		if (err)
-			goto restore_opts;
-		need_stop_gc = true;
+		up_write(&GC_I(sbi)->gc_rwsem);
 	}
 
 	if (*flags & SB_RDONLY ||
@@ -2771,6 +2775,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 		goto free_meta_inode;
 	}
 
+	err = init_gc_context(sbi);
+	if (err)
+		goto free_checkpoint;
+
 	/* Initialize device list */
 	err = f2fs_scan_devices(sbi);
 	if (err) {
@@ -2981,6 +2989,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	destroy_segment_manager(sbi);
 free_devices:
 	destroy_device_list(sbi);
+	destroy_gc_context(sbi);
+free_checkpoint:
 	kfree(sbi->ckpt);
 free_meta_inode:
 	make_bad_inode(sbi->meta_inode);
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 2c53de9251be..b8d09935e43f 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -46,7 +46,7 @@ struct f2fs_attr {
 static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
 {
 	if (struct_type == GC_THREAD)
-		return (unsigned char *)sbi->gc_thread;
+		return (unsigned char *)GC_I(sbi);
 	else if (struct_type == SM_INFO)
 		return (unsigned char *)SM_I(sbi);
 	else if (struct_type == DCC_INFO)
@@ -248,16 +248,28 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
 	if (!strcmp(a->attr.name, "trim_sections"))
 		return -EINVAL;
 
-	*ui = t;
-
-	if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0)
+	if (!strcmp(a->attr.name, "iostat_enable") && t == 0)
 		f2fs_reset_iostat(sbi);
-	if (!strcmp(a->attr.name, "gc_urgent") && t == 1 && sbi->gc_thread) {
-		sbi->gc_thread->gc_wake = 1;
-		wake_up_interruptible_all(&sbi->gc_thread->gc_wait_queue_head);
+
+	if (a->struct_type == GC_THREAD) {
+		down_write(&GC_I(sbi)->gc_rwsem);
+		if (!GC_I(sbi)->f2fs_gc_task) {
+			up_write(&GC_I(sbi)->gc_rwsem);
+			return -EINVAL;
+		}
+	}
+
+	if (!strcmp(a->attr.name, "gc_urgent") && t == 1) {
+		GC_I(sbi)->gc_wake = 1;
+		wake_up_interruptible_all(&GC_I(sbi)->gc_wait_queue_head);
 		wake_up_discard_thread(sbi, true);
 	}
 
+	*ui = t;
+
+	if (a->struct_type == GC_THREAD)
+		up_write(&GC_I(sbi)->gc_rwsem);
+
 	return count;
 }
 
-- 
2.15.0.55.gc2ece9dc4de6

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] f2fs: fix to avoid race during access gc_thread pointer
  2018-04-24  2:42 ` Chao Yu
  (?)
@ 2018-04-26 16:03 ` Jaegeuk Kim
  2018-04-27  2:04     ` Chao Yu
  -1 siblings, 1 reply; 16+ messages in thread
From: Jaegeuk Kim @ 2018-04-26 16:03 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-f2fs-devel, linux-kernel, chao

On 04/24, Chao Yu wrote:
> Thread A			Thread B		Thread C
> - f2fs_remount
>  - stop_gc_thread
> 				- f2fs_sbi_store
> 							- issue_discard_thread
>    sbi->gc_thread = NULL;
> 				  sbi->gc_thread->gc_wake = 1
> 							  access sbi->gc_thread->gc_urgent
> 
> Previously, we allocate memory for sbi->gc_thread based on background
> gc thread mount option, the memory can be released if we turn off
> that mount option, but still there are several places access gc_thread
> pointer without considering race condition, result in NULL point
> dereference.

Do we just need to check &sb->s_umount in f2fs_sbi_store() and
issue_discard_thread?

> 
> In order to fix this issue, keep gc_thread structure valid in sbi all
> the time instead of alloc/free it dynamically.
> 
> In addition, add a rw semaphore to exclude rw operation in fields of
> gc_thread.
> 
> Signed-off-by: Chao Yu <yuchao0@huawei.com>
> ---
> v2:
> - add a rw semaphore to exclude rw operation suggested by Jaegeuk.
>  fs/f2fs/debug.c   |  3 +--
>  fs/f2fs/f2fs.h    |  9 ++++++-
>  fs/f2fs/gc.c      | 78 ++++++++++++++++++++++++++++++++++++-------------------
>  fs/f2fs/gc.h      |  1 +
>  fs/f2fs/segment.c | 10 +++++--
>  fs/f2fs/super.c   | 26 +++++++++++++------
>  fs/f2fs/sysfs.c   | 26 ++++++++++++++-----
>  7 files changed, 107 insertions(+), 46 deletions(-)
> 
> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> index a66107b5cfff..0fbd674c66fb 100644
> --- a/fs/f2fs/debug.c
> +++ b/fs/f2fs/debug.c
> @@ -221,8 +221,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
>  	si->cache_mem = 0;
>  
>  	/* build gc */
> -	if (sbi->gc_thread)
> -		si->cache_mem += sizeof(struct f2fs_gc_kthread);
> +	si->cache_mem += sizeof(struct f2fs_gc_kthread);
>  
>  	/* build merge flush thread */
>  	if (SM_I(sbi)->fcc_info)
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 8f3ad9662d13..75d3b4875429 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1412,6 +1412,11 @@ static inline struct sit_info *SIT_I(struct f2fs_sb_info *sbi)
>  	return (struct sit_info *)(SM_I(sbi)->sit_info);
>  }
>  
> +static inline struct f2fs_gc_kthread *GC_I(struct f2fs_sb_info *sbi)
> +{
> +	return (struct f2fs_gc_kthread *)(sbi->gc_thread);
> +}
> +
>  static inline struct free_segmap_info *FREE_I(struct f2fs_sb_info *sbi)
>  {
>  	return (struct free_segmap_info *)(SM_I(sbi)->free_info);
> @@ -2954,8 +2959,10 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len);
>  /*
>   * gc.c
>   */
> +int init_gc_context(struct f2fs_sb_info *sbi);
> +void destroy_gc_context(struct f2fs_sb_info * sbi);
>  int start_gc_thread(struct f2fs_sb_info *sbi);
> -void stop_gc_thread(struct f2fs_sb_info *sbi);
> +bool stop_gc_thread(struct f2fs_sb_info *sbi);
>  block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
>  int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background,
>  			unsigned int segno);
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index 70418b34c5f6..2febb84b2fd6 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -26,8 +26,8 @@
>  static int gc_thread_func(void *data)
>  {
>  	struct f2fs_sb_info *sbi = data;
> -	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
> -	wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
> +	wait_queue_head_t *wq = &gc_th->gc_wait_queue_head;
>  	unsigned int wait_ms;
>  
>  	wait_ms = gc_th->min_sleep_time;
> @@ -114,17 +114,15 @@ static int gc_thread_func(void *data)
>  	return 0;
>  }
>  
> -int start_gc_thread(struct f2fs_sb_info *sbi)
> +int init_gc_context(struct f2fs_sb_info *sbi)
>  {
>  	struct f2fs_gc_kthread *gc_th;
> -	dev_t dev = sbi->sb->s_bdev->bd_dev;
> -	int err = 0;
>  
>  	gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
> -	if (!gc_th) {
> -		err = -ENOMEM;
> -		goto out;
> -	}
> +	if (!gc_th)
> +		return -ENOMEM;
> +
> +	gc_th->f2fs_gc_task = NULL;
>  
>  	gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
>  	gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
> @@ -135,35 +133,59 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
>  	gc_th->gc_urgent = 0;
>  	gc_th->gc_wake= 0;
>  
> +	init_rwsem(&gc_th->gc_rwsem);
> +
>  	sbi->gc_thread = gc_th;
> -	init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
> -	sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
> +
> +	return 0;
> +}
> +
> +void destroy_gc_context(struct f2fs_sb_info *sbi)
> +{
> +	kfree(GC_I(sbi));
> +	sbi->gc_thread = NULL;
> +}
> +
> +int start_gc_thread(struct f2fs_sb_info *sbi)
> +{
> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
> +	dev_t dev = sbi->sb->s_bdev->bd_dev;
> +	int err = 0;
> +
> +	init_waitqueue_head(&gc_th->gc_wait_queue_head);
> +	gc_th->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
>  			"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
>  	if (IS_ERR(gc_th->f2fs_gc_task)) {
>  		err = PTR_ERR(gc_th->f2fs_gc_task);
> -		kfree(gc_th);
> -		sbi->gc_thread = NULL;
> +		gc_th->f2fs_gc_task = NULL;
>  	}
> -out:
> +
>  	return err;
>  }
>  
> -void stop_gc_thread(struct f2fs_sb_info *sbi)
> +bool stop_gc_thread(struct f2fs_sb_info *sbi)
>  {
> -	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
> -	if (!gc_th)
> -		return;
> -	kthread_stop(gc_th->f2fs_gc_task);
> -	kfree(gc_th);
> -	sbi->gc_thread = NULL;
> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
> +	bool stopped = false;
> +
> +	down_write(&gc_th->gc_rwsem);
> +	if (gc_th->f2fs_gc_task) {
> +		kthread_stop(gc_th->f2fs_gc_task);
> +		gc_th->f2fs_gc_task = NULL;
> +		stopped = true;
> +	}
> +	up_write(&gc_th->gc_rwsem);
> +
> +	return stopped;
>  }
>  
>  static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
>  {
>  	int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
>  
> -	if (!gc_th)
> -		return gc_mode;
> +	down_read(&gc_th->gc_rwsem);
> +	if (!gc_th->f2fs_gc_task)
> +		goto out;
>  
>  	if (gc_th->gc_idle) {
>  		if (gc_th->gc_idle == 1)
> @@ -173,6 +195,8 @@ static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
>  	}
>  	if (gc_th->gc_urgent)
>  		gc_mode = GC_GREEDY;
> +out:
> +	up_read(&gc_th->gc_rwsem);
>  	return gc_mode;
>  }
>  
> @@ -187,17 +211,19 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
>  		p->max_search = dirty_i->nr_dirty[type];
>  		p->ofs_unit = 1;
>  	} else {
> -		p->gc_mode = select_gc_type(sbi->gc_thread, gc_type);
> +		p->gc_mode = select_gc_type(GC_I(sbi), gc_type);
>  		p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
>  		p->max_search = dirty_i->nr_dirty[DIRTY];
>  		p->ofs_unit = sbi->segs_per_sec;
>  	}
>  
>  	/* we need to check every dirty segments in the FG_GC case */
> -	if (gc_type != FG_GC &&
> -			(sbi->gc_thread && !sbi->gc_thread->gc_urgent) &&
> +	down_read(&GC_I(sbi)->gc_rwsem);
> +	if (gc_type != FG_GC && GC_I(sbi)->f2fs_gc_task &&
> +			!GC_I(sbi)->gc_urgent &&
>  			p->max_search > sbi->max_victim_search)
>  		p->max_search = sbi->max_victim_search;
> +	up_read(&GC_I(sbi)->gc_rwsem);
>  
>  	/* let's select beginning hot/small space first in no_heap mode*/
>  	if (test_opt(sbi, NOHEAP) &&
> diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
> index b0045d4c8d1e..9a5b273328c2 100644
> --- a/fs/f2fs/gc.h
> +++ b/fs/f2fs/gc.h
> @@ -26,6 +26,7 @@
>  #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */
>  
>  struct f2fs_gc_kthread {
> +	struct rw_semaphore gc_rwsem;		/* semaphore for f2fs_gc_task */
>  	struct task_struct *f2fs_gc_task;
>  	wait_queue_head_t gc_wait_queue_head;
>  
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 5960768d26df..f787eea1b2f6 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -177,8 +177,12 @@ bool need_SSR(struct f2fs_sb_info *sbi)
>  
>  	if (test_opt(sbi, LFS))
>  		return false;
> -	if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
> +	down_read(&GC_I(sbi)->gc_rwsem);
> +	if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent) {
> +		down_read(&GC_I(sbi)->gc_rwsem);
>  		return true;
> +	}
> +	up_read(&GC_I(sbi)->gc_rwsem);
>  
>  	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
>  			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
> @@ -1425,8 +1429,10 @@ static int issue_discard_thread(void *data)
>  		if (dcc->discard_wake)
>  			dcc->discard_wake = 0;
>  
> -		if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
> +		down_read(&GC_I(sbi)->gc_rwsem);
> +		if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent)
>  			init_discard_policy(&dpolicy, DPOLICY_FORCE, 1);
> +		up_read(&GC_I(sbi)->gc_rwsem);
>  
>  		sb_start_intwrite(sbi->sb);
>  
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index dedb8e50b440..199f29dce86d 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -1044,6 +1044,7 @@ static void f2fs_put_super(struct super_block *sb)
>  	kfree(sbi->raw_super);
>  
>  	destroy_device_list(sbi);
> +	destroy_gc_context(sbi);
>  	mempool_destroy(sbi->write_io_dummy);
>  #ifdef CONFIG_QUOTA
>  	for (i = 0; i < MAXQUOTAS; i++)
> @@ -1476,15 +1477,18 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
>  	 * option. Also sync the filesystem.
>  	 */
>  	if ((*flags & SB_RDONLY) || !test_opt(sbi, BG_GC)) {
> -		if (sbi->gc_thread) {
> -			stop_gc_thread(sbi);
> -			need_restart_gc = true;
> +		need_restart_gc = stop_gc_thread(sbi);
> +	} else {
> +		down_write(&GC_I(sbi)->gc_rwsem);
> +		if (!GC_I(sbi)->f2fs_gc_task) {
> +			err = start_gc_thread(sbi);
> +			if (err) {
> +				up_write(&GC_I(sbi)->gc_rwsem);
> +				goto restore_opts;
> +			}
> +			need_stop_gc = true;
>  		}
> -	} else if (!sbi->gc_thread) {
> -		err = start_gc_thread(sbi);
> -		if (err)
> -			goto restore_opts;
> -		need_stop_gc = true;
> +		up_write(&GC_I(sbi)->gc_rwsem);
>  	}
>  
>  	if (*flags & SB_RDONLY ||
> @@ -2771,6 +2775,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>  		goto free_meta_inode;
>  	}
>  
> +	err = init_gc_context(sbi);
> +	if (err)
> +		goto free_checkpoint;
> +
>  	/* Initialize device list */
>  	err = f2fs_scan_devices(sbi);
>  	if (err) {
> @@ -2981,6 +2989,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>  	destroy_segment_manager(sbi);
>  free_devices:
>  	destroy_device_list(sbi);
> +	destroy_gc_context(sbi);
> +free_checkpoint:
>  	kfree(sbi->ckpt);
>  free_meta_inode:
>  	make_bad_inode(sbi->meta_inode);
> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
> index 2c53de9251be..b8d09935e43f 100644
> --- a/fs/f2fs/sysfs.c
> +++ b/fs/f2fs/sysfs.c
> @@ -46,7 +46,7 @@ struct f2fs_attr {
>  static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
>  {
>  	if (struct_type == GC_THREAD)
> -		return (unsigned char *)sbi->gc_thread;
> +		return (unsigned char *)GC_I(sbi);
>  	else if (struct_type == SM_INFO)
>  		return (unsigned char *)SM_I(sbi);
>  	else if (struct_type == DCC_INFO)
> @@ -248,16 +248,28 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
>  	if (!strcmp(a->attr.name, "trim_sections"))
>  		return -EINVAL;
>  
> -	*ui = t;
> -
> -	if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0)
> +	if (!strcmp(a->attr.name, "iostat_enable") && t == 0)
>  		f2fs_reset_iostat(sbi);
> -	if (!strcmp(a->attr.name, "gc_urgent") && t == 1 && sbi->gc_thread) {
> -		sbi->gc_thread->gc_wake = 1;
> -		wake_up_interruptible_all(&sbi->gc_thread->gc_wait_queue_head);
> +
> +	if (a->struct_type == GC_THREAD) {
> +		down_write(&GC_I(sbi)->gc_rwsem);
> +		if (!GC_I(sbi)->f2fs_gc_task) {
> +			up_write(&GC_I(sbi)->gc_rwsem);
> +			return -EINVAL;
> +		}
> +	}
> +
> +	if (!strcmp(a->attr.name, "gc_urgent") && t == 1) {
> +		GC_I(sbi)->gc_wake = 1;
> +		wake_up_interruptible_all(&GC_I(sbi)->gc_wait_queue_head);
>  		wake_up_discard_thread(sbi, true);
>  	}
>  
> +	*ui = t;
> +
> +	if (a->struct_type == GC_THREAD)
> +		up_write(&GC_I(sbi)->gc_rwsem);
> +
>  	return count;
>  }
>  
> -- 
> 2.15.0.55.gc2ece9dc4de6

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] f2fs: fix to avoid race during access gc_thread pointer
  2018-04-26 16:03 ` Jaegeuk Kim
@ 2018-04-27  2:04     ` Chao Yu
  0 siblings, 0 replies; 16+ messages in thread
From: Chao Yu @ 2018-04-27  2:04 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-f2fs-devel, linux-kernel, chao

On 2018/4/27 0:03, Jaegeuk Kim wrote:
> On 04/24, Chao Yu wrote:
>> Thread A			Thread B		Thread C
>> - f2fs_remount
>>  - stop_gc_thread
>> 				- f2fs_sbi_store
>> 							- issue_discard_thread
>>    sbi->gc_thread = NULL;
>> 				  sbi->gc_thread->gc_wake = 1
>> 							  access sbi->gc_thread->gc_urgent
>>
>> Previously, we allocate memory for sbi->gc_thread based on background
>> gc thread mount option, the memory can be released if we turn off
>> that mount option, but still there are several places access gc_thread
>> pointer without considering race condition, result in NULL point
>> dereference.
> 
> Do we just need to check &sb->s_umount in f2fs_sbi_store() and

Better,

> issue_discard_thread?

There is more cases can be called from different scenarios:
- select_policy()
- need_SSR()

Thanks,

> 
>>
>> In order to fix this issue, keep gc_thread structure valid in sbi all
>> the time instead of alloc/free it dynamically.
>>
>> In addition, add a rw semaphore to exclude rw operation in fields of
>> gc_thread.
>>
>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
>> ---
>> v2:
>> - add a rw semaphore to exclude rw operation suggested by Jaegeuk.
>>  fs/f2fs/debug.c   |  3 +--
>>  fs/f2fs/f2fs.h    |  9 ++++++-
>>  fs/f2fs/gc.c      | 78 ++++++++++++++++++++++++++++++++++++-------------------
>>  fs/f2fs/gc.h      |  1 +
>>  fs/f2fs/segment.c | 10 +++++--
>>  fs/f2fs/super.c   | 26 +++++++++++++------
>>  fs/f2fs/sysfs.c   | 26 ++++++++++++++-----
>>  7 files changed, 107 insertions(+), 46 deletions(-)
>>
>> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
>> index a66107b5cfff..0fbd674c66fb 100644
>> --- a/fs/f2fs/debug.c
>> +++ b/fs/f2fs/debug.c
>> @@ -221,8 +221,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
>>  	si->cache_mem = 0;
>>  
>>  	/* build gc */
>> -	if (sbi->gc_thread)
>> -		si->cache_mem += sizeof(struct f2fs_gc_kthread);
>> +	si->cache_mem += sizeof(struct f2fs_gc_kthread);
>>  
>>  	/* build merge flush thread */
>>  	if (SM_I(sbi)->fcc_info)
>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>> index 8f3ad9662d13..75d3b4875429 100644
>> --- a/fs/f2fs/f2fs.h
>> +++ b/fs/f2fs/f2fs.h
>> @@ -1412,6 +1412,11 @@ static inline struct sit_info *SIT_I(struct f2fs_sb_info *sbi)
>>  	return (struct sit_info *)(SM_I(sbi)->sit_info);
>>  }
>>  
>> +static inline struct f2fs_gc_kthread *GC_I(struct f2fs_sb_info *sbi)
>> +{
>> +	return (struct f2fs_gc_kthread *)(sbi->gc_thread);
>> +}
>> +
>>  static inline struct free_segmap_info *FREE_I(struct f2fs_sb_info *sbi)
>>  {
>>  	return (struct free_segmap_info *)(SM_I(sbi)->free_info);
>> @@ -2954,8 +2959,10 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len);
>>  /*
>>   * gc.c
>>   */
>> +int init_gc_context(struct f2fs_sb_info *sbi);
>> +void destroy_gc_context(struct f2fs_sb_info * sbi);
>>  int start_gc_thread(struct f2fs_sb_info *sbi);
>> -void stop_gc_thread(struct f2fs_sb_info *sbi);
>> +bool stop_gc_thread(struct f2fs_sb_info *sbi);
>>  block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
>>  int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background,
>>  			unsigned int segno);
>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>> index 70418b34c5f6..2febb84b2fd6 100644
>> --- a/fs/f2fs/gc.c
>> +++ b/fs/f2fs/gc.c
>> @@ -26,8 +26,8 @@
>>  static int gc_thread_func(void *data)
>>  {
>>  	struct f2fs_sb_info *sbi = data;
>> -	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
>> -	wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
>> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
>> +	wait_queue_head_t *wq = &gc_th->gc_wait_queue_head;
>>  	unsigned int wait_ms;
>>  
>>  	wait_ms = gc_th->min_sleep_time;
>> @@ -114,17 +114,15 @@ static int gc_thread_func(void *data)
>>  	return 0;
>>  }
>>  
>> -int start_gc_thread(struct f2fs_sb_info *sbi)
>> +int init_gc_context(struct f2fs_sb_info *sbi)
>>  {
>>  	struct f2fs_gc_kthread *gc_th;
>> -	dev_t dev = sbi->sb->s_bdev->bd_dev;
>> -	int err = 0;
>>  
>>  	gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
>> -	if (!gc_th) {
>> -		err = -ENOMEM;
>> -		goto out;
>> -	}
>> +	if (!gc_th)
>> +		return -ENOMEM;
>> +
>> +	gc_th->f2fs_gc_task = NULL;
>>  
>>  	gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
>>  	gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
>> @@ -135,35 +133,59 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
>>  	gc_th->gc_urgent = 0;
>>  	gc_th->gc_wake= 0;
>>  
>> +	init_rwsem(&gc_th->gc_rwsem);
>> +
>>  	sbi->gc_thread = gc_th;
>> -	init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
>> -	sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
>> +
>> +	return 0;
>> +}
>> +
>> +void destroy_gc_context(struct f2fs_sb_info *sbi)
>> +{
>> +	kfree(GC_I(sbi));
>> +	sbi->gc_thread = NULL;
>> +}
>> +
>> +int start_gc_thread(struct f2fs_sb_info *sbi)
>> +{
>> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
>> +	dev_t dev = sbi->sb->s_bdev->bd_dev;
>> +	int err = 0;
>> +
>> +	init_waitqueue_head(&gc_th->gc_wait_queue_head);
>> +	gc_th->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
>>  			"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
>>  	if (IS_ERR(gc_th->f2fs_gc_task)) {
>>  		err = PTR_ERR(gc_th->f2fs_gc_task);
>> -		kfree(gc_th);
>> -		sbi->gc_thread = NULL;
>> +		gc_th->f2fs_gc_task = NULL;
>>  	}
>> -out:
>> +
>>  	return err;
>>  }
>>  
>> -void stop_gc_thread(struct f2fs_sb_info *sbi)
>> +bool stop_gc_thread(struct f2fs_sb_info *sbi)
>>  {
>> -	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
>> -	if (!gc_th)
>> -		return;
>> -	kthread_stop(gc_th->f2fs_gc_task);
>> -	kfree(gc_th);
>> -	sbi->gc_thread = NULL;
>> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
>> +	bool stopped = false;
>> +
>> +	down_write(&gc_th->gc_rwsem);
>> +	if (gc_th->f2fs_gc_task) {
>> +		kthread_stop(gc_th->f2fs_gc_task);
>> +		gc_th->f2fs_gc_task = NULL;
>> +		stopped = true;
>> +	}
>> +	up_write(&gc_th->gc_rwsem);
>> +
>> +	return stopped;
>>  }
>>  
>>  static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
>>  {
>>  	int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
>>  
>> -	if (!gc_th)
>> -		return gc_mode;
>> +	down_read(&gc_th->gc_rwsem);
>> +	if (!gc_th->f2fs_gc_task)
>> +		goto out;
>>  
>>  	if (gc_th->gc_idle) {
>>  		if (gc_th->gc_idle == 1)
>> @@ -173,6 +195,8 @@ static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
>>  	}
>>  	if (gc_th->gc_urgent)
>>  		gc_mode = GC_GREEDY;
>> +out:
>> +	up_read(&gc_th->gc_rwsem);
>>  	return gc_mode;
>>  }
>>  
>> @@ -187,17 +211,19 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
>>  		p->max_search = dirty_i->nr_dirty[type];
>>  		p->ofs_unit = 1;
>>  	} else {
>> -		p->gc_mode = select_gc_type(sbi->gc_thread, gc_type);
>> +		p->gc_mode = select_gc_type(GC_I(sbi), gc_type);
>>  		p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
>>  		p->max_search = dirty_i->nr_dirty[DIRTY];
>>  		p->ofs_unit = sbi->segs_per_sec;
>>  	}
>>  
>>  	/* we need to check every dirty segments in the FG_GC case */
>> -	if (gc_type != FG_GC &&
>> -			(sbi->gc_thread && !sbi->gc_thread->gc_urgent) &&
>> +	down_read(&GC_I(sbi)->gc_rwsem);
>> +	if (gc_type != FG_GC && GC_I(sbi)->f2fs_gc_task &&
>> +			!GC_I(sbi)->gc_urgent &&
>>  			p->max_search > sbi->max_victim_search)
>>  		p->max_search = sbi->max_victim_search;
>> +	up_read(&GC_I(sbi)->gc_rwsem);
>>  
>>  	/* let's select beginning hot/small space first in no_heap mode*/
>>  	if (test_opt(sbi, NOHEAP) &&
>> diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
>> index b0045d4c8d1e..9a5b273328c2 100644
>> --- a/fs/f2fs/gc.h
>> +++ b/fs/f2fs/gc.h
>> @@ -26,6 +26,7 @@
>>  #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */
>>  
>>  struct f2fs_gc_kthread {
>> +	struct rw_semaphore gc_rwsem;		/* semaphore for f2fs_gc_task */
>>  	struct task_struct *f2fs_gc_task;
>>  	wait_queue_head_t gc_wait_queue_head;
>>  
>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>> index 5960768d26df..f787eea1b2f6 100644
>> --- a/fs/f2fs/segment.c
>> +++ b/fs/f2fs/segment.c
>> @@ -177,8 +177,12 @@ bool need_SSR(struct f2fs_sb_info *sbi)
>>  
>>  	if (test_opt(sbi, LFS))
>>  		return false;
>> -	if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
>> +	down_read(&GC_I(sbi)->gc_rwsem);
>> +	if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent) {
>> +		down_read(&GC_I(sbi)->gc_rwsem);
>>  		return true;
>> +	}
>> +	up_read(&GC_I(sbi)->gc_rwsem);
>>  
>>  	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
>>  			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
>> @@ -1425,8 +1429,10 @@ static int issue_discard_thread(void *data)
>>  		if (dcc->discard_wake)
>>  			dcc->discard_wake = 0;
>>  
>> -		if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
>> +		down_read(&GC_I(sbi)->gc_rwsem);
>> +		if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent)
>>  			init_discard_policy(&dpolicy, DPOLICY_FORCE, 1);
>> +		up_read(&GC_I(sbi)->gc_rwsem);
>>  
>>  		sb_start_intwrite(sbi->sb);
>>  
>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>> index dedb8e50b440..199f29dce86d 100644
>> --- a/fs/f2fs/super.c
>> +++ b/fs/f2fs/super.c
>> @@ -1044,6 +1044,7 @@ static void f2fs_put_super(struct super_block *sb)
>>  	kfree(sbi->raw_super);
>>  
>>  	destroy_device_list(sbi);
>> +	destroy_gc_context(sbi);
>>  	mempool_destroy(sbi->write_io_dummy);
>>  #ifdef CONFIG_QUOTA
>>  	for (i = 0; i < MAXQUOTAS; i++)
>> @@ -1476,15 +1477,18 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
>>  	 * option. Also sync the filesystem.
>>  	 */
>>  	if ((*flags & SB_RDONLY) || !test_opt(sbi, BG_GC)) {
>> -		if (sbi->gc_thread) {
>> -			stop_gc_thread(sbi);
>> -			need_restart_gc = true;
>> +		need_restart_gc = stop_gc_thread(sbi);
>> +	} else {
>> +		down_write(&GC_I(sbi)->gc_rwsem);
>> +		if (!GC_I(sbi)->f2fs_gc_task) {
>> +			err = start_gc_thread(sbi);
>> +			if (err) {
>> +				up_write(&GC_I(sbi)->gc_rwsem);
>> +				goto restore_opts;
>> +			}
>> +			need_stop_gc = true;
>>  		}
>> -	} else if (!sbi->gc_thread) {
>> -		err = start_gc_thread(sbi);
>> -		if (err)
>> -			goto restore_opts;
>> -		need_stop_gc = true;
>> +		up_write(&GC_I(sbi)->gc_rwsem);
>>  	}
>>  
>>  	if (*flags & SB_RDONLY ||
>> @@ -2771,6 +2775,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>  		goto free_meta_inode;
>>  	}
>>  
>> +	err = init_gc_context(sbi);
>> +	if (err)
>> +		goto free_checkpoint;
>> +
>>  	/* Initialize device list */
>>  	err = f2fs_scan_devices(sbi);
>>  	if (err) {
>> @@ -2981,6 +2989,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>  	destroy_segment_manager(sbi);
>>  free_devices:
>>  	destroy_device_list(sbi);
>> +	destroy_gc_context(sbi);
>> +free_checkpoint:
>>  	kfree(sbi->ckpt);
>>  free_meta_inode:
>>  	make_bad_inode(sbi->meta_inode);
>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
>> index 2c53de9251be..b8d09935e43f 100644
>> --- a/fs/f2fs/sysfs.c
>> +++ b/fs/f2fs/sysfs.c
>> @@ -46,7 +46,7 @@ struct f2fs_attr {
>>  static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
>>  {
>>  	if (struct_type == GC_THREAD)
>> -		return (unsigned char *)sbi->gc_thread;
>> +		return (unsigned char *)GC_I(sbi);
>>  	else if (struct_type == SM_INFO)
>>  		return (unsigned char *)SM_I(sbi);
>>  	else if (struct_type == DCC_INFO)
>> @@ -248,16 +248,28 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
>>  	if (!strcmp(a->attr.name, "trim_sections"))
>>  		return -EINVAL;
>>  
>> -	*ui = t;
>> -
>> -	if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0)
>> +	if (!strcmp(a->attr.name, "iostat_enable") && t == 0)
>>  		f2fs_reset_iostat(sbi);
>> -	if (!strcmp(a->attr.name, "gc_urgent") && t == 1 && sbi->gc_thread) {
>> -		sbi->gc_thread->gc_wake = 1;
>> -		wake_up_interruptible_all(&sbi->gc_thread->gc_wait_queue_head);
>> +
>> +	if (a->struct_type == GC_THREAD) {
>> +		down_write(&GC_I(sbi)->gc_rwsem);
>> +		if (!GC_I(sbi)->f2fs_gc_task) {
>> +			up_write(&GC_I(sbi)->gc_rwsem);
>> +			return -EINVAL;
>> +		}
>> +	}
>> +
>> +	if (!strcmp(a->attr.name, "gc_urgent") && t == 1) {
>> +		GC_I(sbi)->gc_wake = 1;
>> +		wake_up_interruptible_all(&GC_I(sbi)->gc_wait_queue_head);
>>  		wake_up_discard_thread(sbi, true);
>>  	}
>>  
>> +	*ui = t;
>> +
>> +	if (a->struct_type == GC_THREAD)
>> +		up_write(&GC_I(sbi)->gc_rwsem);
>> +
>>  	return count;
>>  }
>>  
>> -- 
>> 2.15.0.55.gc2ece9dc4de6
> 
> .
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] f2fs: fix to avoid race during access gc_thread pointer
@ 2018-04-27  2:04     ` Chao Yu
  0 siblings, 0 replies; 16+ messages in thread
From: Chao Yu @ 2018-04-27  2:04 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-f2fs-devel, linux-kernel, chao

On 2018/4/27 0:03, Jaegeuk Kim wrote:
> On 04/24, Chao Yu wrote:
>> Thread A			Thread B		Thread C
>> - f2fs_remount
>>  - stop_gc_thread
>> 				- f2fs_sbi_store
>> 							- issue_discard_thread
>>    sbi->gc_thread = NULL;
>> 				  sbi->gc_thread->gc_wake = 1
>> 							  access sbi->gc_thread->gc_urgent
>>
>> Previously, we allocate memory for sbi->gc_thread based on background
>> gc thread mount option, the memory can be released if we turn off
>> that mount option, but still there are several places access gc_thread
>> pointer without considering race condition, result in NULL point
>> dereference.
> 
> Do we just need to check &sb->s_umount in f2fs_sbi_store() and

Better,

> issue_discard_thread?

There is more cases can be called from different scenarios:
- select_policy()
- need_SSR()

Thanks,

> 
>>
>> In order to fix this issue, keep gc_thread structure valid in sbi all
>> the time instead of alloc/free it dynamically.
>>
>> In addition, add a rw semaphore to exclude rw operation in fields of
>> gc_thread.
>>
>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
>> ---
>> v2:
>> - add a rw semaphore to exclude rw operation suggested by Jaegeuk.
>>  fs/f2fs/debug.c   |  3 +--
>>  fs/f2fs/f2fs.h    |  9 ++++++-
>>  fs/f2fs/gc.c      | 78 ++++++++++++++++++++++++++++++++++++-------------------
>>  fs/f2fs/gc.h      |  1 +
>>  fs/f2fs/segment.c | 10 +++++--
>>  fs/f2fs/super.c   | 26 +++++++++++++------
>>  fs/f2fs/sysfs.c   | 26 ++++++++++++++-----
>>  7 files changed, 107 insertions(+), 46 deletions(-)
>>
>> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
>> index a66107b5cfff..0fbd674c66fb 100644
>> --- a/fs/f2fs/debug.c
>> +++ b/fs/f2fs/debug.c
>> @@ -221,8 +221,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
>>  	si->cache_mem = 0;
>>  
>>  	/* build gc */
>> -	if (sbi->gc_thread)
>> -		si->cache_mem += sizeof(struct f2fs_gc_kthread);
>> +	si->cache_mem += sizeof(struct f2fs_gc_kthread);
>>  
>>  	/* build merge flush thread */
>>  	if (SM_I(sbi)->fcc_info)
>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>> index 8f3ad9662d13..75d3b4875429 100644
>> --- a/fs/f2fs/f2fs.h
>> +++ b/fs/f2fs/f2fs.h
>> @@ -1412,6 +1412,11 @@ static inline struct sit_info *SIT_I(struct f2fs_sb_info *sbi)
>>  	return (struct sit_info *)(SM_I(sbi)->sit_info);
>>  }
>>  
>> +static inline struct f2fs_gc_kthread *GC_I(struct f2fs_sb_info *sbi)
>> +{
>> +	return (struct f2fs_gc_kthread *)(sbi->gc_thread);
>> +}
>> +
>>  static inline struct free_segmap_info *FREE_I(struct f2fs_sb_info *sbi)
>>  {
>>  	return (struct free_segmap_info *)(SM_I(sbi)->free_info);
>> @@ -2954,8 +2959,10 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len);
>>  /*
>>   * gc.c
>>   */
>> +int init_gc_context(struct f2fs_sb_info *sbi);
>> +void destroy_gc_context(struct f2fs_sb_info * sbi);
>>  int start_gc_thread(struct f2fs_sb_info *sbi);
>> -void stop_gc_thread(struct f2fs_sb_info *sbi);
>> +bool stop_gc_thread(struct f2fs_sb_info *sbi);
>>  block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
>>  int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background,
>>  			unsigned int segno);
>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>> index 70418b34c5f6..2febb84b2fd6 100644
>> --- a/fs/f2fs/gc.c
>> +++ b/fs/f2fs/gc.c
>> @@ -26,8 +26,8 @@
>>  static int gc_thread_func(void *data)
>>  {
>>  	struct f2fs_sb_info *sbi = data;
>> -	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
>> -	wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
>> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
>> +	wait_queue_head_t *wq = &gc_th->gc_wait_queue_head;
>>  	unsigned int wait_ms;
>>  
>>  	wait_ms = gc_th->min_sleep_time;
>> @@ -114,17 +114,15 @@ static int gc_thread_func(void *data)
>>  	return 0;
>>  }
>>  
>> -int start_gc_thread(struct f2fs_sb_info *sbi)
>> +int init_gc_context(struct f2fs_sb_info *sbi)
>>  {
>>  	struct f2fs_gc_kthread *gc_th;
>> -	dev_t dev = sbi->sb->s_bdev->bd_dev;
>> -	int err = 0;
>>  
>>  	gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
>> -	if (!gc_th) {
>> -		err = -ENOMEM;
>> -		goto out;
>> -	}
>> +	if (!gc_th)
>> +		return -ENOMEM;
>> +
>> +	gc_th->f2fs_gc_task = NULL;
>>  
>>  	gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
>>  	gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
>> @@ -135,35 +133,59 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
>>  	gc_th->gc_urgent = 0;
>>  	gc_th->gc_wake= 0;
>>  
>> +	init_rwsem(&gc_th->gc_rwsem);
>> +
>>  	sbi->gc_thread = gc_th;
>> -	init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
>> -	sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
>> +
>> +	return 0;
>> +}
>> +
>> +void destroy_gc_context(struct f2fs_sb_info *sbi)
>> +{
>> +	kfree(GC_I(sbi));
>> +	sbi->gc_thread = NULL;
>> +}
>> +
>> +int start_gc_thread(struct f2fs_sb_info *sbi)
>> +{
>> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
>> +	dev_t dev = sbi->sb->s_bdev->bd_dev;
>> +	int err = 0;
>> +
>> +	init_waitqueue_head(&gc_th->gc_wait_queue_head);
>> +	gc_th->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
>>  			"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
>>  	if (IS_ERR(gc_th->f2fs_gc_task)) {
>>  		err = PTR_ERR(gc_th->f2fs_gc_task);
>> -		kfree(gc_th);
>> -		sbi->gc_thread = NULL;
>> +		gc_th->f2fs_gc_task = NULL;
>>  	}
>> -out:
>> +
>>  	return err;
>>  }
>>  
>> -void stop_gc_thread(struct f2fs_sb_info *sbi)
>> +bool stop_gc_thread(struct f2fs_sb_info *sbi)
>>  {
>> -	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
>> -	if (!gc_th)
>> -		return;
>> -	kthread_stop(gc_th->f2fs_gc_task);
>> -	kfree(gc_th);
>> -	sbi->gc_thread = NULL;
>> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
>> +	bool stopped = false;
>> +
>> +	down_write(&gc_th->gc_rwsem);
>> +	if (gc_th->f2fs_gc_task) {
>> +		kthread_stop(gc_th->f2fs_gc_task);
>> +		gc_th->f2fs_gc_task = NULL;
>> +		stopped = true;
>> +	}
>> +	up_write(&gc_th->gc_rwsem);
>> +
>> +	return stopped;
>>  }
>>  
>>  static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
>>  {
>>  	int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
>>  
>> -	if (!gc_th)
>> -		return gc_mode;
>> +	down_read(&gc_th->gc_rwsem);
>> +	if (!gc_th->f2fs_gc_task)
>> +		goto out;
>>  
>>  	if (gc_th->gc_idle) {
>>  		if (gc_th->gc_idle == 1)
>> @@ -173,6 +195,8 @@ static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
>>  	}
>>  	if (gc_th->gc_urgent)
>>  		gc_mode = GC_GREEDY;
>> +out:
>> +	up_read(&gc_th->gc_rwsem);
>>  	return gc_mode;
>>  }
>>  
>> @@ -187,17 +211,19 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
>>  		p->max_search = dirty_i->nr_dirty[type];
>>  		p->ofs_unit = 1;
>>  	} else {
>> -		p->gc_mode = select_gc_type(sbi->gc_thread, gc_type);
>> +		p->gc_mode = select_gc_type(GC_I(sbi), gc_type);
>>  		p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
>>  		p->max_search = dirty_i->nr_dirty[DIRTY];
>>  		p->ofs_unit = sbi->segs_per_sec;
>>  	}
>>  
>>  	/* we need to check every dirty segments in the FG_GC case */
>> -	if (gc_type != FG_GC &&
>> -			(sbi->gc_thread && !sbi->gc_thread->gc_urgent) &&
>> +	down_read(&GC_I(sbi)->gc_rwsem);
>> +	if (gc_type != FG_GC && GC_I(sbi)->f2fs_gc_task &&
>> +			!GC_I(sbi)->gc_urgent &&
>>  			p->max_search > sbi->max_victim_search)
>>  		p->max_search = sbi->max_victim_search;
>> +	up_read(&GC_I(sbi)->gc_rwsem);
>>  
>>  	/* let's select beginning hot/small space first in no_heap mode*/
>>  	if (test_opt(sbi, NOHEAP) &&
>> diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
>> index b0045d4c8d1e..9a5b273328c2 100644
>> --- a/fs/f2fs/gc.h
>> +++ b/fs/f2fs/gc.h
>> @@ -26,6 +26,7 @@
>>  #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */
>>  
>>  struct f2fs_gc_kthread {
>> +	struct rw_semaphore gc_rwsem;		/* semaphore for f2fs_gc_task */
>>  	struct task_struct *f2fs_gc_task;
>>  	wait_queue_head_t gc_wait_queue_head;
>>  
>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>> index 5960768d26df..f787eea1b2f6 100644
>> --- a/fs/f2fs/segment.c
>> +++ b/fs/f2fs/segment.c
>> @@ -177,8 +177,12 @@ bool need_SSR(struct f2fs_sb_info *sbi)
>>  
>>  	if (test_opt(sbi, LFS))
>>  		return false;
>> -	if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
>> +	down_read(&GC_I(sbi)->gc_rwsem);
>> +	if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent) {
>> +		down_read(&GC_I(sbi)->gc_rwsem);
>>  		return true;
>> +	}
>> +	up_read(&GC_I(sbi)->gc_rwsem);
>>  
>>  	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
>>  			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
>> @@ -1425,8 +1429,10 @@ static int issue_discard_thread(void *data)
>>  		if (dcc->discard_wake)
>>  			dcc->discard_wake = 0;
>>  
>> -		if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
>> +		down_read(&GC_I(sbi)->gc_rwsem);
>> +		if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent)
>>  			init_discard_policy(&dpolicy, DPOLICY_FORCE, 1);
>> +		up_read(&GC_I(sbi)->gc_rwsem);
>>  
>>  		sb_start_intwrite(sbi->sb);
>>  
>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>> index dedb8e50b440..199f29dce86d 100644
>> --- a/fs/f2fs/super.c
>> +++ b/fs/f2fs/super.c
>> @@ -1044,6 +1044,7 @@ static void f2fs_put_super(struct super_block *sb)
>>  	kfree(sbi->raw_super);
>>  
>>  	destroy_device_list(sbi);
>> +	destroy_gc_context(sbi);
>>  	mempool_destroy(sbi->write_io_dummy);
>>  #ifdef CONFIG_QUOTA
>>  	for (i = 0; i < MAXQUOTAS; i++)
>> @@ -1476,15 +1477,18 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
>>  	 * option. Also sync the filesystem.
>>  	 */
>>  	if ((*flags & SB_RDONLY) || !test_opt(sbi, BG_GC)) {
>> -		if (sbi->gc_thread) {
>> -			stop_gc_thread(sbi);
>> -			need_restart_gc = true;
>> +		need_restart_gc = stop_gc_thread(sbi);
>> +	} else {
>> +		down_write(&GC_I(sbi)->gc_rwsem);
>> +		if (!GC_I(sbi)->f2fs_gc_task) {
>> +			err = start_gc_thread(sbi);
>> +			if (err) {
>> +				up_write(&GC_I(sbi)->gc_rwsem);
>> +				goto restore_opts;
>> +			}
>> +			need_stop_gc = true;
>>  		}
>> -	} else if (!sbi->gc_thread) {
>> -		err = start_gc_thread(sbi);
>> -		if (err)
>> -			goto restore_opts;
>> -		need_stop_gc = true;
>> +		up_write(&GC_I(sbi)->gc_rwsem);
>>  	}
>>  
>>  	if (*flags & SB_RDONLY ||
>> @@ -2771,6 +2775,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>  		goto free_meta_inode;
>>  	}
>>  
>> +	err = init_gc_context(sbi);
>> +	if (err)
>> +		goto free_checkpoint;
>> +
>>  	/* Initialize device list */
>>  	err = f2fs_scan_devices(sbi);
>>  	if (err) {
>> @@ -2981,6 +2989,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>  	destroy_segment_manager(sbi);
>>  free_devices:
>>  	destroy_device_list(sbi);
>> +	destroy_gc_context(sbi);
>> +free_checkpoint:
>>  	kfree(sbi->ckpt);
>>  free_meta_inode:
>>  	make_bad_inode(sbi->meta_inode);
>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
>> index 2c53de9251be..b8d09935e43f 100644
>> --- a/fs/f2fs/sysfs.c
>> +++ b/fs/f2fs/sysfs.c
>> @@ -46,7 +46,7 @@ struct f2fs_attr {
>>  static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
>>  {
>>  	if (struct_type == GC_THREAD)
>> -		return (unsigned char *)sbi->gc_thread;
>> +		return (unsigned char *)GC_I(sbi);
>>  	else if (struct_type == SM_INFO)
>>  		return (unsigned char *)SM_I(sbi);
>>  	else if (struct_type == DCC_INFO)
>> @@ -248,16 +248,28 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
>>  	if (!strcmp(a->attr.name, "trim_sections"))
>>  		return -EINVAL;
>>  
>> -	*ui = t;
>> -
>> -	if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0)
>> +	if (!strcmp(a->attr.name, "iostat_enable") && t == 0)
>>  		f2fs_reset_iostat(sbi);
>> -	if (!strcmp(a->attr.name, "gc_urgent") && t == 1 && sbi->gc_thread) {
>> -		sbi->gc_thread->gc_wake = 1;
>> -		wake_up_interruptible_all(&sbi->gc_thread->gc_wait_queue_head);
>> +
>> +	if (a->struct_type == GC_THREAD) {
>> +		down_write(&GC_I(sbi)->gc_rwsem);
>> +		if (!GC_I(sbi)->f2fs_gc_task) {
>> +			up_write(&GC_I(sbi)->gc_rwsem);
>> +			return -EINVAL;
>> +		}
>> +	}
>> +
>> +	if (!strcmp(a->attr.name, "gc_urgent") && t == 1) {
>> +		GC_I(sbi)->gc_wake = 1;
>> +		wake_up_interruptible_all(&GC_I(sbi)->gc_wait_queue_head);
>>  		wake_up_discard_thread(sbi, true);
>>  	}
>>  
>> +	*ui = t;
>> +
>> +	if (a->struct_type == GC_THREAD)
>> +		up_write(&GC_I(sbi)->gc_rwsem);
>> +
>>  	return count;
>>  }
>>  
>> -- 
>> 2.15.0.55.gc2ece9dc4de6
> 
> .
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] f2fs: fix to avoid race during access gc_thread pointer
  2018-04-27  2:04     ` Chao Yu
@ 2018-04-27  2:07       ` Chao Yu
  -1 siblings, 0 replies; 16+ messages in thread
From: Chao Yu @ 2018-04-27  2:07 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-f2fs-devel, linux-kernel, chao

On 2018/4/27 10:04, Chao Yu wrote:
> On 2018/4/27 0:03, Jaegeuk Kim wrote:
>> On 04/24, Chao Yu wrote:
>>> Thread A			Thread B		Thread C
>>> - f2fs_remount
>>>  - stop_gc_thread
>>> 				- f2fs_sbi_store
>>> 							- issue_discard_thread
>>>    sbi->gc_thread = NULL;
>>> 				  sbi->gc_thread->gc_wake = 1
>>> 							  access sbi->gc_thread->gc_urgent
>>>
>>> Previously, we allocate memory for sbi->gc_thread based on background
>>> gc thread mount option, the memory can be released if we turn off
>>> that mount option, but still there are several places access gc_thread
>>> pointer without considering race condition, result in NULL point
>>> dereference.
>>
>> Do we just need to check &sb->s_umount in f2fs_sbi_store() and
> 
> Better,
> 
>> issue_discard_thread?
> 
> There is more cases can be called from different scenarios:
> - select_policy()
> - need_SSR()

BTW, do you agree with introducing {init,destroy}_gc_context as the patch does?

Thanks,

> 
> Thanks,
> 
>>
>>>
>>> In order to fix this issue, keep gc_thread structure valid in sbi all
>>> the time instead of alloc/free it dynamically.
>>>
>>> In addition, add a rw semaphore to exclude rw operation in fields of
>>> gc_thread.
>>>
>>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
>>> ---
>>> v2:
>>> - add a rw semaphore to exclude rw operation suggested by Jaegeuk.
>>>  fs/f2fs/debug.c   |  3 +--
>>>  fs/f2fs/f2fs.h    |  9 ++++++-
>>>  fs/f2fs/gc.c      | 78 ++++++++++++++++++++++++++++++++++++-------------------
>>>  fs/f2fs/gc.h      |  1 +
>>>  fs/f2fs/segment.c | 10 +++++--
>>>  fs/f2fs/super.c   | 26 +++++++++++++------
>>>  fs/f2fs/sysfs.c   | 26 ++++++++++++++-----
>>>  7 files changed, 107 insertions(+), 46 deletions(-)
>>>
>>> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
>>> index a66107b5cfff..0fbd674c66fb 100644
>>> --- a/fs/f2fs/debug.c
>>> +++ b/fs/f2fs/debug.c
>>> @@ -221,8 +221,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
>>>  	si->cache_mem = 0;
>>>  
>>>  	/* build gc */
>>> -	if (sbi->gc_thread)
>>> -		si->cache_mem += sizeof(struct f2fs_gc_kthread);
>>> +	si->cache_mem += sizeof(struct f2fs_gc_kthread);
>>>  
>>>  	/* build merge flush thread */
>>>  	if (SM_I(sbi)->fcc_info)
>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>> index 8f3ad9662d13..75d3b4875429 100644
>>> --- a/fs/f2fs/f2fs.h
>>> +++ b/fs/f2fs/f2fs.h
>>> @@ -1412,6 +1412,11 @@ static inline struct sit_info *SIT_I(struct f2fs_sb_info *sbi)
>>>  	return (struct sit_info *)(SM_I(sbi)->sit_info);
>>>  }
>>>  
>>> +static inline struct f2fs_gc_kthread *GC_I(struct f2fs_sb_info *sbi)
>>> +{
>>> +	return (struct f2fs_gc_kthread *)(sbi->gc_thread);
>>> +}
>>> +
>>>  static inline struct free_segmap_info *FREE_I(struct f2fs_sb_info *sbi)
>>>  {
>>>  	return (struct free_segmap_info *)(SM_I(sbi)->free_info);
>>> @@ -2954,8 +2959,10 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len);
>>>  /*
>>>   * gc.c
>>>   */
>>> +int init_gc_context(struct f2fs_sb_info *sbi);
>>> +void destroy_gc_context(struct f2fs_sb_info * sbi);
>>>  int start_gc_thread(struct f2fs_sb_info *sbi);
>>> -void stop_gc_thread(struct f2fs_sb_info *sbi);
>>> +bool stop_gc_thread(struct f2fs_sb_info *sbi);
>>>  block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
>>>  int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background,
>>>  			unsigned int segno);
>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>> index 70418b34c5f6..2febb84b2fd6 100644
>>> --- a/fs/f2fs/gc.c
>>> +++ b/fs/f2fs/gc.c
>>> @@ -26,8 +26,8 @@
>>>  static int gc_thread_func(void *data)
>>>  {
>>>  	struct f2fs_sb_info *sbi = data;
>>> -	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
>>> -	wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
>>> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
>>> +	wait_queue_head_t *wq = &gc_th->gc_wait_queue_head;
>>>  	unsigned int wait_ms;
>>>  
>>>  	wait_ms = gc_th->min_sleep_time;
>>> @@ -114,17 +114,15 @@ static int gc_thread_func(void *data)
>>>  	return 0;
>>>  }
>>>  
>>> -int start_gc_thread(struct f2fs_sb_info *sbi)
>>> +int init_gc_context(struct f2fs_sb_info *sbi)
>>>  {
>>>  	struct f2fs_gc_kthread *gc_th;
>>> -	dev_t dev = sbi->sb->s_bdev->bd_dev;
>>> -	int err = 0;
>>>  
>>>  	gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
>>> -	if (!gc_th) {
>>> -		err = -ENOMEM;
>>> -		goto out;
>>> -	}
>>> +	if (!gc_th)
>>> +		return -ENOMEM;
>>> +
>>> +	gc_th->f2fs_gc_task = NULL;
>>>  
>>>  	gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
>>>  	gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
>>> @@ -135,35 +133,59 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
>>>  	gc_th->gc_urgent = 0;
>>>  	gc_th->gc_wake= 0;
>>>  
>>> +	init_rwsem(&gc_th->gc_rwsem);
>>> +
>>>  	sbi->gc_thread = gc_th;
>>> -	init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
>>> -	sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +void destroy_gc_context(struct f2fs_sb_info *sbi)
>>> +{
>>> +	kfree(GC_I(sbi));
>>> +	sbi->gc_thread = NULL;
>>> +}
>>> +
>>> +int start_gc_thread(struct f2fs_sb_info *sbi)
>>> +{
>>> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
>>> +	dev_t dev = sbi->sb->s_bdev->bd_dev;
>>> +	int err = 0;
>>> +
>>> +	init_waitqueue_head(&gc_th->gc_wait_queue_head);
>>> +	gc_th->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
>>>  			"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
>>>  	if (IS_ERR(gc_th->f2fs_gc_task)) {
>>>  		err = PTR_ERR(gc_th->f2fs_gc_task);
>>> -		kfree(gc_th);
>>> -		sbi->gc_thread = NULL;
>>> +		gc_th->f2fs_gc_task = NULL;
>>>  	}
>>> -out:
>>> +
>>>  	return err;
>>>  }
>>>  
>>> -void stop_gc_thread(struct f2fs_sb_info *sbi)
>>> +bool stop_gc_thread(struct f2fs_sb_info *sbi)
>>>  {
>>> -	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
>>> -	if (!gc_th)
>>> -		return;
>>> -	kthread_stop(gc_th->f2fs_gc_task);
>>> -	kfree(gc_th);
>>> -	sbi->gc_thread = NULL;
>>> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
>>> +	bool stopped = false;
>>> +
>>> +	down_write(&gc_th->gc_rwsem);
>>> +	if (gc_th->f2fs_gc_task) {
>>> +		kthread_stop(gc_th->f2fs_gc_task);
>>> +		gc_th->f2fs_gc_task = NULL;
>>> +		stopped = true;
>>> +	}
>>> +	up_write(&gc_th->gc_rwsem);
>>> +
>>> +	return stopped;
>>>  }
>>>  
>>>  static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
>>>  {
>>>  	int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
>>>  
>>> -	if (!gc_th)
>>> -		return gc_mode;
>>> +	down_read(&gc_th->gc_rwsem);
>>> +	if (!gc_th->f2fs_gc_task)
>>> +		goto out;
>>>  
>>>  	if (gc_th->gc_idle) {
>>>  		if (gc_th->gc_idle == 1)
>>> @@ -173,6 +195,8 @@ static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
>>>  	}
>>>  	if (gc_th->gc_urgent)
>>>  		gc_mode = GC_GREEDY;
>>> +out:
>>> +	up_read(&gc_th->gc_rwsem);
>>>  	return gc_mode;
>>>  }
>>>  
>>> @@ -187,17 +211,19 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
>>>  		p->max_search = dirty_i->nr_dirty[type];
>>>  		p->ofs_unit = 1;
>>>  	} else {
>>> -		p->gc_mode = select_gc_type(sbi->gc_thread, gc_type);
>>> +		p->gc_mode = select_gc_type(GC_I(sbi), gc_type);
>>>  		p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
>>>  		p->max_search = dirty_i->nr_dirty[DIRTY];
>>>  		p->ofs_unit = sbi->segs_per_sec;
>>>  	}
>>>  
>>>  	/* we need to check every dirty segments in the FG_GC case */
>>> -	if (gc_type != FG_GC &&
>>> -			(sbi->gc_thread && !sbi->gc_thread->gc_urgent) &&
>>> +	down_read(&GC_I(sbi)->gc_rwsem);
>>> +	if (gc_type != FG_GC && GC_I(sbi)->f2fs_gc_task &&
>>> +			!GC_I(sbi)->gc_urgent &&
>>>  			p->max_search > sbi->max_victim_search)
>>>  		p->max_search = sbi->max_victim_search;
>>> +	up_read(&GC_I(sbi)->gc_rwsem);
>>>  
>>>  	/* let's select beginning hot/small space first in no_heap mode*/
>>>  	if (test_opt(sbi, NOHEAP) &&
>>> diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
>>> index b0045d4c8d1e..9a5b273328c2 100644
>>> --- a/fs/f2fs/gc.h
>>> +++ b/fs/f2fs/gc.h
>>> @@ -26,6 +26,7 @@
>>>  #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */
>>>  
>>>  struct f2fs_gc_kthread {
>>> +	struct rw_semaphore gc_rwsem;		/* semaphore for f2fs_gc_task */
>>>  	struct task_struct *f2fs_gc_task;
>>>  	wait_queue_head_t gc_wait_queue_head;
>>>  
>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>> index 5960768d26df..f787eea1b2f6 100644
>>> --- a/fs/f2fs/segment.c
>>> +++ b/fs/f2fs/segment.c
>>> @@ -177,8 +177,12 @@ bool need_SSR(struct f2fs_sb_info *sbi)
>>>  
>>>  	if (test_opt(sbi, LFS))
>>>  		return false;
>>> -	if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
>>> +	down_read(&GC_I(sbi)->gc_rwsem);
>>> +	if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent) {
>>> +		down_read(&GC_I(sbi)->gc_rwsem);
>>>  		return true;
>>> +	}
>>> +	up_read(&GC_I(sbi)->gc_rwsem);
>>>  
>>>  	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
>>>  			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
>>> @@ -1425,8 +1429,10 @@ static int issue_discard_thread(void *data)
>>>  		if (dcc->discard_wake)
>>>  			dcc->discard_wake = 0;
>>>  
>>> -		if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
>>> +		down_read(&GC_I(sbi)->gc_rwsem);
>>> +		if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent)
>>>  			init_discard_policy(&dpolicy, DPOLICY_FORCE, 1);
>>> +		up_read(&GC_I(sbi)->gc_rwsem);
>>>  
>>>  		sb_start_intwrite(sbi->sb);
>>>  
>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>> index dedb8e50b440..199f29dce86d 100644
>>> --- a/fs/f2fs/super.c
>>> +++ b/fs/f2fs/super.c
>>> @@ -1044,6 +1044,7 @@ static void f2fs_put_super(struct super_block *sb)
>>>  	kfree(sbi->raw_super);
>>>  
>>>  	destroy_device_list(sbi);
>>> +	destroy_gc_context(sbi);
>>>  	mempool_destroy(sbi->write_io_dummy);
>>>  #ifdef CONFIG_QUOTA
>>>  	for (i = 0; i < MAXQUOTAS; i++)
>>> @@ -1476,15 +1477,18 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
>>>  	 * option. Also sync the filesystem.
>>>  	 */
>>>  	if ((*flags & SB_RDONLY) || !test_opt(sbi, BG_GC)) {
>>> -		if (sbi->gc_thread) {
>>> -			stop_gc_thread(sbi);
>>> -			need_restart_gc = true;
>>> +		need_restart_gc = stop_gc_thread(sbi);
>>> +	} else {
>>> +		down_write(&GC_I(sbi)->gc_rwsem);
>>> +		if (!GC_I(sbi)->f2fs_gc_task) {
>>> +			err = start_gc_thread(sbi);
>>> +			if (err) {
>>> +				up_write(&GC_I(sbi)->gc_rwsem);
>>> +				goto restore_opts;
>>> +			}
>>> +			need_stop_gc = true;
>>>  		}
>>> -	} else if (!sbi->gc_thread) {
>>> -		err = start_gc_thread(sbi);
>>> -		if (err)
>>> -			goto restore_opts;
>>> -		need_stop_gc = true;
>>> +		up_write(&GC_I(sbi)->gc_rwsem);
>>>  	}
>>>  
>>>  	if (*flags & SB_RDONLY ||
>>> @@ -2771,6 +2775,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>  		goto free_meta_inode;
>>>  	}
>>>  
>>> +	err = init_gc_context(sbi);
>>> +	if (err)
>>> +		goto free_checkpoint;
>>> +
>>>  	/* Initialize device list */
>>>  	err = f2fs_scan_devices(sbi);
>>>  	if (err) {
>>> @@ -2981,6 +2989,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>  	destroy_segment_manager(sbi);
>>>  free_devices:
>>>  	destroy_device_list(sbi);
>>> +	destroy_gc_context(sbi);
>>> +free_checkpoint:
>>>  	kfree(sbi->ckpt);
>>>  free_meta_inode:
>>>  	make_bad_inode(sbi->meta_inode);
>>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
>>> index 2c53de9251be..b8d09935e43f 100644
>>> --- a/fs/f2fs/sysfs.c
>>> +++ b/fs/f2fs/sysfs.c
>>> @@ -46,7 +46,7 @@ struct f2fs_attr {
>>>  static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
>>>  {
>>>  	if (struct_type == GC_THREAD)
>>> -		return (unsigned char *)sbi->gc_thread;
>>> +		return (unsigned char *)GC_I(sbi);
>>>  	else if (struct_type == SM_INFO)
>>>  		return (unsigned char *)SM_I(sbi);
>>>  	else if (struct_type == DCC_INFO)
>>> @@ -248,16 +248,28 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
>>>  	if (!strcmp(a->attr.name, "trim_sections"))
>>>  		return -EINVAL;
>>>  
>>> -	*ui = t;
>>> -
>>> -	if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0)
>>> +	if (!strcmp(a->attr.name, "iostat_enable") && t == 0)
>>>  		f2fs_reset_iostat(sbi);
>>> -	if (!strcmp(a->attr.name, "gc_urgent") && t == 1 && sbi->gc_thread) {
>>> -		sbi->gc_thread->gc_wake = 1;
>>> -		wake_up_interruptible_all(&sbi->gc_thread->gc_wait_queue_head);
>>> +
>>> +	if (a->struct_type == GC_THREAD) {
>>> +		down_write(&GC_I(sbi)->gc_rwsem);
>>> +		if (!GC_I(sbi)->f2fs_gc_task) {
>>> +			up_write(&GC_I(sbi)->gc_rwsem);
>>> +			return -EINVAL;
>>> +		}
>>> +	}
>>> +
>>> +	if (!strcmp(a->attr.name, "gc_urgent") && t == 1) {
>>> +		GC_I(sbi)->gc_wake = 1;
>>> +		wake_up_interruptible_all(&GC_I(sbi)->gc_wait_queue_head);
>>>  		wake_up_discard_thread(sbi, true);
>>>  	}
>>>  
>>> +	*ui = t;
>>> +
>>> +	if (a->struct_type == GC_THREAD)
>>> +		up_write(&GC_I(sbi)->gc_rwsem);
>>> +
>>>  	return count;
>>>  }
>>>  
>>> -- 
>>> 2.15.0.55.gc2ece9dc4de6
>>
>> .
>>
> 
> 
> .
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] f2fs: fix to avoid race during access gc_thread pointer
@ 2018-04-27  2:07       ` Chao Yu
  0 siblings, 0 replies; 16+ messages in thread
From: Chao Yu @ 2018-04-27  2:07 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-f2fs-devel, linux-kernel, chao

On 2018/4/27 10:04, Chao Yu wrote:
> On 2018/4/27 0:03, Jaegeuk Kim wrote:
>> On 04/24, Chao Yu wrote:
>>> Thread A			Thread B		Thread C
>>> - f2fs_remount
>>>  - stop_gc_thread
>>> 				- f2fs_sbi_store
>>> 							- issue_discard_thread
>>>    sbi->gc_thread = NULL;
>>> 				  sbi->gc_thread->gc_wake = 1
>>> 							  access sbi->gc_thread->gc_urgent
>>>
>>> Previously, we allocate memory for sbi->gc_thread based on background
>>> gc thread mount option, the memory can be released if we turn off
>>> that mount option, but still there are several places access gc_thread
>>> pointer without considering race condition, result in NULL point
>>> dereference.
>>
>> Do we just need to check &sb->s_umount in f2fs_sbi_store() and
> 
> Better,
> 
>> issue_discard_thread?
> 
> There is more cases can be called from different scenarios:
> - select_policy()
> - need_SSR()

BTW, do you agree with introducing {init,destroy}_gc_context as the patch does?

Thanks,

> 
> Thanks,
> 
>>
>>>
>>> In order to fix this issue, keep gc_thread structure valid in sbi all
>>> the time instead of alloc/free it dynamically.
>>>
>>> In addition, add a rw semaphore to exclude rw operation in fields of
>>> gc_thread.
>>>
>>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
>>> ---
>>> v2:
>>> - add a rw semaphore to exclude rw operation suggested by Jaegeuk.
>>>  fs/f2fs/debug.c   |  3 +--
>>>  fs/f2fs/f2fs.h    |  9 ++++++-
>>>  fs/f2fs/gc.c      | 78 ++++++++++++++++++++++++++++++++++++-------------------
>>>  fs/f2fs/gc.h      |  1 +
>>>  fs/f2fs/segment.c | 10 +++++--
>>>  fs/f2fs/super.c   | 26 +++++++++++++------
>>>  fs/f2fs/sysfs.c   | 26 ++++++++++++++-----
>>>  7 files changed, 107 insertions(+), 46 deletions(-)
>>>
>>> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
>>> index a66107b5cfff..0fbd674c66fb 100644
>>> --- a/fs/f2fs/debug.c
>>> +++ b/fs/f2fs/debug.c
>>> @@ -221,8 +221,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
>>>  	si->cache_mem = 0;
>>>  
>>>  	/* build gc */
>>> -	if (sbi->gc_thread)
>>> -		si->cache_mem += sizeof(struct f2fs_gc_kthread);
>>> +	si->cache_mem += sizeof(struct f2fs_gc_kthread);
>>>  
>>>  	/* build merge flush thread */
>>>  	if (SM_I(sbi)->fcc_info)
>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>> index 8f3ad9662d13..75d3b4875429 100644
>>> --- a/fs/f2fs/f2fs.h
>>> +++ b/fs/f2fs/f2fs.h
>>> @@ -1412,6 +1412,11 @@ static inline struct sit_info *SIT_I(struct f2fs_sb_info *sbi)
>>>  	return (struct sit_info *)(SM_I(sbi)->sit_info);
>>>  }
>>>  
>>> +static inline struct f2fs_gc_kthread *GC_I(struct f2fs_sb_info *sbi)
>>> +{
>>> +	return (struct f2fs_gc_kthread *)(sbi->gc_thread);
>>> +}
>>> +
>>>  static inline struct free_segmap_info *FREE_I(struct f2fs_sb_info *sbi)
>>>  {
>>>  	return (struct free_segmap_info *)(SM_I(sbi)->free_info);
>>> @@ -2954,8 +2959,10 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len);
>>>  /*
>>>   * gc.c
>>>   */
>>> +int init_gc_context(struct f2fs_sb_info *sbi);
>>> +void destroy_gc_context(struct f2fs_sb_info * sbi);
>>>  int start_gc_thread(struct f2fs_sb_info *sbi);
>>> -void stop_gc_thread(struct f2fs_sb_info *sbi);
>>> +bool stop_gc_thread(struct f2fs_sb_info *sbi);
>>>  block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
>>>  int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background,
>>>  			unsigned int segno);
>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>> index 70418b34c5f6..2febb84b2fd6 100644
>>> --- a/fs/f2fs/gc.c
>>> +++ b/fs/f2fs/gc.c
>>> @@ -26,8 +26,8 @@
>>>  static int gc_thread_func(void *data)
>>>  {
>>>  	struct f2fs_sb_info *sbi = data;
>>> -	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
>>> -	wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
>>> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
>>> +	wait_queue_head_t *wq = &gc_th->gc_wait_queue_head;
>>>  	unsigned int wait_ms;
>>>  
>>>  	wait_ms = gc_th->min_sleep_time;
>>> @@ -114,17 +114,15 @@ static int gc_thread_func(void *data)
>>>  	return 0;
>>>  }
>>>  
>>> -int start_gc_thread(struct f2fs_sb_info *sbi)
>>> +int init_gc_context(struct f2fs_sb_info *sbi)
>>>  {
>>>  	struct f2fs_gc_kthread *gc_th;
>>> -	dev_t dev = sbi->sb->s_bdev->bd_dev;
>>> -	int err = 0;
>>>  
>>>  	gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
>>> -	if (!gc_th) {
>>> -		err = -ENOMEM;
>>> -		goto out;
>>> -	}
>>> +	if (!gc_th)
>>> +		return -ENOMEM;
>>> +
>>> +	gc_th->f2fs_gc_task = NULL;
>>>  
>>>  	gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
>>>  	gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
>>> @@ -135,35 +133,59 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
>>>  	gc_th->gc_urgent = 0;
>>>  	gc_th->gc_wake= 0;
>>>  
>>> +	init_rwsem(&gc_th->gc_rwsem);
>>> +
>>>  	sbi->gc_thread = gc_th;
>>> -	init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
>>> -	sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +void destroy_gc_context(struct f2fs_sb_info *sbi)
>>> +{
>>> +	kfree(GC_I(sbi));
>>> +	sbi->gc_thread = NULL;
>>> +}
>>> +
>>> +int start_gc_thread(struct f2fs_sb_info *sbi)
>>> +{
>>> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
>>> +	dev_t dev = sbi->sb->s_bdev->bd_dev;
>>> +	int err = 0;
>>> +
>>> +	init_waitqueue_head(&gc_th->gc_wait_queue_head);
>>> +	gc_th->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
>>>  			"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
>>>  	if (IS_ERR(gc_th->f2fs_gc_task)) {
>>>  		err = PTR_ERR(gc_th->f2fs_gc_task);
>>> -		kfree(gc_th);
>>> -		sbi->gc_thread = NULL;
>>> +		gc_th->f2fs_gc_task = NULL;
>>>  	}
>>> -out:
>>> +
>>>  	return err;
>>>  }
>>>  
>>> -void stop_gc_thread(struct f2fs_sb_info *sbi)
>>> +bool stop_gc_thread(struct f2fs_sb_info *sbi)
>>>  {
>>> -	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
>>> -	if (!gc_th)
>>> -		return;
>>> -	kthread_stop(gc_th->f2fs_gc_task);
>>> -	kfree(gc_th);
>>> -	sbi->gc_thread = NULL;
>>> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
>>> +	bool stopped = false;
>>> +
>>> +	down_write(&gc_th->gc_rwsem);
>>> +	if (gc_th->f2fs_gc_task) {
>>> +		kthread_stop(gc_th->f2fs_gc_task);
>>> +		gc_th->f2fs_gc_task = NULL;
>>> +		stopped = true;
>>> +	}
>>> +	up_write(&gc_th->gc_rwsem);
>>> +
>>> +	return stopped;
>>>  }
>>>  
>>>  static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
>>>  {
>>>  	int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
>>>  
>>> -	if (!gc_th)
>>> -		return gc_mode;
>>> +	down_read(&gc_th->gc_rwsem);
>>> +	if (!gc_th->f2fs_gc_task)
>>> +		goto out;
>>>  
>>>  	if (gc_th->gc_idle) {
>>>  		if (gc_th->gc_idle == 1)
>>> @@ -173,6 +195,8 @@ static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
>>>  	}
>>>  	if (gc_th->gc_urgent)
>>>  		gc_mode = GC_GREEDY;
>>> +out:
>>> +	up_read(&gc_th->gc_rwsem);
>>>  	return gc_mode;
>>>  }
>>>  
>>> @@ -187,17 +211,19 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
>>>  		p->max_search = dirty_i->nr_dirty[type];
>>>  		p->ofs_unit = 1;
>>>  	} else {
>>> -		p->gc_mode = select_gc_type(sbi->gc_thread, gc_type);
>>> +		p->gc_mode = select_gc_type(GC_I(sbi), gc_type);
>>>  		p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
>>>  		p->max_search = dirty_i->nr_dirty[DIRTY];
>>>  		p->ofs_unit = sbi->segs_per_sec;
>>>  	}
>>>  
>>>  	/* we need to check every dirty segments in the FG_GC case */
>>> -	if (gc_type != FG_GC &&
>>> -			(sbi->gc_thread && !sbi->gc_thread->gc_urgent) &&
>>> +	down_read(&GC_I(sbi)->gc_rwsem);
>>> +	if (gc_type != FG_GC && GC_I(sbi)->f2fs_gc_task &&
>>> +			!GC_I(sbi)->gc_urgent &&
>>>  			p->max_search > sbi->max_victim_search)
>>>  		p->max_search = sbi->max_victim_search;
>>> +	up_read(&GC_I(sbi)->gc_rwsem);
>>>  
>>>  	/* let's select beginning hot/small space first in no_heap mode*/
>>>  	if (test_opt(sbi, NOHEAP) &&
>>> diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
>>> index b0045d4c8d1e..9a5b273328c2 100644
>>> --- a/fs/f2fs/gc.h
>>> +++ b/fs/f2fs/gc.h
>>> @@ -26,6 +26,7 @@
>>>  #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */
>>>  
>>>  struct f2fs_gc_kthread {
>>> +	struct rw_semaphore gc_rwsem;		/* semaphore for f2fs_gc_task */
>>>  	struct task_struct *f2fs_gc_task;
>>>  	wait_queue_head_t gc_wait_queue_head;
>>>  
>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>> index 5960768d26df..f787eea1b2f6 100644
>>> --- a/fs/f2fs/segment.c
>>> +++ b/fs/f2fs/segment.c
>>> @@ -177,8 +177,12 @@ bool need_SSR(struct f2fs_sb_info *sbi)
>>>  
>>>  	if (test_opt(sbi, LFS))
>>>  		return false;
>>> -	if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
>>> +	down_read(&GC_I(sbi)->gc_rwsem);
>>> +	if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent) {
>>> +		down_read(&GC_I(sbi)->gc_rwsem);
>>>  		return true;
>>> +	}
>>> +	up_read(&GC_I(sbi)->gc_rwsem);
>>>  
>>>  	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
>>>  			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
>>> @@ -1425,8 +1429,10 @@ static int issue_discard_thread(void *data)
>>>  		if (dcc->discard_wake)
>>>  			dcc->discard_wake = 0;
>>>  
>>> -		if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
>>> +		down_read(&GC_I(sbi)->gc_rwsem);
>>> +		if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent)
>>>  			init_discard_policy(&dpolicy, DPOLICY_FORCE, 1);
>>> +		up_read(&GC_I(sbi)->gc_rwsem);
>>>  
>>>  		sb_start_intwrite(sbi->sb);
>>>  
>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>> index dedb8e50b440..199f29dce86d 100644
>>> --- a/fs/f2fs/super.c
>>> +++ b/fs/f2fs/super.c
>>> @@ -1044,6 +1044,7 @@ static void f2fs_put_super(struct super_block *sb)
>>>  	kfree(sbi->raw_super);
>>>  
>>>  	destroy_device_list(sbi);
>>> +	destroy_gc_context(sbi);
>>>  	mempool_destroy(sbi->write_io_dummy);
>>>  #ifdef CONFIG_QUOTA
>>>  	for (i = 0; i < MAXQUOTAS; i++)
>>> @@ -1476,15 +1477,18 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
>>>  	 * option. Also sync the filesystem.
>>>  	 */
>>>  	if ((*flags & SB_RDONLY) || !test_opt(sbi, BG_GC)) {
>>> -		if (sbi->gc_thread) {
>>> -			stop_gc_thread(sbi);
>>> -			need_restart_gc = true;
>>> +		need_restart_gc = stop_gc_thread(sbi);
>>> +	} else {
>>> +		down_write(&GC_I(sbi)->gc_rwsem);
>>> +		if (!GC_I(sbi)->f2fs_gc_task) {
>>> +			err = start_gc_thread(sbi);
>>> +			if (err) {
>>> +				up_write(&GC_I(sbi)->gc_rwsem);
>>> +				goto restore_opts;
>>> +			}
>>> +			need_stop_gc = true;
>>>  		}
>>> -	} else if (!sbi->gc_thread) {
>>> -		err = start_gc_thread(sbi);
>>> -		if (err)
>>> -			goto restore_opts;
>>> -		need_stop_gc = true;
>>> +		up_write(&GC_I(sbi)->gc_rwsem);
>>>  	}
>>>  
>>>  	if (*flags & SB_RDONLY ||
>>> @@ -2771,6 +2775,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>  		goto free_meta_inode;
>>>  	}
>>>  
>>> +	err = init_gc_context(sbi);
>>> +	if (err)
>>> +		goto free_checkpoint;
>>> +
>>>  	/* Initialize device list */
>>>  	err = f2fs_scan_devices(sbi);
>>>  	if (err) {
>>> @@ -2981,6 +2989,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>  	destroy_segment_manager(sbi);
>>>  free_devices:
>>>  	destroy_device_list(sbi);
>>> +	destroy_gc_context(sbi);
>>> +free_checkpoint:
>>>  	kfree(sbi->ckpt);
>>>  free_meta_inode:
>>>  	make_bad_inode(sbi->meta_inode);
>>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
>>> index 2c53de9251be..b8d09935e43f 100644
>>> --- a/fs/f2fs/sysfs.c
>>> +++ b/fs/f2fs/sysfs.c
>>> @@ -46,7 +46,7 @@ struct f2fs_attr {
>>>  static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
>>>  {
>>>  	if (struct_type == GC_THREAD)
>>> -		return (unsigned char *)sbi->gc_thread;
>>> +		return (unsigned char *)GC_I(sbi);
>>>  	else if (struct_type == SM_INFO)
>>>  		return (unsigned char *)SM_I(sbi);
>>>  	else if (struct_type == DCC_INFO)
>>> @@ -248,16 +248,28 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
>>>  	if (!strcmp(a->attr.name, "trim_sections"))
>>>  		return -EINVAL;
>>>  
>>> -	*ui = t;
>>> -
>>> -	if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0)
>>> +	if (!strcmp(a->attr.name, "iostat_enable") && t == 0)
>>>  		f2fs_reset_iostat(sbi);
>>> -	if (!strcmp(a->attr.name, "gc_urgent") && t == 1 && sbi->gc_thread) {
>>> -		sbi->gc_thread->gc_wake = 1;
>>> -		wake_up_interruptible_all(&sbi->gc_thread->gc_wait_queue_head);
>>> +
>>> +	if (a->struct_type == GC_THREAD) {
>>> +		down_write(&GC_I(sbi)->gc_rwsem);
>>> +		if (!GC_I(sbi)->f2fs_gc_task) {
>>> +			up_write(&GC_I(sbi)->gc_rwsem);
>>> +			return -EINVAL;
>>> +		}
>>> +	}
>>> +
>>> +	if (!strcmp(a->attr.name, "gc_urgent") && t == 1) {
>>> +		GC_I(sbi)->gc_wake = 1;
>>> +		wake_up_interruptible_all(&GC_I(sbi)->gc_wait_queue_head);
>>>  		wake_up_discard_thread(sbi, true);
>>>  	}
>>>  
>>> +	*ui = t;
>>> +
>>> +	if (a->struct_type == GC_THREAD)
>>> +		up_write(&GC_I(sbi)->gc_rwsem);
>>> +
>>>  	return count;
>>>  }
>>>  
>>> -- 
>>> 2.15.0.55.gc2ece9dc4de6
>>
>> .
>>
> 
> 
> .
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] f2fs: fix to avoid race during access gc_thread pointer
  2018-04-27  2:04     ` Chao Yu
  (?)
  (?)
@ 2018-04-28  2:36     ` Jaegeuk Kim
  2018-05-02  1:47         ` Chao Yu
  -1 siblings, 1 reply; 16+ messages in thread
From: Jaegeuk Kim @ 2018-04-28  2:36 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-f2fs-devel, linux-kernel, chao

On 04/27, Chao Yu wrote:
> On 2018/4/27 0:03, Jaegeuk Kim wrote:
> > On 04/24, Chao Yu wrote:
> >> Thread A			Thread B		Thread C
> >> - f2fs_remount
> >>  - stop_gc_thread
> >> 				- f2fs_sbi_store
> >> 							- issue_discard_thread
> >>    sbi->gc_thread = NULL;
> >> 				  sbi->gc_thread->gc_wake = 1
> >> 							  access sbi->gc_thread->gc_urgent
> >>
> >> Previously, we allocate memory for sbi->gc_thread based on background
> >> gc thread mount option, the memory can be released if we turn off
> >> that mount option, but still there are several places access gc_thread
> >> pointer without considering race condition, result in NULL point
> >> dereference.
> > 
> > Do we just need to check &sb->s_umount in f2fs_sbi_store() and
> 
> Better,
> 
> > issue_discard_thread?
> 
> There is more cases can be called from different scenarios:
> - select_policy()
> - need_SSR()

No? They should be blocked by remount(2).

> 
> Thanks,
> 
> > 
> >>
> >> In order to fix this issue, keep gc_thread structure valid in sbi all
> >> the time instead of alloc/free it dynamically.
> >>
> >> In addition, add a rw semaphore to exclude rw operation in fields of
> >> gc_thread.
> >>
> >> Signed-off-by: Chao Yu <yuchao0@huawei.com>
> >> ---
> >> v2:
> >> - add a rw semaphore to exclude rw operation suggested by Jaegeuk.
> >>  fs/f2fs/debug.c   |  3 +--
> >>  fs/f2fs/f2fs.h    |  9 ++++++-
> >>  fs/f2fs/gc.c      | 78 ++++++++++++++++++++++++++++++++++++-------------------
> >>  fs/f2fs/gc.h      |  1 +
> >>  fs/f2fs/segment.c | 10 +++++--
> >>  fs/f2fs/super.c   | 26 +++++++++++++------
> >>  fs/f2fs/sysfs.c   | 26 ++++++++++++++-----
> >>  7 files changed, 107 insertions(+), 46 deletions(-)
> >>
> >> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> >> index a66107b5cfff..0fbd674c66fb 100644
> >> --- a/fs/f2fs/debug.c
> >> +++ b/fs/f2fs/debug.c
> >> @@ -221,8 +221,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
> >>  	si->cache_mem = 0;
> >>  
> >>  	/* build gc */
> >> -	if (sbi->gc_thread)
> >> -		si->cache_mem += sizeof(struct f2fs_gc_kthread);
> >> +	si->cache_mem += sizeof(struct f2fs_gc_kthread);
> >>  
> >>  	/* build merge flush thread */
> >>  	if (SM_I(sbi)->fcc_info)
> >> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >> index 8f3ad9662d13..75d3b4875429 100644
> >> --- a/fs/f2fs/f2fs.h
> >> +++ b/fs/f2fs/f2fs.h
> >> @@ -1412,6 +1412,11 @@ static inline struct sit_info *SIT_I(struct f2fs_sb_info *sbi)
> >>  	return (struct sit_info *)(SM_I(sbi)->sit_info);
> >>  }
> >>  
> >> +static inline struct f2fs_gc_kthread *GC_I(struct f2fs_sb_info *sbi)
> >> +{
> >> +	return (struct f2fs_gc_kthread *)(sbi->gc_thread);
> >> +}
> >> +
> >>  static inline struct free_segmap_info *FREE_I(struct f2fs_sb_info *sbi)
> >>  {
> >>  	return (struct free_segmap_info *)(SM_I(sbi)->free_info);
> >> @@ -2954,8 +2959,10 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len);
> >>  /*
> >>   * gc.c
> >>   */
> >> +int init_gc_context(struct f2fs_sb_info *sbi);
> >> +void destroy_gc_context(struct f2fs_sb_info * sbi);
> >>  int start_gc_thread(struct f2fs_sb_info *sbi);
> >> -void stop_gc_thread(struct f2fs_sb_info *sbi);
> >> +bool stop_gc_thread(struct f2fs_sb_info *sbi);
> >>  block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
> >>  int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background,
> >>  			unsigned int segno);
> >> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> >> index 70418b34c5f6..2febb84b2fd6 100644
> >> --- a/fs/f2fs/gc.c
> >> +++ b/fs/f2fs/gc.c
> >> @@ -26,8 +26,8 @@
> >>  static int gc_thread_func(void *data)
> >>  {
> >>  	struct f2fs_sb_info *sbi = data;
> >> -	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
> >> -	wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
> >> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
> >> +	wait_queue_head_t *wq = &gc_th->gc_wait_queue_head;
> >>  	unsigned int wait_ms;
> >>  
> >>  	wait_ms = gc_th->min_sleep_time;
> >> @@ -114,17 +114,15 @@ static int gc_thread_func(void *data)
> >>  	return 0;
> >>  }
> >>  
> >> -int start_gc_thread(struct f2fs_sb_info *sbi)
> >> +int init_gc_context(struct f2fs_sb_info *sbi)
> >>  {
> >>  	struct f2fs_gc_kthread *gc_th;
> >> -	dev_t dev = sbi->sb->s_bdev->bd_dev;
> >> -	int err = 0;
> >>  
> >>  	gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
> >> -	if (!gc_th) {
> >> -		err = -ENOMEM;
> >> -		goto out;
> >> -	}
> >> +	if (!gc_th)
> >> +		return -ENOMEM;
> >> +
> >> +	gc_th->f2fs_gc_task = NULL;
> >>  
> >>  	gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
> >>  	gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
> >> @@ -135,35 +133,59 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
> >>  	gc_th->gc_urgent = 0;
> >>  	gc_th->gc_wake= 0;
> >>  
> >> +	init_rwsem(&gc_th->gc_rwsem);
> >> +
> >>  	sbi->gc_thread = gc_th;
> >> -	init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
> >> -	sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
> >> +
> >> +	return 0;
> >> +}
> >> +
> >> +void destroy_gc_context(struct f2fs_sb_info *sbi)
> >> +{
> >> +	kfree(GC_I(sbi));
> >> +	sbi->gc_thread = NULL;
> >> +}
> >> +
> >> +int start_gc_thread(struct f2fs_sb_info *sbi)
> >> +{
> >> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
> >> +	dev_t dev = sbi->sb->s_bdev->bd_dev;
> >> +	int err = 0;
> >> +
> >> +	init_waitqueue_head(&gc_th->gc_wait_queue_head);
> >> +	gc_th->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
> >>  			"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
> >>  	if (IS_ERR(gc_th->f2fs_gc_task)) {
> >>  		err = PTR_ERR(gc_th->f2fs_gc_task);
> >> -		kfree(gc_th);
> >> -		sbi->gc_thread = NULL;
> >> +		gc_th->f2fs_gc_task = NULL;
> >>  	}
> >> -out:
> >> +
> >>  	return err;
> >>  }
> >>  
> >> -void stop_gc_thread(struct f2fs_sb_info *sbi)
> >> +bool stop_gc_thread(struct f2fs_sb_info *sbi)
> >>  {
> >> -	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
> >> -	if (!gc_th)
> >> -		return;
> >> -	kthread_stop(gc_th->f2fs_gc_task);
> >> -	kfree(gc_th);
> >> -	sbi->gc_thread = NULL;
> >> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
> >> +	bool stopped = false;
> >> +
> >> +	down_write(&gc_th->gc_rwsem);
> >> +	if (gc_th->f2fs_gc_task) {
> >> +		kthread_stop(gc_th->f2fs_gc_task);
> >> +		gc_th->f2fs_gc_task = NULL;
> >> +		stopped = true;
> >> +	}
> >> +	up_write(&gc_th->gc_rwsem);
> >> +
> >> +	return stopped;
> >>  }
> >>  
> >>  static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
> >>  {
> >>  	int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
> >>  
> >> -	if (!gc_th)
> >> -		return gc_mode;
> >> +	down_read(&gc_th->gc_rwsem);
> >> +	if (!gc_th->f2fs_gc_task)
> >> +		goto out;
> >>  
> >>  	if (gc_th->gc_idle) {
> >>  		if (gc_th->gc_idle == 1)
> >> @@ -173,6 +195,8 @@ static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
> >>  	}
> >>  	if (gc_th->gc_urgent)
> >>  		gc_mode = GC_GREEDY;
> >> +out:
> >> +	up_read(&gc_th->gc_rwsem);
> >>  	return gc_mode;
> >>  }
> >>  
> >> @@ -187,17 +211,19 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
> >>  		p->max_search = dirty_i->nr_dirty[type];
> >>  		p->ofs_unit = 1;
> >>  	} else {
> >> -		p->gc_mode = select_gc_type(sbi->gc_thread, gc_type);
> >> +		p->gc_mode = select_gc_type(GC_I(sbi), gc_type);
> >>  		p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
> >>  		p->max_search = dirty_i->nr_dirty[DIRTY];
> >>  		p->ofs_unit = sbi->segs_per_sec;
> >>  	}
> >>  
> >>  	/* we need to check every dirty segments in the FG_GC case */
> >> -	if (gc_type != FG_GC &&
> >> -			(sbi->gc_thread && !sbi->gc_thread->gc_urgent) &&
> >> +	down_read(&GC_I(sbi)->gc_rwsem);
> >> +	if (gc_type != FG_GC && GC_I(sbi)->f2fs_gc_task &&
> >> +			!GC_I(sbi)->gc_urgent &&
> >>  			p->max_search > sbi->max_victim_search)
> >>  		p->max_search = sbi->max_victim_search;
> >> +	up_read(&GC_I(sbi)->gc_rwsem);
> >>  
> >>  	/* let's select beginning hot/small space first in no_heap mode*/
> >>  	if (test_opt(sbi, NOHEAP) &&
> >> diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
> >> index b0045d4c8d1e..9a5b273328c2 100644
> >> --- a/fs/f2fs/gc.h
> >> +++ b/fs/f2fs/gc.h
> >> @@ -26,6 +26,7 @@
> >>  #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */
> >>  
> >>  struct f2fs_gc_kthread {
> >> +	struct rw_semaphore gc_rwsem;		/* semaphore for f2fs_gc_task */
> >>  	struct task_struct *f2fs_gc_task;
> >>  	wait_queue_head_t gc_wait_queue_head;
> >>  
> >> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> >> index 5960768d26df..f787eea1b2f6 100644
> >> --- a/fs/f2fs/segment.c
> >> +++ b/fs/f2fs/segment.c
> >> @@ -177,8 +177,12 @@ bool need_SSR(struct f2fs_sb_info *sbi)
> >>  
> >>  	if (test_opt(sbi, LFS))
> >>  		return false;
> >> -	if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
> >> +	down_read(&GC_I(sbi)->gc_rwsem);
> >> +	if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent) {
> >> +		down_read(&GC_I(sbi)->gc_rwsem);
> >>  		return true;
> >> +	}
> >> +	up_read(&GC_I(sbi)->gc_rwsem);
> >>  
> >>  	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
> >>  			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
> >> @@ -1425,8 +1429,10 @@ static int issue_discard_thread(void *data)
> >>  		if (dcc->discard_wake)
> >>  			dcc->discard_wake = 0;
> >>  
> >> -		if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
> >> +		down_read(&GC_I(sbi)->gc_rwsem);
> >> +		if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent)
> >>  			init_discard_policy(&dpolicy, DPOLICY_FORCE, 1);
> >> +		up_read(&GC_I(sbi)->gc_rwsem);
> >>  
> >>  		sb_start_intwrite(sbi->sb);
> >>  
> >> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> >> index dedb8e50b440..199f29dce86d 100644
> >> --- a/fs/f2fs/super.c
> >> +++ b/fs/f2fs/super.c
> >> @@ -1044,6 +1044,7 @@ static void f2fs_put_super(struct super_block *sb)
> >>  	kfree(sbi->raw_super);
> >>  
> >>  	destroy_device_list(sbi);
> >> +	destroy_gc_context(sbi);
> >>  	mempool_destroy(sbi->write_io_dummy);
> >>  #ifdef CONFIG_QUOTA
> >>  	for (i = 0; i < MAXQUOTAS; i++)
> >> @@ -1476,15 +1477,18 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
> >>  	 * option. Also sync the filesystem.
> >>  	 */
> >>  	if ((*flags & SB_RDONLY) || !test_opt(sbi, BG_GC)) {
> >> -		if (sbi->gc_thread) {
> >> -			stop_gc_thread(sbi);
> >> -			need_restart_gc = true;
> >> +		need_restart_gc = stop_gc_thread(sbi);
> >> +	} else {
> >> +		down_write(&GC_I(sbi)->gc_rwsem);
> >> +		if (!GC_I(sbi)->f2fs_gc_task) {
> >> +			err = start_gc_thread(sbi);
> >> +			if (err) {
> >> +				up_write(&GC_I(sbi)->gc_rwsem);
> >> +				goto restore_opts;
> >> +			}
> >> +			need_stop_gc = true;
> >>  		}
> >> -	} else if (!sbi->gc_thread) {
> >> -		err = start_gc_thread(sbi);
> >> -		if (err)
> >> -			goto restore_opts;
> >> -		need_stop_gc = true;
> >> +		up_write(&GC_I(sbi)->gc_rwsem);
> >>  	}
> >>  
> >>  	if (*flags & SB_RDONLY ||
> >> @@ -2771,6 +2775,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> >>  		goto free_meta_inode;
> >>  	}
> >>  
> >> +	err = init_gc_context(sbi);
> >> +	if (err)
> >> +		goto free_checkpoint;
> >> +
> >>  	/* Initialize device list */
> >>  	err = f2fs_scan_devices(sbi);
> >>  	if (err) {
> >> @@ -2981,6 +2989,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> >>  	destroy_segment_manager(sbi);
> >>  free_devices:
> >>  	destroy_device_list(sbi);
> >> +	destroy_gc_context(sbi);
> >> +free_checkpoint:
> >>  	kfree(sbi->ckpt);
> >>  free_meta_inode:
> >>  	make_bad_inode(sbi->meta_inode);
> >> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
> >> index 2c53de9251be..b8d09935e43f 100644
> >> --- a/fs/f2fs/sysfs.c
> >> +++ b/fs/f2fs/sysfs.c
> >> @@ -46,7 +46,7 @@ struct f2fs_attr {
> >>  static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
> >>  {
> >>  	if (struct_type == GC_THREAD)
> >> -		return (unsigned char *)sbi->gc_thread;
> >> +		return (unsigned char *)GC_I(sbi);
> >>  	else if (struct_type == SM_INFO)
> >>  		return (unsigned char *)SM_I(sbi);
> >>  	else if (struct_type == DCC_INFO)
> >> @@ -248,16 +248,28 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
> >>  	if (!strcmp(a->attr.name, "trim_sections"))
> >>  		return -EINVAL;
> >>  
> >> -	*ui = t;
> >> -
> >> -	if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0)
> >> +	if (!strcmp(a->attr.name, "iostat_enable") && t == 0)
> >>  		f2fs_reset_iostat(sbi);
> >> -	if (!strcmp(a->attr.name, "gc_urgent") && t == 1 && sbi->gc_thread) {
> >> -		sbi->gc_thread->gc_wake = 1;
> >> -		wake_up_interruptible_all(&sbi->gc_thread->gc_wait_queue_head);
> >> +
> >> +	if (a->struct_type == GC_THREAD) {
> >> +		down_write(&GC_I(sbi)->gc_rwsem);
> >> +		if (!GC_I(sbi)->f2fs_gc_task) {
> >> +			up_write(&GC_I(sbi)->gc_rwsem);
> >> +			return -EINVAL;
> >> +		}
> >> +	}
> >> +
> >> +	if (!strcmp(a->attr.name, "gc_urgent") && t == 1) {
> >> +		GC_I(sbi)->gc_wake = 1;
> >> +		wake_up_interruptible_all(&GC_I(sbi)->gc_wait_queue_head);
> >>  		wake_up_discard_thread(sbi, true);
> >>  	}
> >>  
> >> +	*ui = t;
> >> +
> >> +	if (a->struct_type == GC_THREAD)
> >> +		up_write(&GC_I(sbi)->gc_rwsem);
> >> +
> >>  	return count;
> >>  }
> >>  
> >> -- 
> >> 2.15.0.55.gc2ece9dc4de6
> > 
> > .
> > 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] f2fs: fix to avoid race during access gc_thread pointer
  2018-04-27  2:07       ` Chao Yu
  (?)
@ 2018-04-28  2:37       ` Jaegeuk Kim
  -1 siblings, 0 replies; 16+ messages in thread
From: Jaegeuk Kim @ 2018-04-28  2:37 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-f2fs-devel, linux-kernel, chao

On 04/27, Chao Yu wrote:
> On 2018/4/27 10:04, Chao Yu wrote:
> > On 2018/4/27 0:03, Jaegeuk Kim wrote:
> >> On 04/24, Chao Yu wrote:
> >>> Thread A			Thread B		Thread C
> >>> - f2fs_remount
> >>>  - stop_gc_thread
> >>> 				- f2fs_sbi_store
> >>> 							- issue_discard_thread
> >>>    sbi->gc_thread = NULL;
> >>> 				  sbi->gc_thread->gc_wake = 1
> >>> 							  access sbi->gc_thread->gc_urgent
> >>>
> >>> Previously, we allocate memory for sbi->gc_thread based on background
> >>> gc thread mount option, the memory can be released if we turn off
> >>> that mount option, but still there are several places access gc_thread
> >>> pointer without considering race condition, result in NULL point
> >>> dereference.
> >>
> >> Do we just need to check &sb->s_umount in f2fs_sbi_store() and
> > 
> > Better,
> > 
> >> issue_discard_thread?
> > 
> > There is more cases can be called from different scenarios:
> > - select_policy()
> > - need_SSR()
> 
> BTW, do you agree with introducing {init,destroy}_gc_context as the patch does?

I don't think so. The issue only requires mutex() in sysfs/discard_thread.

> 
> Thanks,
> 
> > 
> > Thanks,
> > 
> >>
> >>>
> >>> In order to fix this issue, keep gc_thread structure valid in sbi all
> >>> the time instead of alloc/free it dynamically.
> >>>
> >>> In addition, add a rw semaphore to exclude rw operation in fields of
> >>> gc_thread.
> >>>
> >>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
> >>> ---
> >>> v2:
> >>> - add a rw semaphore to exclude rw operation suggested by Jaegeuk.
> >>>  fs/f2fs/debug.c   |  3 +--
> >>>  fs/f2fs/f2fs.h    |  9 ++++++-
> >>>  fs/f2fs/gc.c      | 78 ++++++++++++++++++++++++++++++++++++-------------------
> >>>  fs/f2fs/gc.h      |  1 +
> >>>  fs/f2fs/segment.c | 10 +++++--
> >>>  fs/f2fs/super.c   | 26 +++++++++++++------
> >>>  fs/f2fs/sysfs.c   | 26 ++++++++++++++-----
> >>>  7 files changed, 107 insertions(+), 46 deletions(-)
> >>>
> >>> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> >>> index a66107b5cfff..0fbd674c66fb 100644
> >>> --- a/fs/f2fs/debug.c
> >>> +++ b/fs/f2fs/debug.c
> >>> @@ -221,8 +221,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
> >>>  	si->cache_mem = 0;
> >>>  
> >>>  	/* build gc */
> >>> -	if (sbi->gc_thread)
> >>> -		si->cache_mem += sizeof(struct f2fs_gc_kthread);
> >>> +	si->cache_mem += sizeof(struct f2fs_gc_kthread);
> >>>  
> >>>  	/* build merge flush thread */
> >>>  	if (SM_I(sbi)->fcc_info)
> >>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >>> index 8f3ad9662d13..75d3b4875429 100644
> >>> --- a/fs/f2fs/f2fs.h
> >>> +++ b/fs/f2fs/f2fs.h
> >>> @@ -1412,6 +1412,11 @@ static inline struct sit_info *SIT_I(struct f2fs_sb_info *sbi)
> >>>  	return (struct sit_info *)(SM_I(sbi)->sit_info);
> >>>  }
> >>>  
> >>> +static inline struct f2fs_gc_kthread *GC_I(struct f2fs_sb_info *sbi)
> >>> +{
> >>> +	return (struct f2fs_gc_kthread *)(sbi->gc_thread);
> >>> +}
> >>> +
> >>>  static inline struct free_segmap_info *FREE_I(struct f2fs_sb_info *sbi)
> >>>  {
> >>>  	return (struct free_segmap_info *)(SM_I(sbi)->free_info);
> >>> @@ -2954,8 +2959,10 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len);
> >>>  /*
> >>>   * gc.c
> >>>   */
> >>> +int init_gc_context(struct f2fs_sb_info *sbi);
> >>> +void destroy_gc_context(struct f2fs_sb_info * sbi);
> >>>  int start_gc_thread(struct f2fs_sb_info *sbi);
> >>> -void stop_gc_thread(struct f2fs_sb_info *sbi);
> >>> +bool stop_gc_thread(struct f2fs_sb_info *sbi);
> >>>  block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
> >>>  int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background,
> >>>  			unsigned int segno);
> >>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> >>> index 70418b34c5f6..2febb84b2fd6 100644
> >>> --- a/fs/f2fs/gc.c
> >>> +++ b/fs/f2fs/gc.c
> >>> @@ -26,8 +26,8 @@
> >>>  static int gc_thread_func(void *data)
> >>>  {
> >>>  	struct f2fs_sb_info *sbi = data;
> >>> -	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
> >>> -	wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
> >>> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
> >>> +	wait_queue_head_t *wq = &gc_th->gc_wait_queue_head;
> >>>  	unsigned int wait_ms;
> >>>  
> >>>  	wait_ms = gc_th->min_sleep_time;
> >>> @@ -114,17 +114,15 @@ static int gc_thread_func(void *data)
> >>>  	return 0;
> >>>  }
> >>>  
> >>> -int start_gc_thread(struct f2fs_sb_info *sbi)
> >>> +int init_gc_context(struct f2fs_sb_info *sbi)
> >>>  {
> >>>  	struct f2fs_gc_kthread *gc_th;
> >>> -	dev_t dev = sbi->sb->s_bdev->bd_dev;
> >>> -	int err = 0;
> >>>  
> >>>  	gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
> >>> -	if (!gc_th) {
> >>> -		err = -ENOMEM;
> >>> -		goto out;
> >>> -	}
> >>> +	if (!gc_th)
> >>> +		return -ENOMEM;
> >>> +
> >>> +	gc_th->f2fs_gc_task = NULL;
> >>>  
> >>>  	gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
> >>>  	gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
> >>> @@ -135,35 +133,59 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
> >>>  	gc_th->gc_urgent = 0;
> >>>  	gc_th->gc_wake= 0;
> >>>  
> >>> +	init_rwsem(&gc_th->gc_rwsem);
> >>> +
> >>>  	sbi->gc_thread = gc_th;
> >>> -	init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
> >>> -	sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
> >>> +
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +void destroy_gc_context(struct f2fs_sb_info *sbi)
> >>> +{
> >>> +	kfree(GC_I(sbi));
> >>> +	sbi->gc_thread = NULL;
> >>> +}
> >>> +
> >>> +int start_gc_thread(struct f2fs_sb_info *sbi)
> >>> +{
> >>> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
> >>> +	dev_t dev = sbi->sb->s_bdev->bd_dev;
> >>> +	int err = 0;
> >>> +
> >>> +	init_waitqueue_head(&gc_th->gc_wait_queue_head);
> >>> +	gc_th->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
> >>>  			"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
> >>>  	if (IS_ERR(gc_th->f2fs_gc_task)) {
> >>>  		err = PTR_ERR(gc_th->f2fs_gc_task);
> >>> -		kfree(gc_th);
> >>> -		sbi->gc_thread = NULL;
> >>> +		gc_th->f2fs_gc_task = NULL;
> >>>  	}
> >>> -out:
> >>> +
> >>>  	return err;
> >>>  }
> >>>  
> >>> -void stop_gc_thread(struct f2fs_sb_info *sbi)
> >>> +bool stop_gc_thread(struct f2fs_sb_info *sbi)
> >>>  {
> >>> -	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
> >>> -	if (!gc_th)
> >>> -		return;
> >>> -	kthread_stop(gc_th->f2fs_gc_task);
> >>> -	kfree(gc_th);
> >>> -	sbi->gc_thread = NULL;
> >>> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
> >>> +	bool stopped = false;
> >>> +
> >>> +	down_write(&gc_th->gc_rwsem);
> >>> +	if (gc_th->f2fs_gc_task) {
> >>> +		kthread_stop(gc_th->f2fs_gc_task);
> >>> +		gc_th->f2fs_gc_task = NULL;
> >>> +		stopped = true;
> >>> +	}
> >>> +	up_write(&gc_th->gc_rwsem);
> >>> +
> >>> +	return stopped;
> >>>  }
> >>>  
> >>>  static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
> >>>  {
> >>>  	int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
> >>>  
> >>> -	if (!gc_th)
> >>> -		return gc_mode;
> >>> +	down_read(&gc_th->gc_rwsem);
> >>> +	if (!gc_th->f2fs_gc_task)
> >>> +		goto out;
> >>>  
> >>>  	if (gc_th->gc_idle) {
> >>>  		if (gc_th->gc_idle == 1)
> >>> @@ -173,6 +195,8 @@ static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
> >>>  	}
> >>>  	if (gc_th->gc_urgent)
> >>>  		gc_mode = GC_GREEDY;
> >>> +out:
> >>> +	up_read(&gc_th->gc_rwsem);
> >>>  	return gc_mode;
> >>>  }
> >>>  
> >>> @@ -187,17 +211,19 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
> >>>  		p->max_search = dirty_i->nr_dirty[type];
> >>>  		p->ofs_unit = 1;
> >>>  	} else {
> >>> -		p->gc_mode = select_gc_type(sbi->gc_thread, gc_type);
> >>> +		p->gc_mode = select_gc_type(GC_I(sbi), gc_type);
> >>>  		p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
> >>>  		p->max_search = dirty_i->nr_dirty[DIRTY];
> >>>  		p->ofs_unit = sbi->segs_per_sec;
> >>>  	}
> >>>  
> >>>  	/* we need to check every dirty segments in the FG_GC case */
> >>> -	if (gc_type != FG_GC &&
> >>> -			(sbi->gc_thread && !sbi->gc_thread->gc_urgent) &&
> >>> +	down_read(&GC_I(sbi)->gc_rwsem);
> >>> +	if (gc_type != FG_GC && GC_I(sbi)->f2fs_gc_task &&
> >>> +			!GC_I(sbi)->gc_urgent &&
> >>>  			p->max_search > sbi->max_victim_search)
> >>>  		p->max_search = sbi->max_victim_search;
> >>> +	up_read(&GC_I(sbi)->gc_rwsem);
> >>>  
> >>>  	/* let's select beginning hot/small space first in no_heap mode*/
> >>>  	if (test_opt(sbi, NOHEAP) &&
> >>> diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
> >>> index b0045d4c8d1e..9a5b273328c2 100644
> >>> --- a/fs/f2fs/gc.h
> >>> +++ b/fs/f2fs/gc.h
> >>> @@ -26,6 +26,7 @@
> >>>  #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */
> >>>  
> >>>  struct f2fs_gc_kthread {
> >>> +	struct rw_semaphore gc_rwsem;		/* semaphore for f2fs_gc_task */
> >>>  	struct task_struct *f2fs_gc_task;
> >>>  	wait_queue_head_t gc_wait_queue_head;
> >>>  
> >>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> >>> index 5960768d26df..f787eea1b2f6 100644
> >>> --- a/fs/f2fs/segment.c
> >>> +++ b/fs/f2fs/segment.c
> >>> @@ -177,8 +177,12 @@ bool need_SSR(struct f2fs_sb_info *sbi)
> >>>  
> >>>  	if (test_opt(sbi, LFS))
> >>>  		return false;
> >>> -	if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
> >>> +	down_read(&GC_I(sbi)->gc_rwsem);
> >>> +	if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent) {
> >>> +		down_read(&GC_I(sbi)->gc_rwsem);
> >>>  		return true;
> >>> +	}
> >>> +	up_read(&GC_I(sbi)->gc_rwsem);
> >>>  
> >>>  	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
> >>>  			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
> >>> @@ -1425,8 +1429,10 @@ static int issue_discard_thread(void *data)
> >>>  		if (dcc->discard_wake)
> >>>  			dcc->discard_wake = 0;
> >>>  
> >>> -		if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
> >>> +		down_read(&GC_I(sbi)->gc_rwsem);
> >>> +		if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent)
> >>>  			init_discard_policy(&dpolicy, DPOLICY_FORCE, 1);
> >>> +		up_read(&GC_I(sbi)->gc_rwsem);
> >>>  
> >>>  		sb_start_intwrite(sbi->sb);
> >>>  
> >>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> >>> index dedb8e50b440..199f29dce86d 100644
> >>> --- a/fs/f2fs/super.c
> >>> +++ b/fs/f2fs/super.c
> >>> @@ -1044,6 +1044,7 @@ static void f2fs_put_super(struct super_block *sb)
> >>>  	kfree(sbi->raw_super);
> >>>  
> >>>  	destroy_device_list(sbi);
> >>> +	destroy_gc_context(sbi);
> >>>  	mempool_destroy(sbi->write_io_dummy);
> >>>  #ifdef CONFIG_QUOTA
> >>>  	for (i = 0; i < MAXQUOTAS; i++)
> >>> @@ -1476,15 +1477,18 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
> >>>  	 * option. Also sync the filesystem.
> >>>  	 */
> >>>  	if ((*flags & SB_RDONLY) || !test_opt(sbi, BG_GC)) {
> >>> -		if (sbi->gc_thread) {
> >>> -			stop_gc_thread(sbi);
> >>> -			need_restart_gc = true;
> >>> +		need_restart_gc = stop_gc_thread(sbi);
> >>> +	} else {
> >>> +		down_write(&GC_I(sbi)->gc_rwsem);
> >>> +		if (!GC_I(sbi)->f2fs_gc_task) {
> >>> +			err = start_gc_thread(sbi);
> >>> +			if (err) {
> >>> +				up_write(&GC_I(sbi)->gc_rwsem);
> >>> +				goto restore_opts;
> >>> +			}
> >>> +			need_stop_gc = true;
> >>>  		}
> >>> -	} else if (!sbi->gc_thread) {
> >>> -		err = start_gc_thread(sbi);
> >>> -		if (err)
> >>> -			goto restore_opts;
> >>> -		need_stop_gc = true;
> >>> +		up_write(&GC_I(sbi)->gc_rwsem);
> >>>  	}
> >>>  
> >>>  	if (*flags & SB_RDONLY ||
> >>> @@ -2771,6 +2775,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> >>>  		goto free_meta_inode;
> >>>  	}
> >>>  
> >>> +	err = init_gc_context(sbi);
> >>> +	if (err)
> >>> +		goto free_checkpoint;
> >>> +
> >>>  	/* Initialize device list */
> >>>  	err = f2fs_scan_devices(sbi);
> >>>  	if (err) {
> >>> @@ -2981,6 +2989,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> >>>  	destroy_segment_manager(sbi);
> >>>  free_devices:
> >>>  	destroy_device_list(sbi);
> >>> +	destroy_gc_context(sbi);
> >>> +free_checkpoint:
> >>>  	kfree(sbi->ckpt);
> >>>  free_meta_inode:
> >>>  	make_bad_inode(sbi->meta_inode);
> >>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
> >>> index 2c53de9251be..b8d09935e43f 100644
> >>> --- a/fs/f2fs/sysfs.c
> >>> +++ b/fs/f2fs/sysfs.c
> >>> @@ -46,7 +46,7 @@ struct f2fs_attr {
> >>>  static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
> >>>  {
> >>>  	if (struct_type == GC_THREAD)
> >>> -		return (unsigned char *)sbi->gc_thread;
> >>> +		return (unsigned char *)GC_I(sbi);
> >>>  	else if (struct_type == SM_INFO)
> >>>  		return (unsigned char *)SM_I(sbi);
> >>>  	else if (struct_type == DCC_INFO)
> >>> @@ -248,16 +248,28 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
> >>>  	if (!strcmp(a->attr.name, "trim_sections"))
> >>>  		return -EINVAL;
> >>>  
> >>> -	*ui = t;
> >>> -
> >>> -	if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0)
> >>> +	if (!strcmp(a->attr.name, "iostat_enable") && t == 0)
> >>>  		f2fs_reset_iostat(sbi);
> >>> -	if (!strcmp(a->attr.name, "gc_urgent") && t == 1 && sbi->gc_thread) {
> >>> -		sbi->gc_thread->gc_wake = 1;
> >>> -		wake_up_interruptible_all(&sbi->gc_thread->gc_wait_queue_head);
> >>> +
> >>> +	if (a->struct_type == GC_THREAD) {
> >>> +		down_write(&GC_I(sbi)->gc_rwsem);
> >>> +		if (!GC_I(sbi)->f2fs_gc_task) {
> >>> +			up_write(&GC_I(sbi)->gc_rwsem);
> >>> +			return -EINVAL;
> >>> +		}
> >>> +	}
> >>> +
> >>> +	if (!strcmp(a->attr.name, "gc_urgent") && t == 1) {
> >>> +		GC_I(sbi)->gc_wake = 1;
> >>> +		wake_up_interruptible_all(&GC_I(sbi)->gc_wait_queue_head);
> >>>  		wake_up_discard_thread(sbi, true);
> >>>  	}
> >>>  
> >>> +	*ui = t;
> >>> +
> >>> +	if (a->struct_type == GC_THREAD)
> >>> +		up_write(&GC_I(sbi)->gc_rwsem);
> >>> +
> >>>  	return count;
> >>>  }
> >>>  
> >>> -- 
> >>> 2.15.0.55.gc2ece9dc4de6
> >>
> >> .
> >>
> > 
> > 
> > .
> > 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] f2fs: fix to avoid race during access gc_thread pointer
  2018-04-28  2:36     ` Jaegeuk Kim
@ 2018-05-02  1:47         ` Chao Yu
  0 siblings, 0 replies; 16+ messages in thread
From: Chao Yu @ 2018-05-02  1:47 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-f2fs-devel, linux-kernel, chao

On 2018/4/28 10:36, Jaegeuk Kim wrote:
> On 04/27, Chao Yu wrote:
>> On 2018/4/27 0:03, Jaegeuk Kim wrote:
>>> On 04/24, Chao Yu wrote:
>>>> Thread A			Thread B		Thread C
>>>> - f2fs_remount
>>>>  - stop_gc_thread
>>>> 				- f2fs_sbi_store
>>>> 							- issue_discard_thread
>>>>    sbi->gc_thread = NULL;
>>>> 				  sbi->gc_thread->gc_wake = 1
>>>> 							  access sbi->gc_thread->gc_urgent
>>>>
>>>> Previously, we allocate memory for sbi->gc_thread based on background
>>>> gc thread mount option, the memory can be released if we turn off
>>>> that mount option, but still there are several places access gc_thread
>>>> pointer without considering race condition, result in NULL point
>>>> dereference.
>>>
>>> Do we just need to check &sb->s_umount in f2fs_sbi_store() and
>>
>> Better,
>>
>>> issue_discard_thread?
>>
>> There is more cases can be called from different scenarios:
>> - select_policy()
>> - need_SSR()
> 
> No? They should be blocked by remount(2).

How about below cases?

- do_remount
 - do_remount_sb
  - remount_fs
   - f2fs_remount
    - stop_gc_thread
					- fsync
					 - f2fs_sync_file
					  - file_write_and_wait_range
					   - f2fs_write_data_pages
					    - __write_data_page
					     - should_update_inplace
					      - check_inplace_update_policy
					       - need_SSR
     : sbi->gc_thread = NULL;

or

					     - write_data_page
					      - allocate_data_block
					       - allocate_segment
					        - get_ssr_segment
					         - select_gc_type
     : sbi->gc_thread = NULL;

Thanks,

> 
>>
>> Thanks,
>>
>>>
>>>>
>>>> In order to fix this issue, keep gc_thread structure valid in sbi all
>>>> the time instead of alloc/free it dynamically.
>>>>
>>>> In addition, add a rw semaphore to exclude rw operation in fields of
>>>> gc_thread.
>>>>
>>>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
>>>> ---
>>>> v2:
>>>> - add a rw semaphore to exclude rw operation suggested by Jaegeuk.
>>>>  fs/f2fs/debug.c   |  3 +--
>>>>  fs/f2fs/f2fs.h    |  9 ++++++-
>>>>  fs/f2fs/gc.c      | 78 ++++++++++++++++++++++++++++++++++++-------------------
>>>>  fs/f2fs/gc.h      |  1 +
>>>>  fs/f2fs/segment.c | 10 +++++--
>>>>  fs/f2fs/super.c   | 26 +++++++++++++------
>>>>  fs/f2fs/sysfs.c   | 26 ++++++++++++++-----
>>>>  7 files changed, 107 insertions(+), 46 deletions(-)
>>>>
>>>> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
>>>> index a66107b5cfff..0fbd674c66fb 100644
>>>> --- a/fs/f2fs/debug.c
>>>> +++ b/fs/f2fs/debug.c
>>>> @@ -221,8 +221,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
>>>>  	si->cache_mem = 0;
>>>>  
>>>>  	/* build gc */
>>>> -	if (sbi->gc_thread)
>>>> -		si->cache_mem += sizeof(struct f2fs_gc_kthread);
>>>> +	si->cache_mem += sizeof(struct f2fs_gc_kthread);
>>>>  
>>>>  	/* build merge flush thread */
>>>>  	if (SM_I(sbi)->fcc_info)
>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>> index 8f3ad9662d13..75d3b4875429 100644
>>>> --- a/fs/f2fs/f2fs.h
>>>> +++ b/fs/f2fs/f2fs.h
>>>> @@ -1412,6 +1412,11 @@ static inline struct sit_info *SIT_I(struct f2fs_sb_info *sbi)
>>>>  	return (struct sit_info *)(SM_I(sbi)->sit_info);
>>>>  }
>>>>  
>>>> +static inline struct f2fs_gc_kthread *GC_I(struct f2fs_sb_info *sbi)
>>>> +{
>>>> +	return (struct f2fs_gc_kthread *)(sbi->gc_thread);
>>>> +}
>>>> +
>>>>  static inline struct free_segmap_info *FREE_I(struct f2fs_sb_info *sbi)
>>>>  {
>>>>  	return (struct free_segmap_info *)(SM_I(sbi)->free_info);
>>>> @@ -2954,8 +2959,10 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len);
>>>>  /*
>>>>   * gc.c
>>>>   */
>>>> +int init_gc_context(struct f2fs_sb_info *sbi);
>>>> +void destroy_gc_context(struct f2fs_sb_info * sbi);
>>>>  int start_gc_thread(struct f2fs_sb_info *sbi);
>>>> -void stop_gc_thread(struct f2fs_sb_info *sbi);
>>>> +bool stop_gc_thread(struct f2fs_sb_info *sbi);
>>>>  block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
>>>>  int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background,
>>>>  			unsigned int segno);
>>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>>> index 70418b34c5f6..2febb84b2fd6 100644
>>>> --- a/fs/f2fs/gc.c
>>>> +++ b/fs/f2fs/gc.c
>>>> @@ -26,8 +26,8 @@
>>>>  static int gc_thread_func(void *data)
>>>>  {
>>>>  	struct f2fs_sb_info *sbi = data;
>>>> -	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
>>>> -	wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
>>>> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
>>>> +	wait_queue_head_t *wq = &gc_th->gc_wait_queue_head;
>>>>  	unsigned int wait_ms;
>>>>  
>>>>  	wait_ms = gc_th->min_sleep_time;
>>>> @@ -114,17 +114,15 @@ static int gc_thread_func(void *data)
>>>>  	return 0;
>>>>  }
>>>>  
>>>> -int start_gc_thread(struct f2fs_sb_info *sbi)
>>>> +int init_gc_context(struct f2fs_sb_info *sbi)
>>>>  {
>>>>  	struct f2fs_gc_kthread *gc_th;
>>>> -	dev_t dev = sbi->sb->s_bdev->bd_dev;
>>>> -	int err = 0;
>>>>  
>>>>  	gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
>>>> -	if (!gc_th) {
>>>> -		err = -ENOMEM;
>>>> -		goto out;
>>>> -	}
>>>> +	if (!gc_th)
>>>> +		return -ENOMEM;
>>>> +
>>>> +	gc_th->f2fs_gc_task = NULL;
>>>>  
>>>>  	gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
>>>>  	gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
>>>> @@ -135,35 +133,59 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
>>>>  	gc_th->gc_urgent = 0;
>>>>  	gc_th->gc_wake= 0;
>>>>  
>>>> +	init_rwsem(&gc_th->gc_rwsem);
>>>> +
>>>>  	sbi->gc_thread = gc_th;
>>>> -	init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
>>>> -	sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +void destroy_gc_context(struct f2fs_sb_info *sbi)
>>>> +{
>>>> +	kfree(GC_I(sbi));
>>>> +	sbi->gc_thread = NULL;
>>>> +}
>>>> +
>>>> +int start_gc_thread(struct f2fs_sb_info *sbi)
>>>> +{
>>>> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
>>>> +	dev_t dev = sbi->sb->s_bdev->bd_dev;
>>>> +	int err = 0;
>>>> +
>>>> +	init_waitqueue_head(&gc_th->gc_wait_queue_head);
>>>> +	gc_th->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
>>>>  			"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
>>>>  	if (IS_ERR(gc_th->f2fs_gc_task)) {
>>>>  		err = PTR_ERR(gc_th->f2fs_gc_task);
>>>> -		kfree(gc_th);
>>>> -		sbi->gc_thread = NULL;
>>>> +		gc_th->f2fs_gc_task = NULL;
>>>>  	}
>>>> -out:
>>>> +
>>>>  	return err;
>>>>  }
>>>>  
>>>> -void stop_gc_thread(struct f2fs_sb_info *sbi)
>>>> +bool stop_gc_thread(struct f2fs_sb_info *sbi)
>>>>  {
>>>> -	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
>>>> -	if (!gc_th)
>>>> -		return;
>>>> -	kthread_stop(gc_th->f2fs_gc_task);
>>>> -	kfree(gc_th);
>>>> -	sbi->gc_thread = NULL;
>>>> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
>>>> +	bool stopped = false;
>>>> +
>>>> +	down_write(&gc_th->gc_rwsem);
>>>> +	if (gc_th->f2fs_gc_task) {
>>>> +		kthread_stop(gc_th->f2fs_gc_task);
>>>> +		gc_th->f2fs_gc_task = NULL;
>>>> +		stopped = true;
>>>> +	}
>>>> +	up_write(&gc_th->gc_rwsem);
>>>> +
>>>> +	return stopped;
>>>>  }
>>>>  
>>>>  static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
>>>>  {
>>>>  	int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
>>>>  
>>>> -	if (!gc_th)
>>>> -		return gc_mode;
>>>> +	down_read(&gc_th->gc_rwsem);
>>>> +	if (!gc_th->f2fs_gc_task)
>>>> +		goto out;
>>>>  
>>>>  	if (gc_th->gc_idle) {
>>>>  		if (gc_th->gc_idle == 1)
>>>> @@ -173,6 +195,8 @@ static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
>>>>  	}
>>>>  	if (gc_th->gc_urgent)
>>>>  		gc_mode = GC_GREEDY;
>>>> +out:
>>>> +	up_read(&gc_th->gc_rwsem);
>>>>  	return gc_mode;
>>>>  }
>>>>  
>>>> @@ -187,17 +211,19 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
>>>>  		p->max_search = dirty_i->nr_dirty[type];
>>>>  		p->ofs_unit = 1;
>>>>  	} else {
>>>> -		p->gc_mode = select_gc_type(sbi->gc_thread, gc_type);
>>>> +		p->gc_mode = select_gc_type(GC_I(sbi), gc_type);
>>>>  		p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
>>>>  		p->max_search = dirty_i->nr_dirty[DIRTY];
>>>>  		p->ofs_unit = sbi->segs_per_sec;
>>>>  	}
>>>>  
>>>>  	/* we need to check every dirty segments in the FG_GC case */
>>>> -	if (gc_type != FG_GC &&
>>>> -			(sbi->gc_thread && !sbi->gc_thread->gc_urgent) &&
>>>> +	down_read(&GC_I(sbi)->gc_rwsem);
>>>> +	if (gc_type != FG_GC && GC_I(sbi)->f2fs_gc_task &&
>>>> +			!GC_I(sbi)->gc_urgent &&
>>>>  			p->max_search > sbi->max_victim_search)
>>>>  		p->max_search = sbi->max_victim_search;
>>>> +	up_read(&GC_I(sbi)->gc_rwsem);
>>>>  
>>>>  	/* let's select beginning hot/small space first in no_heap mode*/
>>>>  	if (test_opt(sbi, NOHEAP) &&
>>>> diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
>>>> index b0045d4c8d1e..9a5b273328c2 100644
>>>> --- a/fs/f2fs/gc.h
>>>> +++ b/fs/f2fs/gc.h
>>>> @@ -26,6 +26,7 @@
>>>>  #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */
>>>>  
>>>>  struct f2fs_gc_kthread {
>>>> +	struct rw_semaphore gc_rwsem;		/* semaphore for f2fs_gc_task */
>>>>  	struct task_struct *f2fs_gc_task;
>>>>  	wait_queue_head_t gc_wait_queue_head;
>>>>  
>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>> index 5960768d26df..f787eea1b2f6 100644
>>>> --- a/fs/f2fs/segment.c
>>>> +++ b/fs/f2fs/segment.c
>>>> @@ -177,8 +177,12 @@ bool need_SSR(struct f2fs_sb_info *sbi)
>>>>  
>>>>  	if (test_opt(sbi, LFS))
>>>>  		return false;
>>>> -	if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
>>>> +	down_read(&GC_I(sbi)->gc_rwsem);
>>>> +	if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent) {
>>>> +		down_read(&GC_I(sbi)->gc_rwsem);
>>>>  		return true;
>>>> +	}
>>>> +	up_read(&GC_I(sbi)->gc_rwsem);
>>>>  
>>>>  	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
>>>>  			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
>>>> @@ -1425,8 +1429,10 @@ static int issue_discard_thread(void *data)
>>>>  		if (dcc->discard_wake)
>>>>  			dcc->discard_wake = 0;
>>>>  
>>>> -		if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
>>>> +		down_read(&GC_I(sbi)->gc_rwsem);
>>>> +		if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent)
>>>>  			init_discard_policy(&dpolicy, DPOLICY_FORCE, 1);
>>>> +		up_read(&GC_I(sbi)->gc_rwsem);
>>>>  
>>>>  		sb_start_intwrite(sbi->sb);
>>>>  
>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>> index dedb8e50b440..199f29dce86d 100644
>>>> --- a/fs/f2fs/super.c
>>>> +++ b/fs/f2fs/super.c
>>>> @@ -1044,6 +1044,7 @@ static void f2fs_put_super(struct super_block *sb)
>>>>  	kfree(sbi->raw_super);
>>>>  
>>>>  	destroy_device_list(sbi);
>>>> +	destroy_gc_context(sbi);
>>>>  	mempool_destroy(sbi->write_io_dummy);
>>>>  #ifdef CONFIG_QUOTA
>>>>  	for (i = 0; i < MAXQUOTAS; i++)
>>>> @@ -1476,15 +1477,18 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
>>>>  	 * option. Also sync the filesystem.
>>>>  	 */
>>>>  	if ((*flags & SB_RDONLY) || !test_opt(sbi, BG_GC)) {
>>>> -		if (sbi->gc_thread) {
>>>> -			stop_gc_thread(sbi);
>>>> -			need_restart_gc = true;
>>>> +		need_restart_gc = stop_gc_thread(sbi);
>>>> +	} else {
>>>> +		down_write(&GC_I(sbi)->gc_rwsem);
>>>> +		if (!GC_I(sbi)->f2fs_gc_task) {
>>>> +			err = start_gc_thread(sbi);
>>>> +			if (err) {
>>>> +				up_write(&GC_I(sbi)->gc_rwsem);
>>>> +				goto restore_opts;
>>>> +			}
>>>> +			need_stop_gc = true;
>>>>  		}
>>>> -	} else if (!sbi->gc_thread) {
>>>> -		err = start_gc_thread(sbi);
>>>> -		if (err)
>>>> -			goto restore_opts;
>>>> -		need_stop_gc = true;
>>>> +		up_write(&GC_I(sbi)->gc_rwsem);
>>>>  	}
>>>>  
>>>>  	if (*flags & SB_RDONLY ||
>>>> @@ -2771,6 +2775,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>>  		goto free_meta_inode;
>>>>  	}
>>>>  
>>>> +	err = init_gc_context(sbi);
>>>> +	if (err)
>>>> +		goto free_checkpoint;
>>>> +
>>>>  	/* Initialize device list */
>>>>  	err = f2fs_scan_devices(sbi);
>>>>  	if (err) {
>>>> @@ -2981,6 +2989,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>>  	destroy_segment_manager(sbi);
>>>>  free_devices:
>>>>  	destroy_device_list(sbi);
>>>> +	destroy_gc_context(sbi);
>>>> +free_checkpoint:
>>>>  	kfree(sbi->ckpt);
>>>>  free_meta_inode:
>>>>  	make_bad_inode(sbi->meta_inode);
>>>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
>>>> index 2c53de9251be..b8d09935e43f 100644
>>>> --- a/fs/f2fs/sysfs.c
>>>> +++ b/fs/f2fs/sysfs.c
>>>> @@ -46,7 +46,7 @@ struct f2fs_attr {
>>>>  static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
>>>>  {
>>>>  	if (struct_type == GC_THREAD)
>>>> -		return (unsigned char *)sbi->gc_thread;
>>>> +		return (unsigned char *)GC_I(sbi);
>>>>  	else if (struct_type == SM_INFO)
>>>>  		return (unsigned char *)SM_I(sbi);
>>>>  	else if (struct_type == DCC_INFO)
>>>> @@ -248,16 +248,28 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
>>>>  	if (!strcmp(a->attr.name, "trim_sections"))
>>>>  		return -EINVAL;
>>>>  
>>>> -	*ui = t;
>>>> -
>>>> -	if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0)
>>>> +	if (!strcmp(a->attr.name, "iostat_enable") && t == 0)
>>>>  		f2fs_reset_iostat(sbi);
>>>> -	if (!strcmp(a->attr.name, "gc_urgent") && t == 1 && sbi->gc_thread) {
>>>> -		sbi->gc_thread->gc_wake = 1;
>>>> -		wake_up_interruptible_all(&sbi->gc_thread->gc_wait_queue_head);
>>>> +
>>>> +	if (a->struct_type == GC_THREAD) {
>>>> +		down_write(&GC_I(sbi)->gc_rwsem);
>>>> +		if (!GC_I(sbi)->f2fs_gc_task) {
>>>> +			up_write(&GC_I(sbi)->gc_rwsem);
>>>> +			return -EINVAL;
>>>> +		}
>>>> +	}
>>>> +
>>>> +	if (!strcmp(a->attr.name, "gc_urgent") && t == 1) {
>>>> +		GC_I(sbi)->gc_wake = 1;
>>>> +		wake_up_interruptible_all(&GC_I(sbi)->gc_wait_queue_head);
>>>>  		wake_up_discard_thread(sbi, true);
>>>>  	}
>>>>  
>>>> +	*ui = t;
>>>> +
>>>> +	if (a->struct_type == GC_THREAD)
>>>> +		up_write(&GC_I(sbi)->gc_rwsem);
>>>> +
>>>>  	return count;
>>>>  }
>>>>  
>>>> -- 
>>>> 2.15.0.55.gc2ece9dc4de6
>>>
>>> .
>>>
> 
> .
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] f2fs: fix to avoid race during access gc_thread pointer
@ 2018-05-02  1:47         ` Chao Yu
  0 siblings, 0 replies; 16+ messages in thread
From: Chao Yu @ 2018-05-02  1:47 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-f2fs-devel, linux-kernel, chao

On 2018/4/28 10:36, Jaegeuk Kim wrote:
> On 04/27, Chao Yu wrote:
>> On 2018/4/27 0:03, Jaegeuk Kim wrote:
>>> On 04/24, Chao Yu wrote:
>>>> Thread A			Thread B		Thread C
>>>> - f2fs_remount
>>>>  - stop_gc_thread
>>>> 				- f2fs_sbi_store
>>>> 							- issue_discard_thread
>>>>    sbi->gc_thread = NULL;
>>>> 				  sbi->gc_thread->gc_wake = 1
>>>> 							  access sbi->gc_thread->gc_urgent
>>>>
>>>> Previously, we allocate memory for sbi->gc_thread based on background
>>>> gc thread mount option, the memory can be released if we turn off
>>>> that mount option, but still there are several places access gc_thread
>>>> pointer without considering race condition, result in NULL point
>>>> dereference.
>>>
>>> Do we just need to check &sb->s_umount in f2fs_sbi_store() and
>>
>> Better,
>>
>>> issue_discard_thread?
>>
>> There is more cases can be called from different scenarios:
>> - select_policy()
>> - need_SSR()
> 
> No? They should be blocked by remount(2).

How about below cases?

- do_remount
 - do_remount_sb
  - remount_fs
   - f2fs_remount
    - stop_gc_thread
					- fsync
					 - f2fs_sync_file
					  - file_write_and_wait_range
					   - f2fs_write_data_pages
					    - __write_data_page
					     - should_update_inplace
					      - check_inplace_update_policy
					       - need_SSR
     : sbi->gc_thread = NULL;

or

					     - write_data_page
					      - allocate_data_block
					       - allocate_segment
					        - get_ssr_segment
					         - select_gc_type
     : sbi->gc_thread = NULL;

Thanks,

> 
>>
>> Thanks,
>>
>>>
>>>>
>>>> In order to fix this issue, keep gc_thread structure valid in sbi all
>>>> the time instead of alloc/free it dynamically.
>>>>
>>>> In addition, add a rw semaphore to exclude rw operation in fields of
>>>> gc_thread.
>>>>
>>>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
>>>> ---
>>>> v2:
>>>> - add a rw semaphore to exclude rw operation suggested by Jaegeuk.
>>>>  fs/f2fs/debug.c   |  3 +--
>>>>  fs/f2fs/f2fs.h    |  9 ++++++-
>>>>  fs/f2fs/gc.c      | 78 ++++++++++++++++++++++++++++++++++++-------------------
>>>>  fs/f2fs/gc.h      |  1 +
>>>>  fs/f2fs/segment.c | 10 +++++--
>>>>  fs/f2fs/super.c   | 26 +++++++++++++------
>>>>  fs/f2fs/sysfs.c   | 26 ++++++++++++++-----
>>>>  7 files changed, 107 insertions(+), 46 deletions(-)
>>>>
>>>> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
>>>> index a66107b5cfff..0fbd674c66fb 100644
>>>> --- a/fs/f2fs/debug.c
>>>> +++ b/fs/f2fs/debug.c
>>>> @@ -221,8 +221,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
>>>>  	si->cache_mem = 0;
>>>>  
>>>>  	/* build gc */
>>>> -	if (sbi->gc_thread)
>>>> -		si->cache_mem += sizeof(struct f2fs_gc_kthread);
>>>> +	si->cache_mem += sizeof(struct f2fs_gc_kthread);
>>>>  
>>>>  	/* build merge flush thread */
>>>>  	if (SM_I(sbi)->fcc_info)
>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>> index 8f3ad9662d13..75d3b4875429 100644
>>>> --- a/fs/f2fs/f2fs.h
>>>> +++ b/fs/f2fs/f2fs.h
>>>> @@ -1412,6 +1412,11 @@ static inline struct sit_info *SIT_I(struct f2fs_sb_info *sbi)
>>>>  	return (struct sit_info *)(SM_I(sbi)->sit_info);
>>>>  }
>>>>  
>>>> +static inline struct f2fs_gc_kthread *GC_I(struct f2fs_sb_info *sbi)
>>>> +{
>>>> +	return (struct f2fs_gc_kthread *)(sbi->gc_thread);
>>>> +}
>>>> +
>>>>  static inline struct free_segmap_info *FREE_I(struct f2fs_sb_info *sbi)
>>>>  {
>>>>  	return (struct free_segmap_info *)(SM_I(sbi)->free_info);
>>>> @@ -2954,8 +2959,10 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len);
>>>>  /*
>>>>   * gc.c
>>>>   */
>>>> +int init_gc_context(struct f2fs_sb_info *sbi);
>>>> +void destroy_gc_context(struct f2fs_sb_info * sbi);
>>>>  int start_gc_thread(struct f2fs_sb_info *sbi);
>>>> -void stop_gc_thread(struct f2fs_sb_info *sbi);
>>>> +bool stop_gc_thread(struct f2fs_sb_info *sbi);
>>>>  block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
>>>>  int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background,
>>>>  			unsigned int segno);
>>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>>> index 70418b34c5f6..2febb84b2fd6 100644
>>>> --- a/fs/f2fs/gc.c
>>>> +++ b/fs/f2fs/gc.c
>>>> @@ -26,8 +26,8 @@
>>>>  static int gc_thread_func(void *data)
>>>>  {
>>>>  	struct f2fs_sb_info *sbi = data;
>>>> -	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
>>>> -	wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
>>>> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
>>>> +	wait_queue_head_t *wq = &gc_th->gc_wait_queue_head;
>>>>  	unsigned int wait_ms;
>>>>  
>>>>  	wait_ms = gc_th->min_sleep_time;
>>>> @@ -114,17 +114,15 @@ static int gc_thread_func(void *data)
>>>>  	return 0;
>>>>  }
>>>>  
>>>> -int start_gc_thread(struct f2fs_sb_info *sbi)
>>>> +int init_gc_context(struct f2fs_sb_info *sbi)
>>>>  {
>>>>  	struct f2fs_gc_kthread *gc_th;
>>>> -	dev_t dev = sbi->sb->s_bdev->bd_dev;
>>>> -	int err = 0;
>>>>  
>>>>  	gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
>>>> -	if (!gc_th) {
>>>> -		err = -ENOMEM;
>>>> -		goto out;
>>>> -	}
>>>> +	if (!gc_th)
>>>> +		return -ENOMEM;
>>>> +
>>>> +	gc_th->f2fs_gc_task = NULL;
>>>>  
>>>>  	gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
>>>>  	gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
>>>> @@ -135,35 +133,59 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
>>>>  	gc_th->gc_urgent = 0;
>>>>  	gc_th->gc_wake= 0;
>>>>  
>>>> +	init_rwsem(&gc_th->gc_rwsem);
>>>> +
>>>>  	sbi->gc_thread = gc_th;
>>>> -	init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
>>>> -	sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +void destroy_gc_context(struct f2fs_sb_info *sbi)
>>>> +{
>>>> +	kfree(GC_I(sbi));
>>>> +	sbi->gc_thread = NULL;
>>>> +}
>>>> +
>>>> +int start_gc_thread(struct f2fs_sb_info *sbi)
>>>> +{
>>>> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
>>>> +	dev_t dev = sbi->sb->s_bdev->bd_dev;
>>>> +	int err = 0;
>>>> +
>>>> +	init_waitqueue_head(&gc_th->gc_wait_queue_head);
>>>> +	gc_th->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
>>>>  			"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
>>>>  	if (IS_ERR(gc_th->f2fs_gc_task)) {
>>>>  		err = PTR_ERR(gc_th->f2fs_gc_task);
>>>> -		kfree(gc_th);
>>>> -		sbi->gc_thread = NULL;
>>>> +		gc_th->f2fs_gc_task = NULL;
>>>>  	}
>>>> -out:
>>>> +
>>>>  	return err;
>>>>  }
>>>>  
>>>> -void stop_gc_thread(struct f2fs_sb_info *sbi)
>>>> +bool stop_gc_thread(struct f2fs_sb_info *sbi)
>>>>  {
>>>> -	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
>>>> -	if (!gc_th)
>>>> -		return;
>>>> -	kthread_stop(gc_th->f2fs_gc_task);
>>>> -	kfree(gc_th);
>>>> -	sbi->gc_thread = NULL;
>>>> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
>>>> +	bool stopped = false;
>>>> +
>>>> +	down_write(&gc_th->gc_rwsem);
>>>> +	if (gc_th->f2fs_gc_task) {
>>>> +		kthread_stop(gc_th->f2fs_gc_task);
>>>> +		gc_th->f2fs_gc_task = NULL;
>>>> +		stopped = true;
>>>> +	}
>>>> +	up_write(&gc_th->gc_rwsem);
>>>> +
>>>> +	return stopped;
>>>>  }
>>>>  
>>>>  static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
>>>>  {
>>>>  	int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
>>>>  
>>>> -	if (!gc_th)
>>>> -		return gc_mode;
>>>> +	down_read(&gc_th->gc_rwsem);
>>>> +	if (!gc_th->f2fs_gc_task)
>>>> +		goto out;
>>>>  
>>>>  	if (gc_th->gc_idle) {
>>>>  		if (gc_th->gc_idle == 1)
>>>> @@ -173,6 +195,8 @@ static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
>>>>  	}
>>>>  	if (gc_th->gc_urgent)
>>>>  		gc_mode = GC_GREEDY;
>>>> +out:
>>>> +	up_read(&gc_th->gc_rwsem);
>>>>  	return gc_mode;
>>>>  }
>>>>  
>>>> @@ -187,17 +211,19 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
>>>>  		p->max_search = dirty_i->nr_dirty[type];
>>>>  		p->ofs_unit = 1;
>>>>  	} else {
>>>> -		p->gc_mode = select_gc_type(sbi->gc_thread, gc_type);
>>>> +		p->gc_mode = select_gc_type(GC_I(sbi), gc_type);
>>>>  		p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
>>>>  		p->max_search = dirty_i->nr_dirty[DIRTY];
>>>>  		p->ofs_unit = sbi->segs_per_sec;
>>>>  	}
>>>>  
>>>>  	/* we need to check every dirty segments in the FG_GC case */
>>>> -	if (gc_type != FG_GC &&
>>>> -			(sbi->gc_thread && !sbi->gc_thread->gc_urgent) &&
>>>> +	down_read(&GC_I(sbi)->gc_rwsem);
>>>> +	if (gc_type != FG_GC && GC_I(sbi)->f2fs_gc_task &&
>>>> +			!GC_I(sbi)->gc_urgent &&
>>>>  			p->max_search > sbi->max_victim_search)
>>>>  		p->max_search = sbi->max_victim_search;
>>>> +	up_read(&GC_I(sbi)->gc_rwsem);
>>>>  
>>>>  	/* let's select beginning hot/small space first in no_heap mode*/
>>>>  	if (test_opt(sbi, NOHEAP) &&
>>>> diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
>>>> index b0045d4c8d1e..9a5b273328c2 100644
>>>> --- a/fs/f2fs/gc.h
>>>> +++ b/fs/f2fs/gc.h
>>>> @@ -26,6 +26,7 @@
>>>>  #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */
>>>>  
>>>>  struct f2fs_gc_kthread {
>>>> +	struct rw_semaphore gc_rwsem;		/* semaphore for f2fs_gc_task */
>>>>  	struct task_struct *f2fs_gc_task;
>>>>  	wait_queue_head_t gc_wait_queue_head;
>>>>  
>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>> index 5960768d26df..f787eea1b2f6 100644
>>>> --- a/fs/f2fs/segment.c
>>>> +++ b/fs/f2fs/segment.c
>>>> @@ -177,8 +177,12 @@ bool need_SSR(struct f2fs_sb_info *sbi)
>>>>  
>>>>  	if (test_opt(sbi, LFS))
>>>>  		return false;
>>>> -	if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
>>>> +	down_read(&GC_I(sbi)->gc_rwsem);
>>>> +	if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent) {
>>>> +		down_read(&GC_I(sbi)->gc_rwsem);
>>>>  		return true;
>>>> +	}
>>>> +	up_read(&GC_I(sbi)->gc_rwsem);
>>>>  
>>>>  	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
>>>>  			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
>>>> @@ -1425,8 +1429,10 @@ static int issue_discard_thread(void *data)
>>>>  		if (dcc->discard_wake)
>>>>  			dcc->discard_wake = 0;
>>>>  
>>>> -		if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
>>>> +		down_read(&GC_I(sbi)->gc_rwsem);
>>>> +		if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent)
>>>>  			init_discard_policy(&dpolicy, DPOLICY_FORCE, 1);
>>>> +		up_read(&GC_I(sbi)->gc_rwsem);
>>>>  
>>>>  		sb_start_intwrite(sbi->sb);
>>>>  
>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>> index dedb8e50b440..199f29dce86d 100644
>>>> --- a/fs/f2fs/super.c
>>>> +++ b/fs/f2fs/super.c
>>>> @@ -1044,6 +1044,7 @@ static void f2fs_put_super(struct super_block *sb)
>>>>  	kfree(sbi->raw_super);
>>>>  
>>>>  	destroy_device_list(sbi);
>>>> +	destroy_gc_context(sbi);
>>>>  	mempool_destroy(sbi->write_io_dummy);
>>>>  #ifdef CONFIG_QUOTA
>>>>  	for (i = 0; i < MAXQUOTAS; i++)
>>>> @@ -1476,15 +1477,18 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
>>>>  	 * option. Also sync the filesystem.
>>>>  	 */
>>>>  	if ((*flags & SB_RDONLY) || !test_opt(sbi, BG_GC)) {
>>>> -		if (sbi->gc_thread) {
>>>> -			stop_gc_thread(sbi);
>>>> -			need_restart_gc = true;
>>>> +		need_restart_gc = stop_gc_thread(sbi);
>>>> +	} else {
>>>> +		down_write(&GC_I(sbi)->gc_rwsem);
>>>> +		if (!GC_I(sbi)->f2fs_gc_task) {
>>>> +			err = start_gc_thread(sbi);
>>>> +			if (err) {
>>>> +				up_write(&GC_I(sbi)->gc_rwsem);
>>>> +				goto restore_opts;
>>>> +			}
>>>> +			need_stop_gc = true;
>>>>  		}
>>>> -	} else if (!sbi->gc_thread) {
>>>> -		err = start_gc_thread(sbi);
>>>> -		if (err)
>>>> -			goto restore_opts;
>>>> -		need_stop_gc = true;
>>>> +		up_write(&GC_I(sbi)->gc_rwsem);
>>>>  	}
>>>>  
>>>>  	if (*flags & SB_RDONLY ||
>>>> @@ -2771,6 +2775,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>>  		goto free_meta_inode;
>>>>  	}
>>>>  
>>>> +	err = init_gc_context(sbi);
>>>> +	if (err)
>>>> +		goto free_checkpoint;
>>>> +
>>>>  	/* Initialize device list */
>>>>  	err = f2fs_scan_devices(sbi);
>>>>  	if (err) {
>>>> @@ -2981,6 +2989,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>>  	destroy_segment_manager(sbi);
>>>>  free_devices:
>>>>  	destroy_device_list(sbi);
>>>> +	destroy_gc_context(sbi);
>>>> +free_checkpoint:
>>>>  	kfree(sbi->ckpt);
>>>>  free_meta_inode:
>>>>  	make_bad_inode(sbi->meta_inode);
>>>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
>>>> index 2c53de9251be..b8d09935e43f 100644
>>>> --- a/fs/f2fs/sysfs.c
>>>> +++ b/fs/f2fs/sysfs.c
>>>> @@ -46,7 +46,7 @@ struct f2fs_attr {
>>>>  static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
>>>>  {
>>>>  	if (struct_type == GC_THREAD)
>>>> -		return (unsigned char *)sbi->gc_thread;
>>>> +		return (unsigned char *)GC_I(sbi);
>>>>  	else if (struct_type == SM_INFO)
>>>>  		return (unsigned char *)SM_I(sbi);
>>>>  	else if (struct_type == DCC_INFO)
>>>> @@ -248,16 +248,28 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
>>>>  	if (!strcmp(a->attr.name, "trim_sections"))
>>>>  		return -EINVAL;
>>>>  
>>>> -	*ui = t;
>>>> -
>>>> -	if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0)
>>>> +	if (!strcmp(a->attr.name, "iostat_enable") && t == 0)
>>>>  		f2fs_reset_iostat(sbi);
>>>> -	if (!strcmp(a->attr.name, "gc_urgent") && t == 1 && sbi->gc_thread) {
>>>> -		sbi->gc_thread->gc_wake = 1;
>>>> -		wake_up_interruptible_all(&sbi->gc_thread->gc_wait_queue_head);
>>>> +
>>>> +	if (a->struct_type == GC_THREAD) {
>>>> +		down_write(&GC_I(sbi)->gc_rwsem);
>>>> +		if (!GC_I(sbi)->f2fs_gc_task) {
>>>> +			up_write(&GC_I(sbi)->gc_rwsem);
>>>> +			return -EINVAL;
>>>> +		}
>>>> +	}
>>>> +
>>>> +	if (!strcmp(a->attr.name, "gc_urgent") && t == 1) {
>>>> +		GC_I(sbi)->gc_wake = 1;
>>>> +		wake_up_interruptible_all(&GC_I(sbi)->gc_wait_queue_head);
>>>>  		wake_up_discard_thread(sbi, true);
>>>>  	}
>>>>  
>>>> +	*ui = t;
>>>> +
>>>> +	if (a->struct_type == GC_THREAD)
>>>> +		up_write(&GC_I(sbi)->gc_rwsem);
>>>> +
>>>>  	return count;
>>>>  }
>>>>  
>>>> -- 
>>>> 2.15.0.55.gc2ece9dc4de6
>>>
>>> .
>>>
> 
> .
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] f2fs: fix to avoid race during access gc_thread pointer
  2018-05-02  1:47         ` Chao Yu
  (?)
@ 2018-05-04 18:59         ` Jaegeuk Kim
  2018-05-05  2:21             ` Chao Yu
  -1 siblings, 1 reply; 16+ messages in thread
From: Jaegeuk Kim @ 2018-05-04 18:59 UTC (permalink / raw)
  To: Chao Yu; +Cc: linux-f2fs-devel, linux-kernel, chao

On 05/02, Chao Yu wrote:
> On 2018/4/28 10:36, Jaegeuk Kim wrote:
> > On 04/27, Chao Yu wrote:
> >> On 2018/4/27 0:03, Jaegeuk Kim wrote:
> >>> On 04/24, Chao Yu wrote:
> >>>> Thread A			Thread B		Thread C
> >>>> - f2fs_remount
> >>>>  - stop_gc_thread
> >>>> 				- f2fs_sbi_store
> >>>> 							- issue_discard_thread
> >>>>    sbi->gc_thread = NULL;
> >>>> 				  sbi->gc_thread->gc_wake = 1
> >>>> 							  access sbi->gc_thread->gc_urgent
> >>>>
> >>>> Previously, we allocate memory for sbi->gc_thread based on background
> >>>> gc thread mount option, the memory can be released if we turn off
> >>>> that mount option, but still there are several places access gc_thread
> >>>> pointer without considering race condition, result in NULL point
> >>>> dereference.
> >>>
> >>> Do we just need to check &sb->s_umount in f2fs_sbi_store() and
> >>
> >> Better,
> >>
> >>> issue_discard_thread?
> >>
> >> There is more cases can be called from different scenarios:
> >> - select_policy()
> >> - need_SSR()
> > 
> > No? They should be blocked by remount(2).
> 
> How about below cases?
> 
> - do_remount

Was there no guard to block any filesystem operations?
If it's true, yeah, we need to cover them.

>  - do_remount_sb
>   - remount_fs
>    - f2fs_remount
>     - stop_gc_thread
> 					- fsync
> 					 - f2fs_sync_file
> 					  - file_write_and_wait_range
> 					   - f2fs_write_data_pages
> 					    - __write_data_page
> 					     - should_update_inplace
> 					      - check_inplace_update_policy
> 					       - need_SSR
>      : sbi->gc_thread = NULL;
> 
> or
> 
> 					     - write_data_page
> 					      - allocate_data_block
> 					       - allocate_segment
> 					        - get_ssr_segment
> 					         - select_gc_type
>      : sbi->gc_thread = NULL;
> 
> Thanks,
> 
> > 
> >>
> >> Thanks,
> >>
> >>>
> >>>>
> >>>> In order to fix this issue, keep gc_thread structure valid in sbi all
> >>>> the time instead of alloc/free it dynamically.
> >>>>
> >>>> In addition, add a rw semaphore to exclude rw operation in fields of
> >>>> gc_thread.
> >>>>
> >>>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
> >>>> ---
> >>>> v2:
> >>>> - add a rw semaphore to exclude rw operation suggested by Jaegeuk.
> >>>>  fs/f2fs/debug.c   |  3 +--
> >>>>  fs/f2fs/f2fs.h    |  9 ++++++-
> >>>>  fs/f2fs/gc.c      | 78 ++++++++++++++++++++++++++++++++++++-------------------
> >>>>  fs/f2fs/gc.h      |  1 +
> >>>>  fs/f2fs/segment.c | 10 +++++--
> >>>>  fs/f2fs/super.c   | 26 +++++++++++++------
> >>>>  fs/f2fs/sysfs.c   | 26 ++++++++++++++-----
> >>>>  7 files changed, 107 insertions(+), 46 deletions(-)
> >>>>
> >>>> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> >>>> index a66107b5cfff..0fbd674c66fb 100644
> >>>> --- a/fs/f2fs/debug.c
> >>>> +++ b/fs/f2fs/debug.c
> >>>> @@ -221,8 +221,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
> >>>>  	si->cache_mem = 0;
> >>>>  
> >>>>  	/* build gc */
> >>>> -	if (sbi->gc_thread)
> >>>> -		si->cache_mem += sizeof(struct f2fs_gc_kthread);
> >>>> +	si->cache_mem += sizeof(struct f2fs_gc_kthread);
> >>>>  
> >>>>  	/* build merge flush thread */
> >>>>  	if (SM_I(sbi)->fcc_info)
> >>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >>>> index 8f3ad9662d13..75d3b4875429 100644
> >>>> --- a/fs/f2fs/f2fs.h
> >>>> +++ b/fs/f2fs/f2fs.h
> >>>> @@ -1412,6 +1412,11 @@ static inline struct sit_info *SIT_I(struct f2fs_sb_info *sbi)
> >>>>  	return (struct sit_info *)(SM_I(sbi)->sit_info);
> >>>>  }
> >>>>  
> >>>> +static inline struct f2fs_gc_kthread *GC_I(struct f2fs_sb_info *sbi)
> >>>> +{
> >>>> +	return (struct f2fs_gc_kthread *)(sbi->gc_thread);
> >>>> +}
> >>>> +
> >>>>  static inline struct free_segmap_info *FREE_I(struct f2fs_sb_info *sbi)
> >>>>  {
> >>>>  	return (struct free_segmap_info *)(SM_I(sbi)->free_info);
> >>>> @@ -2954,8 +2959,10 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len);
> >>>>  /*
> >>>>   * gc.c
> >>>>   */
> >>>> +int init_gc_context(struct f2fs_sb_info *sbi);
> >>>> +void destroy_gc_context(struct f2fs_sb_info * sbi);
> >>>>  int start_gc_thread(struct f2fs_sb_info *sbi);
> >>>> -void stop_gc_thread(struct f2fs_sb_info *sbi);
> >>>> +bool stop_gc_thread(struct f2fs_sb_info *sbi);
> >>>>  block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
> >>>>  int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background,
> >>>>  			unsigned int segno);
> >>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> >>>> index 70418b34c5f6..2febb84b2fd6 100644
> >>>> --- a/fs/f2fs/gc.c
> >>>> +++ b/fs/f2fs/gc.c
> >>>> @@ -26,8 +26,8 @@
> >>>>  static int gc_thread_func(void *data)
> >>>>  {
> >>>>  	struct f2fs_sb_info *sbi = data;
> >>>> -	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
> >>>> -	wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
> >>>> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
> >>>> +	wait_queue_head_t *wq = &gc_th->gc_wait_queue_head;
> >>>>  	unsigned int wait_ms;
> >>>>  
> >>>>  	wait_ms = gc_th->min_sleep_time;
> >>>> @@ -114,17 +114,15 @@ static int gc_thread_func(void *data)
> >>>>  	return 0;
> >>>>  }
> >>>>  
> >>>> -int start_gc_thread(struct f2fs_sb_info *sbi)
> >>>> +int init_gc_context(struct f2fs_sb_info *sbi)
> >>>>  {
> >>>>  	struct f2fs_gc_kthread *gc_th;
> >>>> -	dev_t dev = sbi->sb->s_bdev->bd_dev;
> >>>> -	int err = 0;
> >>>>  
> >>>>  	gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
> >>>> -	if (!gc_th) {
> >>>> -		err = -ENOMEM;
> >>>> -		goto out;
> >>>> -	}
> >>>> +	if (!gc_th)
> >>>> +		return -ENOMEM;
> >>>> +
> >>>> +	gc_th->f2fs_gc_task = NULL;
> >>>>  
> >>>>  	gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
> >>>>  	gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
> >>>> @@ -135,35 +133,59 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
> >>>>  	gc_th->gc_urgent = 0;
> >>>>  	gc_th->gc_wake= 0;
> >>>>  
> >>>> +	init_rwsem(&gc_th->gc_rwsem);
> >>>> +
> >>>>  	sbi->gc_thread = gc_th;
> >>>> -	init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
> >>>> -	sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
> >>>> +
> >>>> +	return 0;
> >>>> +}
> >>>> +
> >>>> +void destroy_gc_context(struct f2fs_sb_info *sbi)
> >>>> +{
> >>>> +	kfree(GC_I(sbi));
> >>>> +	sbi->gc_thread = NULL;
> >>>> +}
> >>>> +
> >>>> +int start_gc_thread(struct f2fs_sb_info *sbi)
> >>>> +{
> >>>> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
> >>>> +	dev_t dev = sbi->sb->s_bdev->bd_dev;
> >>>> +	int err = 0;
> >>>> +
> >>>> +	init_waitqueue_head(&gc_th->gc_wait_queue_head);
> >>>> +	gc_th->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
> >>>>  			"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
> >>>>  	if (IS_ERR(gc_th->f2fs_gc_task)) {
> >>>>  		err = PTR_ERR(gc_th->f2fs_gc_task);
> >>>> -		kfree(gc_th);
> >>>> -		sbi->gc_thread = NULL;
> >>>> +		gc_th->f2fs_gc_task = NULL;
> >>>>  	}
> >>>> -out:
> >>>> +
> >>>>  	return err;
> >>>>  }
> >>>>  
> >>>> -void stop_gc_thread(struct f2fs_sb_info *sbi)
> >>>> +bool stop_gc_thread(struct f2fs_sb_info *sbi)
> >>>>  {
> >>>> -	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
> >>>> -	if (!gc_th)
> >>>> -		return;
> >>>> -	kthread_stop(gc_th->f2fs_gc_task);
> >>>> -	kfree(gc_th);
> >>>> -	sbi->gc_thread = NULL;
> >>>> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
> >>>> +	bool stopped = false;
> >>>> +
> >>>> +	down_write(&gc_th->gc_rwsem);
> >>>> +	if (gc_th->f2fs_gc_task) {
> >>>> +		kthread_stop(gc_th->f2fs_gc_task);
> >>>> +		gc_th->f2fs_gc_task = NULL;
> >>>> +		stopped = true;
> >>>> +	}
> >>>> +	up_write(&gc_th->gc_rwsem);
> >>>> +
> >>>> +	return stopped;
> >>>>  }
> >>>>  
> >>>>  static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
> >>>>  {
> >>>>  	int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
> >>>>  
> >>>> -	if (!gc_th)
> >>>> -		return gc_mode;
> >>>> +	down_read(&gc_th->gc_rwsem);
> >>>> +	if (!gc_th->f2fs_gc_task)
> >>>> +		goto out;
> >>>>  
> >>>>  	if (gc_th->gc_idle) {
> >>>>  		if (gc_th->gc_idle == 1)
> >>>> @@ -173,6 +195,8 @@ static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
> >>>>  	}
> >>>>  	if (gc_th->gc_urgent)
> >>>>  		gc_mode = GC_GREEDY;
> >>>> +out:
> >>>> +	up_read(&gc_th->gc_rwsem);
> >>>>  	return gc_mode;
> >>>>  }
> >>>>  
> >>>> @@ -187,17 +211,19 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
> >>>>  		p->max_search = dirty_i->nr_dirty[type];
> >>>>  		p->ofs_unit = 1;
> >>>>  	} else {
> >>>> -		p->gc_mode = select_gc_type(sbi->gc_thread, gc_type);
> >>>> +		p->gc_mode = select_gc_type(GC_I(sbi), gc_type);
> >>>>  		p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
> >>>>  		p->max_search = dirty_i->nr_dirty[DIRTY];
> >>>>  		p->ofs_unit = sbi->segs_per_sec;
> >>>>  	}
> >>>>  
> >>>>  	/* we need to check every dirty segments in the FG_GC case */
> >>>> -	if (gc_type != FG_GC &&
> >>>> -			(sbi->gc_thread && !sbi->gc_thread->gc_urgent) &&
> >>>> +	down_read(&GC_I(sbi)->gc_rwsem);
> >>>> +	if (gc_type != FG_GC && GC_I(sbi)->f2fs_gc_task &&
> >>>> +			!GC_I(sbi)->gc_urgent &&
> >>>>  			p->max_search > sbi->max_victim_search)
> >>>>  		p->max_search = sbi->max_victim_search;
> >>>> +	up_read(&GC_I(sbi)->gc_rwsem);
> >>>>  
> >>>>  	/* let's select beginning hot/small space first in no_heap mode*/
> >>>>  	if (test_opt(sbi, NOHEAP) &&
> >>>> diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
> >>>> index b0045d4c8d1e..9a5b273328c2 100644
> >>>> --- a/fs/f2fs/gc.h
> >>>> +++ b/fs/f2fs/gc.h
> >>>> @@ -26,6 +26,7 @@
> >>>>  #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */
> >>>>  
> >>>>  struct f2fs_gc_kthread {
> >>>> +	struct rw_semaphore gc_rwsem;		/* semaphore for f2fs_gc_task */
> >>>>  	struct task_struct *f2fs_gc_task;
> >>>>  	wait_queue_head_t gc_wait_queue_head;
> >>>>  
> >>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> >>>> index 5960768d26df..f787eea1b2f6 100644
> >>>> --- a/fs/f2fs/segment.c
> >>>> +++ b/fs/f2fs/segment.c
> >>>> @@ -177,8 +177,12 @@ bool need_SSR(struct f2fs_sb_info *sbi)
> >>>>  
> >>>>  	if (test_opt(sbi, LFS))
> >>>>  		return false;
> >>>> -	if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
> >>>> +	down_read(&GC_I(sbi)->gc_rwsem);
> >>>> +	if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent) {
> >>>> +		down_read(&GC_I(sbi)->gc_rwsem);
> >>>>  		return true;
> >>>> +	}
> >>>> +	up_read(&GC_I(sbi)->gc_rwsem);
> >>>>  
> >>>>  	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
> >>>>  			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
> >>>> @@ -1425,8 +1429,10 @@ static int issue_discard_thread(void *data)
> >>>>  		if (dcc->discard_wake)
> >>>>  			dcc->discard_wake = 0;
> >>>>  
> >>>> -		if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
> >>>> +		down_read(&GC_I(sbi)->gc_rwsem);
> >>>> +		if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent)
> >>>>  			init_discard_policy(&dpolicy, DPOLICY_FORCE, 1);
> >>>> +		up_read(&GC_I(sbi)->gc_rwsem);
> >>>>  
> >>>>  		sb_start_intwrite(sbi->sb);
> >>>>  
> >>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> >>>> index dedb8e50b440..199f29dce86d 100644
> >>>> --- a/fs/f2fs/super.c
> >>>> +++ b/fs/f2fs/super.c
> >>>> @@ -1044,6 +1044,7 @@ static void f2fs_put_super(struct super_block *sb)
> >>>>  	kfree(sbi->raw_super);
> >>>>  
> >>>>  	destroy_device_list(sbi);
> >>>> +	destroy_gc_context(sbi);
> >>>>  	mempool_destroy(sbi->write_io_dummy);
> >>>>  #ifdef CONFIG_QUOTA
> >>>>  	for (i = 0; i < MAXQUOTAS; i++)
> >>>> @@ -1476,15 +1477,18 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
> >>>>  	 * option. Also sync the filesystem.
> >>>>  	 */
> >>>>  	if ((*flags & SB_RDONLY) || !test_opt(sbi, BG_GC)) {
> >>>> -		if (sbi->gc_thread) {
> >>>> -			stop_gc_thread(sbi);
> >>>> -			need_restart_gc = true;
> >>>> +		need_restart_gc = stop_gc_thread(sbi);
> >>>> +	} else {
> >>>> +		down_write(&GC_I(sbi)->gc_rwsem);
> >>>> +		if (!GC_I(sbi)->f2fs_gc_task) {
> >>>> +			err = start_gc_thread(sbi);
> >>>> +			if (err) {
> >>>> +				up_write(&GC_I(sbi)->gc_rwsem);
> >>>> +				goto restore_opts;
> >>>> +			}
> >>>> +			need_stop_gc = true;
> >>>>  		}
> >>>> -	} else if (!sbi->gc_thread) {
> >>>> -		err = start_gc_thread(sbi);
> >>>> -		if (err)
> >>>> -			goto restore_opts;
> >>>> -		need_stop_gc = true;
> >>>> +		up_write(&GC_I(sbi)->gc_rwsem);
> >>>>  	}
> >>>>  
> >>>>  	if (*flags & SB_RDONLY ||
> >>>> @@ -2771,6 +2775,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> >>>>  		goto free_meta_inode;
> >>>>  	}
> >>>>  
> >>>> +	err = init_gc_context(sbi);
> >>>> +	if (err)
> >>>> +		goto free_checkpoint;
> >>>> +
> >>>>  	/* Initialize device list */
> >>>>  	err = f2fs_scan_devices(sbi);
> >>>>  	if (err) {
> >>>> @@ -2981,6 +2989,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> >>>>  	destroy_segment_manager(sbi);
> >>>>  free_devices:
> >>>>  	destroy_device_list(sbi);
> >>>> +	destroy_gc_context(sbi);
> >>>> +free_checkpoint:
> >>>>  	kfree(sbi->ckpt);
> >>>>  free_meta_inode:
> >>>>  	make_bad_inode(sbi->meta_inode);
> >>>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
> >>>> index 2c53de9251be..b8d09935e43f 100644
> >>>> --- a/fs/f2fs/sysfs.c
> >>>> +++ b/fs/f2fs/sysfs.c
> >>>> @@ -46,7 +46,7 @@ struct f2fs_attr {
> >>>>  static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
> >>>>  {
> >>>>  	if (struct_type == GC_THREAD)
> >>>> -		return (unsigned char *)sbi->gc_thread;
> >>>> +		return (unsigned char *)GC_I(sbi);
> >>>>  	else if (struct_type == SM_INFO)
> >>>>  		return (unsigned char *)SM_I(sbi);
> >>>>  	else if (struct_type == DCC_INFO)
> >>>> @@ -248,16 +248,28 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
> >>>>  	if (!strcmp(a->attr.name, "trim_sections"))
> >>>>  		return -EINVAL;
> >>>>  
> >>>> -	*ui = t;
> >>>> -
> >>>> -	if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0)
> >>>> +	if (!strcmp(a->attr.name, "iostat_enable") && t == 0)
> >>>>  		f2fs_reset_iostat(sbi);
> >>>> -	if (!strcmp(a->attr.name, "gc_urgent") && t == 1 && sbi->gc_thread) {
> >>>> -		sbi->gc_thread->gc_wake = 1;
> >>>> -		wake_up_interruptible_all(&sbi->gc_thread->gc_wait_queue_head);
> >>>> +
> >>>> +	if (a->struct_type == GC_THREAD) {
> >>>> +		down_write(&GC_I(sbi)->gc_rwsem);
> >>>> +		if (!GC_I(sbi)->f2fs_gc_task) {
> >>>> +			up_write(&GC_I(sbi)->gc_rwsem);
> >>>> +			return -EINVAL;
> >>>> +		}
> >>>> +	}
> >>>> +
> >>>> +	if (!strcmp(a->attr.name, "gc_urgent") && t == 1) {
> >>>> +		GC_I(sbi)->gc_wake = 1;
> >>>> +		wake_up_interruptible_all(&GC_I(sbi)->gc_wait_queue_head);
> >>>>  		wake_up_discard_thread(sbi, true);
> >>>>  	}
> >>>>  
> >>>> +	*ui = t;
> >>>> +
> >>>> +	if (a->struct_type == GC_THREAD)
> >>>> +		up_write(&GC_I(sbi)->gc_rwsem);
> >>>> +
> >>>>  	return count;
> >>>>  }
> >>>>  
> >>>> -- 
> >>>> 2.15.0.55.gc2ece9dc4de6
> >>>
> >>> .
> >>>
> > 
> > .
> > 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] f2fs: fix to avoid race during access gc_thread pointer
  2018-05-04 18:59         ` Jaegeuk Kim
@ 2018-05-05  2:21             ` Chao Yu
  0 siblings, 0 replies; 16+ messages in thread
From: Chao Yu @ 2018-05-05  2:21 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-f2fs-devel, linux-kernel, chao

On 2018/5/5 2:59, Jaegeuk Kim wrote:
> On 05/02, Chao Yu wrote:
>> On 2018/4/28 10:36, Jaegeuk Kim wrote:
>>> On 04/27, Chao Yu wrote:
>>>> On 2018/4/27 0:03, Jaegeuk Kim wrote:
>>>>> On 04/24, Chao Yu wrote:
>>>>>> Thread A			Thread B		Thread C
>>>>>> - f2fs_remount
>>>>>>  - stop_gc_thread
>>>>>> 				- f2fs_sbi_store
>>>>>> 							- issue_discard_thread
>>>>>>    sbi->gc_thread = NULL;
>>>>>> 				  sbi->gc_thread->gc_wake = 1
>>>>>> 							  access sbi->gc_thread->gc_urgent
>>>>>>
>>>>>> Previously, we allocate memory for sbi->gc_thread based on background
>>>>>> gc thread mount option, the memory can be released if we turn off
>>>>>> that mount option, but still there are several places access gc_thread
>>>>>> pointer without considering race condition, result in NULL point
>>>>>> dereference.
>>>>>
>>>>> Do we just need to check &sb->s_umount in f2fs_sbi_store() and
>>>>
>>>> Better,
>>>>
>>>>> issue_discard_thread?
>>>>
>>>> There is more cases can be called from different scenarios:
>>>> - select_policy()
>>>> - need_SSR()
>>>
>>> No? They should be blocked by remount(2).
>>
>> How about below cases?
>>
>> - do_remount
> 
> Was there no guard to block any filesystem operations?

The only block point is f2fs_readonly in f2fs_sync_file, without holding
s_umount during ->fsync, so IMO, we need to cover need_SSR & select_gc_type, let
me update the patch later.

Thanks,

> If it's true, yeah, we need to cover them.
> 
>>  - do_remount_sb
>>   - remount_fs
>>    - f2fs_remount
>>     - stop_gc_thread
>> 					- fsync
>> 					 - f2fs_sync_file
>> 					  - file_write_and_wait_range
>> 					   - f2fs_write_data_pages
>> 					    - __write_data_page
>> 					     - should_update_inplace
>> 					      - check_inplace_update_policy
>> 					       - need_SSR
>>      : sbi->gc_thread = NULL;
>>
>> or
>>
>> 					     - write_data_page
>> 					      - allocate_data_block
>> 					       - allocate_segment
>> 					        - get_ssr_segment
>> 					         - select_gc_type
>>      : sbi->gc_thread = NULL;
>>
>> Thanks,
>>
>>>
>>>>
>>>> Thanks,
>>>>
>>>>>
>>>>>>
>>>>>> In order to fix this issue, keep gc_thread structure valid in sbi all
>>>>>> the time instead of alloc/free it dynamically.
>>>>>>
>>>>>> In addition, add a rw semaphore to exclude rw operation in fields of
>>>>>> gc_thread.
>>>>>>
>>>>>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
>>>>>> ---
>>>>>> v2:
>>>>>> - add a rw semaphore to exclude rw operation suggested by Jaegeuk.
>>>>>>  fs/f2fs/debug.c   |  3 +--
>>>>>>  fs/f2fs/f2fs.h    |  9 ++++++-
>>>>>>  fs/f2fs/gc.c      | 78 ++++++++++++++++++++++++++++++++++++-------------------
>>>>>>  fs/f2fs/gc.h      |  1 +
>>>>>>  fs/f2fs/segment.c | 10 +++++--
>>>>>>  fs/f2fs/super.c   | 26 +++++++++++++------
>>>>>>  fs/f2fs/sysfs.c   | 26 ++++++++++++++-----
>>>>>>  7 files changed, 107 insertions(+), 46 deletions(-)
>>>>>>
>>>>>> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
>>>>>> index a66107b5cfff..0fbd674c66fb 100644
>>>>>> --- a/fs/f2fs/debug.c
>>>>>> +++ b/fs/f2fs/debug.c
>>>>>> @@ -221,8 +221,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
>>>>>>  	si->cache_mem = 0;
>>>>>>  
>>>>>>  	/* build gc */
>>>>>> -	if (sbi->gc_thread)
>>>>>> -		si->cache_mem += sizeof(struct f2fs_gc_kthread);
>>>>>> +	si->cache_mem += sizeof(struct f2fs_gc_kthread);
>>>>>>  
>>>>>>  	/* build merge flush thread */
>>>>>>  	if (SM_I(sbi)->fcc_info)
>>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>>>> index 8f3ad9662d13..75d3b4875429 100644
>>>>>> --- a/fs/f2fs/f2fs.h
>>>>>> +++ b/fs/f2fs/f2fs.h
>>>>>> @@ -1412,6 +1412,11 @@ static inline struct sit_info *SIT_I(struct f2fs_sb_info *sbi)
>>>>>>  	return (struct sit_info *)(SM_I(sbi)->sit_info);
>>>>>>  }
>>>>>>  
>>>>>> +static inline struct f2fs_gc_kthread *GC_I(struct f2fs_sb_info *sbi)
>>>>>> +{
>>>>>> +	return (struct f2fs_gc_kthread *)(sbi->gc_thread);
>>>>>> +}
>>>>>> +
>>>>>>  static inline struct free_segmap_info *FREE_I(struct f2fs_sb_info *sbi)
>>>>>>  {
>>>>>>  	return (struct free_segmap_info *)(SM_I(sbi)->free_info);
>>>>>> @@ -2954,8 +2959,10 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len);
>>>>>>  /*
>>>>>>   * gc.c
>>>>>>   */
>>>>>> +int init_gc_context(struct f2fs_sb_info *sbi);
>>>>>> +void destroy_gc_context(struct f2fs_sb_info * sbi);
>>>>>>  int start_gc_thread(struct f2fs_sb_info *sbi);
>>>>>> -void stop_gc_thread(struct f2fs_sb_info *sbi);
>>>>>> +bool stop_gc_thread(struct f2fs_sb_info *sbi);
>>>>>>  block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
>>>>>>  int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background,
>>>>>>  			unsigned int segno);
>>>>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>>>>> index 70418b34c5f6..2febb84b2fd6 100644
>>>>>> --- a/fs/f2fs/gc.c
>>>>>> +++ b/fs/f2fs/gc.c
>>>>>> @@ -26,8 +26,8 @@
>>>>>>  static int gc_thread_func(void *data)
>>>>>>  {
>>>>>>  	struct f2fs_sb_info *sbi = data;
>>>>>> -	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
>>>>>> -	wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
>>>>>> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
>>>>>> +	wait_queue_head_t *wq = &gc_th->gc_wait_queue_head;
>>>>>>  	unsigned int wait_ms;
>>>>>>  
>>>>>>  	wait_ms = gc_th->min_sleep_time;
>>>>>> @@ -114,17 +114,15 @@ static int gc_thread_func(void *data)
>>>>>>  	return 0;
>>>>>>  }
>>>>>>  
>>>>>> -int start_gc_thread(struct f2fs_sb_info *sbi)
>>>>>> +int init_gc_context(struct f2fs_sb_info *sbi)
>>>>>>  {
>>>>>>  	struct f2fs_gc_kthread *gc_th;
>>>>>> -	dev_t dev = sbi->sb->s_bdev->bd_dev;
>>>>>> -	int err = 0;
>>>>>>  
>>>>>>  	gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
>>>>>> -	if (!gc_th) {
>>>>>> -		err = -ENOMEM;
>>>>>> -		goto out;
>>>>>> -	}
>>>>>> +	if (!gc_th)
>>>>>> +		return -ENOMEM;
>>>>>> +
>>>>>> +	gc_th->f2fs_gc_task = NULL;
>>>>>>  
>>>>>>  	gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
>>>>>>  	gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
>>>>>> @@ -135,35 +133,59 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
>>>>>>  	gc_th->gc_urgent = 0;
>>>>>>  	gc_th->gc_wake= 0;
>>>>>>  
>>>>>> +	init_rwsem(&gc_th->gc_rwsem);
>>>>>> +
>>>>>>  	sbi->gc_thread = gc_th;
>>>>>> -	init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
>>>>>> -	sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
>>>>>> +
>>>>>> +	return 0;
>>>>>> +}
>>>>>> +
>>>>>> +void destroy_gc_context(struct f2fs_sb_info *sbi)
>>>>>> +{
>>>>>> +	kfree(GC_I(sbi));
>>>>>> +	sbi->gc_thread = NULL;
>>>>>> +}
>>>>>> +
>>>>>> +int start_gc_thread(struct f2fs_sb_info *sbi)
>>>>>> +{
>>>>>> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
>>>>>> +	dev_t dev = sbi->sb->s_bdev->bd_dev;
>>>>>> +	int err = 0;
>>>>>> +
>>>>>> +	init_waitqueue_head(&gc_th->gc_wait_queue_head);
>>>>>> +	gc_th->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
>>>>>>  			"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
>>>>>>  	if (IS_ERR(gc_th->f2fs_gc_task)) {
>>>>>>  		err = PTR_ERR(gc_th->f2fs_gc_task);
>>>>>> -		kfree(gc_th);
>>>>>> -		sbi->gc_thread = NULL;
>>>>>> +		gc_th->f2fs_gc_task = NULL;
>>>>>>  	}
>>>>>> -out:
>>>>>> +
>>>>>>  	return err;
>>>>>>  }
>>>>>>  
>>>>>> -void stop_gc_thread(struct f2fs_sb_info *sbi)
>>>>>> +bool stop_gc_thread(struct f2fs_sb_info *sbi)
>>>>>>  {
>>>>>> -	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
>>>>>> -	if (!gc_th)
>>>>>> -		return;
>>>>>> -	kthread_stop(gc_th->f2fs_gc_task);
>>>>>> -	kfree(gc_th);
>>>>>> -	sbi->gc_thread = NULL;
>>>>>> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
>>>>>> +	bool stopped = false;
>>>>>> +
>>>>>> +	down_write(&gc_th->gc_rwsem);
>>>>>> +	if (gc_th->f2fs_gc_task) {
>>>>>> +		kthread_stop(gc_th->f2fs_gc_task);
>>>>>> +		gc_th->f2fs_gc_task = NULL;
>>>>>> +		stopped = true;
>>>>>> +	}
>>>>>> +	up_write(&gc_th->gc_rwsem);
>>>>>> +
>>>>>> +	return stopped;
>>>>>>  }
>>>>>>  
>>>>>>  static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
>>>>>>  {
>>>>>>  	int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
>>>>>>  
>>>>>> -	if (!gc_th)
>>>>>> -		return gc_mode;
>>>>>> +	down_read(&gc_th->gc_rwsem);
>>>>>> +	if (!gc_th->f2fs_gc_task)
>>>>>> +		goto out;
>>>>>>  
>>>>>>  	if (gc_th->gc_idle) {
>>>>>>  		if (gc_th->gc_idle == 1)
>>>>>> @@ -173,6 +195,8 @@ static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
>>>>>>  	}
>>>>>>  	if (gc_th->gc_urgent)
>>>>>>  		gc_mode = GC_GREEDY;
>>>>>> +out:
>>>>>> +	up_read(&gc_th->gc_rwsem);
>>>>>>  	return gc_mode;
>>>>>>  }
>>>>>>  
>>>>>> @@ -187,17 +211,19 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
>>>>>>  		p->max_search = dirty_i->nr_dirty[type];
>>>>>>  		p->ofs_unit = 1;
>>>>>>  	} else {
>>>>>> -		p->gc_mode = select_gc_type(sbi->gc_thread, gc_type);
>>>>>> +		p->gc_mode = select_gc_type(GC_I(sbi), gc_type);
>>>>>>  		p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
>>>>>>  		p->max_search = dirty_i->nr_dirty[DIRTY];
>>>>>>  		p->ofs_unit = sbi->segs_per_sec;
>>>>>>  	}
>>>>>>  
>>>>>>  	/* we need to check every dirty segments in the FG_GC case */
>>>>>> -	if (gc_type != FG_GC &&
>>>>>> -			(sbi->gc_thread && !sbi->gc_thread->gc_urgent) &&
>>>>>> +	down_read(&GC_I(sbi)->gc_rwsem);
>>>>>> +	if (gc_type != FG_GC && GC_I(sbi)->f2fs_gc_task &&
>>>>>> +			!GC_I(sbi)->gc_urgent &&
>>>>>>  			p->max_search > sbi->max_victim_search)
>>>>>>  		p->max_search = sbi->max_victim_search;
>>>>>> +	up_read(&GC_I(sbi)->gc_rwsem);
>>>>>>  
>>>>>>  	/* let's select beginning hot/small space first in no_heap mode*/
>>>>>>  	if (test_opt(sbi, NOHEAP) &&
>>>>>> diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
>>>>>> index b0045d4c8d1e..9a5b273328c2 100644
>>>>>> --- a/fs/f2fs/gc.h
>>>>>> +++ b/fs/f2fs/gc.h
>>>>>> @@ -26,6 +26,7 @@
>>>>>>  #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */
>>>>>>  
>>>>>>  struct f2fs_gc_kthread {
>>>>>> +	struct rw_semaphore gc_rwsem;		/* semaphore for f2fs_gc_task */
>>>>>>  	struct task_struct *f2fs_gc_task;
>>>>>>  	wait_queue_head_t gc_wait_queue_head;
>>>>>>  
>>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>>>> index 5960768d26df..f787eea1b2f6 100644
>>>>>> --- a/fs/f2fs/segment.c
>>>>>> +++ b/fs/f2fs/segment.c
>>>>>> @@ -177,8 +177,12 @@ bool need_SSR(struct f2fs_sb_info *sbi)
>>>>>>  
>>>>>>  	if (test_opt(sbi, LFS))
>>>>>>  		return false;
>>>>>> -	if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
>>>>>> +	down_read(&GC_I(sbi)->gc_rwsem);
>>>>>> +	if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent) {
>>>>>> +		down_read(&GC_I(sbi)->gc_rwsem);
>>>>>>  		return true;
>>>>>> +	}
>>>>>> +	up_read(&GC_I(sbi)->gc_rwsem);
>>>>>>  
>>>>>>  	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
>>>>>>  			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
>>>>>> @@ -1425,8 +1429,10 @@ static int issue_discard_thread(void *data)
>>>>>>  		if (dcc->discard_wake)
>>>>>>  			dcc->discard_wake = 0;
>>>>>>  
>>>>>> -		if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
>>>>>> +		down_read(&GC_I(sbi)->gc_rwsem);
>>>>>> +		if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent)
>>>>>>  			init_discard_policy(&dpolicy, DPOLICY_FORCE, 1);
>>>>>> +		up_read(&GC_I(sbi)->gc_rwsem);
>>>>>>  
>>>>>>  		sb_start_intwrite(sbi->sb);
>>>>>>  
>>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>>>> index dedb8e50b440..199f29dce86d 100644
>>>>>> --- a/fs/f2fs/super.c
>>>>>> +++ b/fs/f2fs/super.c
>>>>>> @@ -1044,6 +1044,7 @@ static void f2fs_put_super(struct super_block *sb)
>>>>>>  	kfree(sbi->raw_super);
>>>>>>  
>>>>>>  	destroy_device_list(sbi);
>>>>>> +	destroy_gc_context(sbi);
>>>>>>  	mempool_destroy(sbi->write_io_dummy);
>>>>>>  #ifdef CONFIG_QUOTA
>>>>>>  	for (i = 0; i < MAXQUOTAS; i++)
>>>>>> @@ -1476,15 +1477,18 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
>>>>>>  	 * option. Also sync the filesystem.
>>>>>>  	 */
>>>>>>  	if ((*flags & SB_RDONLY) || !test_opt(sbi, BG_GC)) {
>>>>>> -		if (sbi->gc_thread) {
>>>>>> -			stop_gc_thread(sbi);
>>>>>> -			need_restart_gc = true;
>>>>>> +		need_restart_gc = stop_gc_thread(sbi);
>>>>>> +	} else {
>>>>>> +		down_write(&GC_I(sbi)->gc_rwsem);
>>>>>> +		if (!GC_I(sbi)->f2fs_gc_task) {
>>>>>> +			err = start_gc_thread(sbi);
>>>>>> +			if (err) {
>>>>>> +				up_write(&GC_I(sbi)->gc_rwsem);
>>>>>> +				goto restore_opts;
>>>>>> +			}
>>>>>> +			need_stop_gc = true;
>>>>>>  		}
>>>>>> -	} else if (!sbi->gc_thread) {
>>>>>> -		err = start_gc_thread(sbi);
>>>>>> -		if (err)
>>>>>> -			goto restore_opts;
>>>>>> -		need_stop_gc = true;
>>>>>> +		up_write(&GC_I(sbi)->gc_rwsem);
>>>>>>  	}
>>>>>>  
>>>>>>  	if (*flags & SB_RDONLY ||
>>>>>> @@ -2771,6 +2775,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>>>>  		goto free_meta_inode;
>>>>>>  	}
>>>>>>  
>>>>>> +	err = init_gc_context(sbi);
>>>>>> +	if (err)
>>>>>> +		goto free_checkpoint;
>>>>>> +
>>>>>>  	/* Initialize device list */
>>>>>>  	err = f2fs_scan_devices(sbi);
>>>>>>  	if (err) {
>>>>>> @@ -2981,6 +2989,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>>>>  	destroy_segment_manager(sbi);
>>>>>>  free_devices:
>>>>>>  	destroy_device_list(sbi);
>>>>>> +	destroy_gc_context(sbi);
>>>>>> +free_checkpoint:
>>>>>>  	kfree(sbi->ckpt);
>>>>>>  free_meta_inode:
>>>>>>  	make_bad_inode(sbi->meta_inode);
>>>>>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
>>>>>> index 2c53de9251be..b8d09935e43f 100644
>>>>>> --- a/fs/f2fs/sysfs.c
>>>>>> +++ b/fs/f2fs/sysfs.c
>>>>>> @@ -46,7 +46,7 @@ struct f2fs_attr {
>>>>>>  static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
>>>>>>  {
>>>>>>  	if (struct_type == GC_THREAD)
>>>>>> -		return (unsigned char *)sbi->gc_thread;
>>>>>> +		return (unsigned char *)GC_I(sbi);
>>>>>>  	else if (struct_type == SM_INFO)
>>>>>>  		return (unsigned char *)SM_I(sbi);
>>>>>>  	else if (struct_type == DCC_INFO)
>>>>>> @@ -248,16 +248,28 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
>>>>>>  	if (!strcmp(a->attr.name, "trim_sections"))
>>>>>>  		return -EINVAL;
>>>>>>  
>>>>>> -	*ui = t;
>>>>>> -
>>>>>> -	if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0)
>>>>>> +	if (!strcmp(a->attr.name, "iostat_enable") && t == 0)
>>>>>>  		f2fs_reset_iostat(sbi);
>>>>>> -	if (!strcmp(a->attr.name, "gc_urgent") && t == 1 && sbi->gc_thread) {
>>>>>> -		sbi->gc_thread->gc_wake = 1;
>>>>>> -		wake_up_interruptible_all(&sbi->gc_thread->gc_wait_queue_head);
>>>>>> +
>>>>>> +	if (a->struct_type == GC_THREAD) {
>>>>>> +		down_write(&GC_I(sbi)->gc_rwsem);
>>>>>> +		if (!GC_I(sbi)->f2fs_gc_task) {
>>>>>> +			up_write(&GC_I(sbi)->gc_rwsem);
>>>>>> +			return -EINVAL;
>>>>>> +		}
>>>>>> +	}
>>>>>> +
>>>>>> +	if (!strcmp(a->attr.name, "gc_urgent") && t == 1) {
>>>>>> +		GC_I(sbi)->gc_wake = 1;
>>>>>> +		wake_up_interruptible_all(&GC_I(sbi)->gc_wait_queue_head);
>>>>>>  		wake_up_discard_thread(sbi, true);
>>>>>>  	}
>>>>>>  
>>>>>> +	*ui = t;
>>>>>> +
>>>>>> +	if (a->struct_type == GC_THREAD)
>>>>>> +		up_write(&GC_I(sbi)->gc_rwsem);
>>>>>> +
>>>>>>  	return count;
>>>>>>  }
>>>>>>  
>>>>>> -- 
>>>>>> 2.15.0.55.gc2ece9dc4de6
>>>>>
>>>>> .
>>>>>
>>>
>>> .
>>>
> 
> .
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] f2fs: fix to avoid race during access gc_thread pointer
@ 2018-05-05  2:21             ` Chao Yu
  0 siblings, 0 replies; 16+ messages in thread
From: Chao Yu @ 2018-05-05  2:21 UTC (permalink / raw)
  To: Jaegeuk Kim; +Cc: linux-f2fs-devel, linux-kernel, chao

On 2018/5/5 2:59, Jaegeuk Kim wrote:
> On 05/02, Chao Yu wrote:
>> On 2018/4/28 10:36, Jaegeuk Kim wrote:
>>> On 04/27, Chao Yu wrote:
>>>> On 2018/4/27 0:03, Jaegeuk Kim wrote:
>>>>> On 04/24, Chao Yu wrote:
>>>>>> Thread A			Thread B		Thread C
>>>>>> - f2fs_remount
>>>>>>  - stop_gc_thread
>>>>>> 				- f2fs_sbi_store
>>>>>> 							- issue_discard_thread
>>>>>>    sbi->gc_thread = NULL;
>>>>>> 				  sbi->gc_thread->gc_wake = 1
>>>>>> 							  access sbi->gc_thread->gc_urgent
>>>>>>
>>>>>> Previously, we allocate memory for sbi->gc_thread based on background
>>>>>> gc thread mount option, the memory can be released if we turn off
>>>>>> that mount option, but still there are several places access gc_thread
>>>>>> pointer without considering race condition, result in NULL point
>>>>>> dereference.
>>>>>
>>>>> Do we just need to check &sb->s_umount in f2fs_sbi_store() and
>>>>
>>>> Better,
>>>>
>>>>> issue_discard_thread?
>>>>
>>>> There is more cases can be called from different scenarios:
>>>> - select_policy()
>>>> - need_SSR()
>>>
>>> No? They should be blocked by remount(2).
>>
>> How about below cases?
>>
>> - do_remount
> 
> Was there no guard to block any filesystem operations?

The only block point is f2fs_readonly in f2fs_sync_file, without holding
s_umount during ->fsync, so IMO, we need to cover need_SSR & select_gc_type, let
me update the patch later.

Thanks,

> If it's true, yeah, we need to cover them.
> 
>>  - do_remount_sb
>>   - remount_fs
>>    - f2fs_remount
>>     - stop_gc_thread
>> 					- fsync
>> 					 - f2fs_sync_file
>> 					  - file_write_and_wait_range
>> 					   - f2fs_write_data_pages
>> 					    - __write_data_page
>> 					     - should_update_inplace
>> 					      - check_inplace_update_policy
>> 					       - need_SSR
>>      : sbi->gc_thread = NULL;
>>
>> or
>>
>> 					     - write_data_page
>> 					      - allocate_data_block
>> 					       - allocate_segment
>> 					        - get_ssr_segment
>> 					         - select_gc_type
>>      : sbi->gc_thread = NULL;
>>
>> Thanks,
>>
>>>
>>>>
>>>> Thanks,
>>>>
>>>>>
>>>>>>
>>>>>> In order to fix this issue, keep gc_thread structure valid in sbi all
>>>>>> the time instead of alloc/free it dynamically.
>>>>>>
>>>>>> In addition, add a rw semaphore to exclude rw operation in fields of
>>>>>> gc_thread.
>>>>>>
>>>>>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
>>>>>> ---
>>>>>> v2:
>>>>>> - add a rw semaphore to exclude rw operation suggested by Jaegeuk.
>>>>>>  fs/f2fs/debug.c   |  3 +--
>>>>>>  fs/f2fs/f2fs.h    |  9 ++++++-
>>>>>>  fs/f2fs/gc.c      | 78 ++++++++++++++++++++++++++++++++++++-------------------
>>>>>>  fs/f2fs/gc.h      |  1 +
>>>>>>  fs/f2fs/segment.c | 10 +++++--
>>>>>>  fs/f2fs/super.c   | 26 +++++++++++++------
>>>>>>  fs/f2fs/sysfs.c   | 26 ++++++++++++++-----
>>>>>>  7 files changed, 107 insertions(+), 46 deletions(-)
>>>>>>
>>>>>> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
>>>>>> index a66107b5cfff..0fbd674c66fb 100644
>>>>>> --- a/fs/f2fs/debug.c
>>>>>> +++ b/fs/f2fs/debug.c
>>>>>> @@ -221,8 +221,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
>>>>>>  	si->cache_mem = 0;
>>>>>>  
>>>>>>  	/* build gc */
>>>>>> -	if (sbi->gc_thread)
>>>>>> -		si->cache_mem += sizeof(struct f2fs_gc_kthread);
>>>>>> +	si->cache_mem += sizeof(struct f2fs_gc_kthread);
>>>>>>  
>>>>>>  	/* build merge flush thread */
>>>>>>  	if (SM_I(sbi)->fcc_info)
>>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>>>> index 8f3ad9662d13..75d3b4875429 100644
>>>>>> --- a/fs/f2fs/f2fs.h
>>>>>> +++ b/fs/f2fs/f2fs.h
>>>>>> @@ -1412,6 +1412,11 @@ static inline struct sit_info *SIT_I(struct f2fs_sb_info *sbi)
>>>>>>  	return (struct sit_info *)(SM_I(sbi)->sit_info);
>>>>>>  }
>>>>>>  
>>>>>> +static inline struct f2fs_gc_kthread *GC_I(struct f2fs_sb_info *sbi)
>>>>>> +{
>>>>>> +	return (struct f2fs_gc_kthread *)(sbi->gc_thread);
>>>>>> +}
>>>>>> +
>>>>>>  static inline struct free_segmap_info *FREE_I(struct f2fs_sb_info *sbi)
>>>>>>  {
>>>>>>  	return (struct free_segmap_info *)(SM_I(sbi)->free_info);
>>>>>> @@ -2954,8 +2959,10 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len);
>>>>>>  /*
>>>>>>   * gc.c
>>>>>>   */
>>>>>> +int init_gc_context(struct f2fs_sb_info *sbi);
>>>>>> +void destroy_gc_context(struct f2fs_sb_info * sbi);
>>>>>>  int start_gc_thread(struct f2fs_sb_info *sbi);
>>>>>> -void stop_gc_thread(struct f2fs_sb_info *sbi);
>>>>>> +bool stop_gc_thread(struct f2fs_sb_info *sbi);
>>>>>>  block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
>>>>>>  int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background,
>>>>>>  			unsigned int segno);
>>>>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>>>>> index 70418b34c5f6..2febb84b2fd6 100644
>>>>>> --- a/fs/f2fs/gc.c
>>>>>> +++ b/fs/f2fs/gc.c
>>>>>> @@ -26,8 +26,8 @@
>>>>>>  static int gc_thread_func(void *data)
>>>>>>  {
>>>>>>  	struct f2fs_sb_info *sbi = data;
>>>>>> -	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
>>>>>> -	wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
>>>>>> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
>>>>>> +	wait_queue_head_t *wq = &gc_th->gc_wait_queue_head;
>>>>>>  	unsigned int wait_ms;
>>>>>>  
>>>>>>  	wait_ms = gc_th->min_sleep_time;
>>>>>> @@ -114,17 +114,15 @@ static int gc_thread_func(void *data)
>>>>>>  	return 0;
>>>>>>  }
>>>>>>  
>>>>>> -int start_gc_thread(struct f2fs_sb_info *sbi)
>>>>>> +int init_gc_context(struct f2fs_sb_info *sbi)
>>>>>>  {
>>>>>>  	struct f2fs_gc_kthread *gc_th;
>>>>>> -	dev_t dev = sbi->sb->s_bdev->bd_dev;
>>>>>> -	int err = 0;
>>>>>>  
>>>>>>  	gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
>>>>>> -	if (!gc_th) {
>>>>>> -		err = -ENOMEM;
>>>>>> -		goto out;
>>>>>> -	}
>>>>>> +	if (!gc_th)
>>>>>> +		return -ENOMEM;
>>>>>> +
>>>>>> +	gc_th->f2fs_gc_task = NULL;
>>>>>>  
>>>>>>  	gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
>>>>>>  	gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
>>>>>> @@ -135,35 +133,59 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
>>>>>>  	gc_th->gc_urgent = 0;
>>>>>>  	gc_th->gc_wake= 0;
>>>>>>  
>>>>>> +	init_rwsem(&gc_th->gc_rwsem);
>>>>>> +
>>>>>>  	sbi->gc_thread = gc_th;
>>>>>> -	init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
>>>>>> -	sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
>>>>>> +
>>>>>> +	return 0;
>>>>>> +}
>>>>>> +
>>>>>> +void destroy_gc_context(struct f2fs_sb_info *sbi)
>>>>>> +{
>>>>>> +	kfree(GC_I(sbi));
>>>>>> +	sbi->gc_thread = NULL;
>>>>>> +}
>>>>>> +
>>>>>> +int start_gc_thread(struct f2fs_sb_info *sbi)
>>>>>> +{
>>>>>> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
>>>>>> +	dev_t dev = sbi->sb->s_bdev->bd_dev;
>>>>>> +	int err = 0;
>>>>>> +
>>>>>> +	init_waitqueue_head(&gc_th->gc_wait_queue_head);
>>>>>> +	gc_th->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
>>>>>>  			"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
>>>>>>  	if (IS_ERR(gc_th->f2fs_gc_task)) {
>>>>>>  		err = PTR_ERR(gc_th->f2fs_gc_task);
>>>>>> -		kfree(gc_th);
>>>>>> -		sbi->gc_thread = NULL;
>>>>>> +		gc_th->f2fs_gc_task = NULL;
>>>>>>  	}
>>>>>> -out:
>>>>>> +
>>>>>>  	return err;
>>>>>>  }
>>>>>>  
>>>>>> -void stop_gc_thread(struct f2fs_sb_info *sbi)
>>>>>> +bool stop_gc_thread(struct f2fs_sb_info *sbi)
>>>>>>  {
>>>>>> -	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
>>>>>> -	if (!gc_th)
>>>>>> -		return;
>>>>>> -	kthread_stop(gc_th->f2fs_gc_task);
>>>>>> -	kfree(gc_th);
>>>>>> -	sbi->gc_thread = NULL;
>>>>>> +	struct f2fs_gc_kthread *gc_th = GC_I(sbi);
>>>>>> +	bool stopped = false;
>>>>>> +
>>>>>> +	down_write(&gc_th->gc_rwsem);
>>>>>> +	if (gc_th->f2fs_gc_task) {
>>>>>> +		kthread_stop(gc_th->f2fs_gc_task);
>>>>>> +		gc_th->f2fs_gc_task = NULL;
>>>>>> +		stopped = true;
>>>>>> +	}
>>>>>> +	up_write(&gc_th->gc_rwsem);
>>>>>> +
>>>>>> +	return stopped;
>>>>>>  }
>>>>>>  
>>>>>>  static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
>>>>>>  {
>>>>>>  	int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
>>>>>>  
>>>>>> -	if (!gc_th)
>>>>>> -		return gc_mode;
>>>>>> +	down_read(&gc_th->gc_rwsem);
>>>>>> +	if (!gc_th->f2fs_gc_task)
>>>>>> +		goto out;
>>>>>>  
>>>>>>  	if (gc_th->gc_idle) {
>>>>>>  		if (gc_th->gc_idle == 1)
>>>>>> @@ -173,6 +195,8 @@ static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
>>>>>>  	}
>>>>>>  	if (gc_th->gc_urgent)
>>>>>>  		gc_mode = GC_GREEDY;
>>>>>> +out:
>>>>>> +	up_read(&gc_th->gc_rwsem);
>>>>>>  	return gc_mode;
>>>>>>  }
>>>>>>  
>>>>>> @@ -187,17 +211,19 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
>>>>>>  		p->max_search = dirty_i->nr_dirty[type];
>>>>>>  		p->ofs_unit = 1;
>>>>>>  	} else {
>>>>>> -		p->gc_mode = select_gc_type(sbi->gc_thread, gc_type);
>>>>>> +		p->gc_mode = select_gc_type(GC_I(sbi), gc_type);
>>>>>>  		p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
>>>>>>  		p->max_search = dirty_i->nr_dirty[DIRTY];
>>>>>>  		p->ofs_unit = sbi->segs_per_sec;
>>>>>>  	}
>>>>>>  
>>>>>>  	/* we need to check every dirty segments in the FG_GC case */
>>>>>> -	if (gc_type != FG_GC &&
>>>>>> -			(sbi->gc_thread && !sbi->gc_thread->gc_urgent) &&
>>>>>> +	down_read(&GC_I(sbi)->gc_rwsem);
>>>>>> +	if (gc_type != FG_GC && GC_I(sbi)->f2fs_gc_task &&
>>>>>> +			!GC_I(sbi)->gc_urgent &&
>>>>>>  			p->max_search > sbi->max_victim_search)
>>>>>>  		p->max_search = sbi->max_victim_search;
>>>>>> +	up_read(&GC_I(sbi)->gc_rwsem);
>>>>>>  
>>>>>>  	/* let's select beginning hot/small space first in no_heap mode*/
>>>>>>  	if (test_opt(sbi, NOHEAP) &&
>>>>>> diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
>>>>>> index b0045d4c8d1e..9a5b273328c2 100644
>>>>>> --- a/fs/f2fs/gc.h
>>>>>> +++ b/fs/f2fs/gc.h
>>>>>> @@ -26,6 +26,7 @@
>>>>>>  #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */
>>>>>>  
>>>>>>  struct f2fs_gc_kthread {
>>>>>> +	struct rw_semaphore gc_rwsem;		/* semaphore for f2fs_gc_task */
>>>>>>  	struct task_struct *f2fs_gc_task;
>>>>>>  	wait_queue_head_t gc_wait_queue_head;
>>>>>>  
>>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>>>> index 5960768d26df..f787eea1b2f6 100644
>>>>>> --- a/fs/f2fs/segment.c
>>>>>> +++ b/fs/f2fs/segment.c
>>>>>> @@ -177,8 +177,12 @@ bool need_SSR(struct f2fs_sb_info *sbi)
>>>>>>  
>>>>>>  	if (test_opt(sbi, LFS))
>>>>>>  		return false;
>>>>>> -	if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
>>>>>> +	down_read(&GC_I(sbi)->gc_rwsem);
>>>>>> +	if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent) {
>>>>>> +		down_read(&GC_I(sbi)->gc_rwsem);
>>>>>>  		return true;
>>>>>> +	}
>>>>>> +	up_read(&GC_I(sbi)->gc_rwsem);
>>>>>>  
>>>>>>  	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
>>>>>>  			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
>>>>>> @@ -1425,8 +1429,10 @@ static int issue_discard_thread(void *data)
>>>>>>  		if (dcc->discard_wake)
>>>>>>  			dcc->discard_wake = 0;
>>>>>>  
>>>>>> -		if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
>>>>>> +		down_read(&GC_I(sbi)->gc_rwsem);
>>>>>> +		if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent)
>>>>>>  			init_discard_policy(&dpolicy, DPOLICY_FORCE, 1);
>>>>>> +		up_read(&GC_I(sbi)->gc_rwsem);
>>>>>>  
>>>>>>  		sb_start_intwrite(sbi->sb);
>>>>>>  
>>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>>>> index dedb8e50b440..199f29dce86d 100644
>>>>>> --- a/fs/f2fs/super.c
>>>>>> +++ b/fs/f2fs/super.c
>>>>>> @@ -1044,6 +1044,7 @@ static void f2fs_put_super(struct super_block *sb)
>>>>>>  	kfree(sbi->raw_super);
>>>>>>  
>>>>>>  	destroy_device_list(sbi);
>>>>>> +	destroy_gc_context(sbi);
>>>>>>  	mempool_destroy(sbi->write_io_dummy);
>>>>>>  #ifdef CONFIG_QUOTA
>>>>>>  	for (i = 0; i < MAXQUOTAS; i++)
>>>>>> @@ -1476,15 +1477,18 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
>>>>>>  	 * option. Also sync the filesystem.
>>>>>>  	 */
>>>>>>  	if ((*flags & SB_RDONLY) || !test_opt(sbi, BG_GC)) {
>>>>>> -		if (sbi->gc_thread) {
>>>>>> -			stop_gc_thread(sbi);
>>>>>> -			need_restart_gc = true;
>>>>>> +		need_restart_gc = stop_gc_thread(sbi);
>>>>>> +	} else {
>>>>>> +		down_write(&GC_I(sbi)->gc_rwsem);
>>>>>> +		if (!GC_I(sbi)->f2fs_gc_task) {
>>>>>> +			err = start_gc_thread(sbi);
>>>>>> +			if (err) {
>>>>>> +				up_write(&GC_I(sbi)->gc_rwsem);
>>>>>> +				goto restore_opts;
>>>>>> +			}
>>>>>> +			need_stop_gc = true;
>>>>>>  		}
>>>>>> -	} else if (!sbi->gc_thread) {
>>>>>> -		err = start_gc_thread(sbi);
>>>>>> -		if (err)
>>>>>> -			goto restore_opts;
>>>>>> -		need_stop_gc = true;
>>>>>> +		up_write(&GC_I(sbi)->gc_rwsem);
>>>>>>  	}
>>>>>>  
>>>>>>  	if (*flags & SB_RDONLY ||
>>>>>> @@ -2771,6 +2775,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>>>>  		goto free_meta_inode;
>>>>>>  	}
>>>>>>  
>>>>>> +	err = init_gc_context(sbi);
>>>>>> +	if (err)
>>>>>> +		goto free_checkpoint;
>>>>>> +
>>>>>>  	/* Initialize device list */
>>>>>>  	err = f2fs_scan_devices(sbi);
>>>>>>  	if (err) {
>>>>>> @@ -2981,6 +2989,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>>>>  	destroy_segment_manager(sbi);
>>>>>>  free_devices:
>>>>>>  	destroy_device_list(sbi);
>>>>>> +	destroy_gc_context(sbi);
>>>>>> +free_checkpoint:
>>>>>>  	kfree(sbi->ckpt);
>>>>>>  free_meta_inode:
>>>>>>  	make_bad_inode(sbi->meta_inode);
>>>>>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
>>>>>> index 2c53de9251be..b8d09935e43f 100644
>>>>>> --- a/fs/f2fs/sysfs.c
>>>>>> +++ b/fs/f2fs/sysfs.c
>>>>>> @@ -46,7 +46,7 @@ struct f2fs_attr {
>>>>>>  static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
>>>>>>  {
>>>>>>  	if (struct_type == GC_THREAD)
>>>>>> -		return (unsigned char *)sbi->gc_thread;
>>>>>> +		return (unsigned char *)GC_I(sbi);
>>>>>>  	else if (struct_type == SM_INFO)
>>>>>>  		return (unsigned char *)SM_I(sbi);
>>>>>>  	else if (struct_type == DCC_INFO)
>>>>>> @@ -248,16 +248,28 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
>>>>>>  	if (!strcmp(a->attr.name, "trim_sections"))
>>>>>>  		return -EINVAL;
>>>>>>  
>>>>>> -	*ui = t;
>>>>>> -
>>>>>> -	if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0)
>>>>>> +	if (!strcmp(a->attr.name, "iostat_enable") && t == 0)
>>>>>>  		f2fs_reset_iostat(sbi);
>>>>>> -	if (!strcmp(a->attr.name, "gc_urgent") && t == 1 && sbi->gc_thread) {
>>>>>> -		sbi->gc_thread->gc_wake = 1;
>>>>>> -		wake_up_interruptible_all(&sbi->gc_thread->gc_wait_queue_head);
>>>>>> +
>>>>>> +	if (a->struct_type == GC_THREAD) {
>>>>>> +		down_write(&GC_I(sbi)->gc_rwsem);
>>>>>> +		if (!GC_I(sbi)->f2fs_gc_task) {
>>>>>> +			up_write(&GC_I(sbi)->gc_rwsem);
>>>>>> +			return -EINVAL;
>>>>>> +		}
>>>>>> +	}
>>>>>> +
>>>>>> +	if (!strcmp(a->attr.name, "gc_urgent") && t == 1) {
>>>>>> +		GC_I(sbi)->gc_wake = 1;
>>>>>> +		wake_up_interruptible_all(&GC_I(sbi)->gc_wait_queue_head);
>>>>>>  		wake_up_discard_thread(sbi, true);
>>>>>>  	}
>>>>>>  
>>>>>> +	*ui = t;
>>>>>> +
>>>>>> +	if (a->struct_type == GC_THREAD)
>>>>>> +		up_write(&GC_I(sbi)->gc_rwsem);
>>>>>> +
>>>>>>  	return count;
>>>>>>  }
>>>>>>  
>>>>>> -- 
>>>>>> 2.15.0.55.gc2ece9dc4de6
>>>>>
>>>>> .
>>>>>
>>>
>>> .
>>>
> 
> .
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v2] f2fs: fix to avoid race during access gc_thread pointer
@ 2018-05-08  5:49 ` Chao Yu
  0 siblings, 0 replies; 16+ messages in thread
From: Chao Yu @ 2018-05-08  5:49 UTC (permalink / raw)
  To: jaegeuk; +Cc: linux-f2fs-devel, linux-kernel, chao, Chao Yu

Thread A			Thread B
- f2fs_remount
 - stop_gc_thread
				- f2fs_sbi_store
   sbi->gc_thread = NULL;
				  access sbi->gc_thread->gc_*

Previously, we allocate memory for sbi->gc_thread based on background
gc thread mount option, the memory can be released if we turn off
that mount option, but still there are several places access gc_thread
pointer without considering race condition, result in NULL point
dereference.

In order to fix this issue, use sb->s_umount to exclude those operations.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
---
v2:
- fix to cover __struct_ptr() with sb->s_umount suggested by Jaegeuk.
 fs/f2fs/sysfs.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 7432192ebe17..79f4e4ac8200 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -168,7 +168,7 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
 	return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
 }
 
-static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
+static ssize_t __f2fs_sbi_store(struct f2fs_attr *a,
 			struct f2fs_sb_info *sbi,
 			const char *buf, size_t count)
 {
@@ -261,6 +261,22 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
 	return count;
 }
 
+static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
+			struct f2fs_sb_info *sbi,
+			const char *buf, size_t count)
+{
+	ssize_t ret;
+	bool gc_entry = (a->struct_type == GC_THREAD);
+
+	if (gc_entry)
+		down_read(&sbi->sb->s_umount);
+	ret = __f2fs_sbi_store(a, sbi, buf, count);
+	if (gc_entry)
+		up_read(&sbi->sb->s_umount);
+
+	return ret;
+}
+
 static ssize_t f2fs_attr_show(struct kobject *kobj,
 				struct attribute *attr, char *buf)
 {
-- 
2.17.0.391.g1f1cddd558b5

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v2] f2fs: fix to avoid race during access gc_thread pointer
@ 2018-05-08  5:49 ` Chao Yu
  0 siblings, 0 replies; 16+ messages in thread
From: Chao Yu @ 2018-05-08  5:49 UTC (permalink / raw)
  To: jaegeuk; +Cc: linux-f2fs-devel, linux-kernel, chao, Chao Yu

Thread A			Thread B
- f2fs_remount
 - stop_gc_thread
				- f2fs_sbi_store
   sbi->gc_thread = NULL;
				  access sbi->gc_thread->gc_*

Previously, we allocate memory for sbi->gc_thread based on background
gc thread mount option, the memory can be released if we turn off
that mount option, but still there are several places access gc_thread
pointer without considering race condition, result in NULL point
dereference.

In order to fix this issue, use sb->s_umount to exclude those operations.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
---
v2:
- fix to cover __struct_ptr() with sb->s_umount suggested by Jaegeuk.
 fs/f2fs/sysfs.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 7432192ebe17..79f4e4ac8200 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -168,7 +168,7 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
 	return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
 }
 
-static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
+static ssize_t __f2fs_sbi_store(struct f2fs_attr *a,
 			struct f2fs_sb_info *sbi,
 			const char *buf, size_t count)
 {
@@ -261,6 +261,22 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
 	return count;
 }
 
+static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
+			struct f2fs_sb_info *sbi,
+			const char *buf, size_t count)
+{
+	ssize_t ret;
+	bool gc_entry = (a->struct_type == GC_THREAD);
+
+	if (gc_entry)
+		down_read(&sbi->sb->s_umount);
+	ret = __f2fs_sbi_store(a, sbi, buf, count);
+	if (gc_entry)
+		up_read(&sbi->sb->s_umount);
+
+	return ret;
+}
+
 static ssize_t f2fs_attr_show(struct kobject *kobj,
 				struct attribute *attr, char *buf)
 {
-- 
2.17.0.391.g1f1cddd558b5

^ permalink raw reply related	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2018-05-08  5:49 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-04-24  2:42 [PATCH v2] f2fs: fix to avoid race during access gc_thread pointer Chao Yu
2018-04-24  2:42 ` Chao Yu
2018-04-26 16:03 ` Jaegeuk Kim
2018-04-27  2:04   ` Chao Yu
2018-04-27  2:04     ` Chao Yu
2018-04-27  2:07     ` Chao Yu
2018-04-27  2:07       ` Chao Yu
2018-04-28  2:37       ` Jaegeuk Kim
2018-04-28  2:36     ` Jaegeuk Kim
2018-05-02  1:47       ` Chao Yu
2018-05-02  1:47         ` Chao Yu
2018-05-04 18:59         ` Jaegeuk Kim
2018-05-05  2:21           ` Chao Yu
2018-05-05  2:21             ` Chao Yu
2018-05-08  5:49 Chao Yu
2018-05-08  5:49 ` Chao Yu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.