All of lore.kernel.org
 help / color / mirror / Atom feed
* [Cluster-devel] [GFS2 PATCH 0/6 v2] Misc Patch Collection
@ 2020-06-05 16:46 Bob Peterson
  2020-06-05 16:46 ` [Cluster-devel] [GFS2 PATCH 1/6] gfs2: Add new sysfs file for gfs2 status Bob Peterson
                   ` (5 more replies)
  0 siblings, 6 replies; 7+ messages in thread
From: Bob Peterson @ 2020-06-05 16:46 UTC (permalink / raw)
  To: cluster-devel.redhat.com

I've revised my recent patch set based on feedback from Andreas.
Here is my latest. Changes from last time are:

1. Removed the spin_trylock related info from the status sysfs file
   as per Andreas's feedback. Although valuable to have, I see his point.
2. Removed "gfs2: Add new trace point for glock ail management".
   I still think this would be useful to have, but perhaps only compiled
   in with a new debug kernel option. For now, leave it out.
3. Removed "gfs2: add memory barriers to gfs2_glock_remove_revoke".
   It is not needed.
4. Removed "gfs2: Only do glock put in gfs2_create_inode for free inodes".
   It was pushed to the repo separately.
5. Revised "gfs2: instrumentation wrt log_flush stuck" moving that code
   to a new function, which makes the err string contiguous for grep.
6. Added "gfs2: new slab for transactions" which is revised to fix bugs
   that were in the old version.
7. Added "gfs2: fix use-after-free on transaction ail lists" that fixes
   a problem that comes to light with the previous patch.
8. Patches 1/8 and 2/8 are omitted, since they were pushed separately.

Bob Peterson (6):
  gfs2: Add new sysfs file for gfs2 status
  gfs2: print mapping->nrpages in glock dump for address space glocks
  gfs2: instrumentation wrt log_flush stuck
  gfs2: introduce new gfs2_glock_assert_withdraw
  gfs2: new slab for transactions
  gfs2: fix use-after-free on transaction ail lists

 fs/gfs2/glock.c | 32 +++++++++++++++---------
 fs/gfs2/glock.h |  9 +++++++
 fs/gfs2/glops.c |  2 ++
 fs/gfs2/log.c   | 54 ++++++++++++++++++++++++++++------------
 fs/gfs2/main.c  |  9 +++++++
 fs/gfs2/sys.c   | 65 +++++++++++++++++++++++++++++++++++++++++++++++++
 fs/gfs2/trans.c | 22 ++++++++++++++---
 fs/gfs2/trans.h |  1 +
 fs/gfs2/util.c  |  1 +
 fs/gfs2/util.h  |  1 +
 10 files changed, 165 insertions(+), 31 deletions(-)

-- 
2.26.2



^ permalink raw reply	[flat|nested] 7+ messages in thread

* [Cluster-devel] [GFS2 PATCH 1/6] gfs2: Add new sysfs file for gfs2 status
  2020-06-05 16:46 [Cluster-devel] [GFS2 PATCH 0/6 v2] Misc Patch Collection Bob Peterson
@ 2020-06-05 16:46 ` Bob Peterson
  2020-06-05 16:46 ` [Cluster-devel] [GFS2 PATCH 2/6] gfs2: print mapping->nrpages in glock dump for address space glocks Bob Peterson
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Bob Peterson @ 2020-06-05 16:46 UTC (permalink / raw)
  To: cluster-devel.redhat.com

This patch adds a new file: /sys/fs/gfs2/*/status which will report
the status of the file system. Catting this file dumps the current
status of the file system according to various superblock variables.
For example:

Journal Checked:   1
Journal Live:      1
Withdrawn:         0
No barriers:       0
No recovery:       0
Demote:            0
No Journal ID:     1
RO Recovery:       0
Skip DLM Unlock:   0
Force AIL Flush:   0
FS Frozen:         0
Withdrawing:       0
Withdraw In Prog:  0
Remote Withdraw:   0
Withdraw Recovery: 0
sd_log_lock held:  0
statfs_spin held:  0
sd_rindex_spin:    0
sd_jindex_spin:    0
sd_trunc_lock:     0
sd_bitmap_lock:    0
sd_ordered_lock:   0
sd_ail_lock:       0
sd_log_error:      0
sd_log_flush_lock: 0
sd_log_flush_lock: 1
sd_log_in_flight:  1

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
 fs/gfs2/sys.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)

diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index d28c41bd69b0..572353f0b41f 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -63,6 +63,69 @@ static ssize_t id_show(struct gfs2_sbd *sdp, char *buf)
 			MAJOR(sdp->sd_vfs->s_dev), MINOR(sdp->sd_vfs->s_dev));
 }
 
+static ssize_t status_show(struct gfs2_sbd *sdp, char *buf)
+{
+	unsigned long f = sdp->sd_flags;
+	ssize_t s;
+
+	s = snprintf(buf, PAGE_SIZE,
+		     "Journal Checked:   %d\n"
+		     "Journal Live:      %d\n"
+		     "Withdrawn:         %d\n"
+		     "No barriers:       %d\n"
+		     "No recovery:       %d\n"
+		     "Demote:            %d\n"
+		     "No Journal ID:     %d\n"
+		     "RO Recovery:       %d\n"
+		     "Skip DLM Unlock:   %d\n"
+		     "Force AIL Flush:   %d\n"
+		     "FS Frozen:         %d\n"
+		     "Withdrawing:       %d\n"
+		     "Withdraw In Prog:  %d\n"
+		     "Remote Withdraw:   %d\n"
+		     "Withdraw Recovery: %d\n"
+		     "sd_log_lock held:  %d\n"
+		     "statfs_spin held:  %d\n"
+		     "sd_rindex_spin:    %d\n"
+		     "sd_jindex_spin:    %d\n"
+		     "sd_trunc_lock:     %d\n"
+		     "sd_bitmap_lock:    %d\n"
+		     "sd_ordered_lock:   %d\n"
+		     "sd_ail_lock:       %d\n"
+		     "sd_log_error:      %d\n"
+		     "sd_log_flush_lock: %d\n"
+		     "sd_log_num_revoke: %u\n"
+		     "sd_log_in_flight:  %d\n",
+		     test_bit(SDF_JOURNAL_CHECKED, &f),
+		     test_bit(SDF_JOURNAL_LIVE, &f),
+		     test_bit(SDF_WITHDRAWN, &f),
+		     test_bit(SDF_NOBARRIERS, &f),
+		     test_bit(SDF_NORECOVERY, &f),
+		     test_bit(SDF_DEMOTE, &f),
+		     test_bit(SDF_NOJOURNALID, &f),
+		     test_bit(SDF_RORECOVERY, &f),
+		     test_bit(SDF_SKIP_DLM_UNLOCK, &f),
+		     test_bit(SDF_FORCE_AIL_FLUSH, &f),
+		     test_bit(SDF_FS_FROZEN, &f),
+		     test_bit(SDF_WITHDRAWING, &f),
+		     test_bit(SDF_WITHDRAW_IN_PROG, &f),
+		     test_bit(SDF_REMOTE_WITHDRAW, &f),
+		     test_bit(SDF_WITHDRAW_RECOVERY, &f),
+		     spin_is_locked(&sdp->sd_log_lock),
+		     spin_is_locked(&sdp->sd_statfs_spin),
+		     spin_is_locked(&sdp->sd_rindex_spin),
+		     spin_is_locked(&sdp->sd_jindex_spin),
+		     spin_is_locked(&sdp->sd_trunc_lock),
+		     spin_is_locked(&sdp->sd_bitmap_lock),
+		     spin_is_locked(&sdp->sd_ordered_lock),
+		     spin_is_locked(&sdp->sd_ail_lock),
+		     sdp->sd_log_error,
+		     rwsem_is_locked(&sdp->sd_log_flush_lock),
+		     sdp->sd_log_num_revoke,
+		     atomic_read(&sdp->sd_log_in_flight));
+	return s;
+}
+
 static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf)
 {
 	return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_fsname);
@@ -283,6 +346,7 @@ GFS2_ATTR(quota_sync,          0200, NULL,          quota_sync_store);
 GFS2_ATTR(quota_refresh_user,  0200, NULL,          quota_refresh_user_store);
 GFS2_ATTR(quota_refresh_group, 0200, NULL,          quota_refresh_group_store);
 GFS2_ATTR(demote_rq,           0200, NULL,	    demote_rq_store);
+GFS2_ATTR(status,              0444, status_show,   NULL);
 
 static struct attribute *gfs2_attrs[] = {
 	&gfs2_attr_id.attr,
@@ -295,6 +359,7 @@ static struct attribute *gfs2_attrs[] = {
 	&gfs2_attr_quota_refresh_user.attr,
 	&gfs2_attr_quota_refresh_group.attr,
 	&gfs2_attr_demote_rq.attr,
+	&gfs2_attr_status.attr,
 	NULL,
 };
 ATTRIBUTE_GROUPS(gfs2);
-- 
2.26.2



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [Cluster-devel] [GFS2 PATCH 2/6] gfs2: print mapping->nrpages in glock dump for address space glocks
  2020-06-05 16:46 [Cluster-devel] [GFS2 PATCH 0/6 v2] Misc Patch Collection Bob Peterson
  2020-06-05 16:46 ` [Cluster-devel] [GFS2 PATCH 1/6] gfs2: Add new sysfs file for gfs2 status Bob Peterson
@ 2020-06-05 16:46 ` Bob Peterson
  2020-06-05 16:46 ` [Cluster-devel] [GFS2 PATCH 3/6] gfs2: instrumentation wrt log_flush stuck Bob Peterson
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Bob Peterson @ 2020-06-05 16:46 UTC (permalink / raw)
  To: cluster-devel.redhat.com

This patch makes the glock dumps in debugfs print the number of pages
(nrpages) for address space glocks. This will aid in debugging.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
 fs/gfs2/glock.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index bf70e3b14938..9a5dadc93cfc 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1978,7 +1978,13 @@ void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid)
 	char gflags_buf[32];
 	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
 	char fs_id_buf[sizeof(sdp->sd_fsname) + 7];
+	unsigned long nrpages = 0;
 
+	if (gl->gl_ops->go_flags & GLOF_ASPACE) {
+		struct address_space *mapping = gfs2_glock2aspace(gl);
+
+		nrpages = mapping->nrpages;
+	}
 	memset(fs_id_buf, 0, sizeof(fs_id_buf));
 	if (fsid && sdp) /* safety precaution */
 		sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
@@ -1987,15 +1993,16 @@ void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid)
 	if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
 		dtime = 0;
 	gfs2_print_dbg(seq, "%sG:  s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d "
-		       "v:%d r:%d m:%ld\n", fs_id_buf, state2str(gl->gl_state),
-		  gl->gl_name.ln_type,
-		  (unsigned long long)gl->gl_name.ln_number,
-		  gflags2str(gflags_buf, gl),
-		  state2str(gl->gl_target),
-		  state2str(gl->gl_demote_state), dtime,
-		  atomic_read(&gl->gl_ail_count),
-		  atomic_read(&gl->gl_revokes),
-		  (int)gl->gl_lockref.count, gl->gl_hold_time);
+		       "v:%d r:%d m:%ld p:%lu\n",
+		       fs_id_buf, state2str(gl->gl_state),
+		       gl->gl_name.ln_type,
+		       (unsigned long long)gl->gl_name.ln_number,
+		       gflags2str(gflags_buf, gl),
+		       state2str(gl->gl_target),
+		       state2str(gl->gl_demote_state), dtime,
+		       atomic_read(&gl->gl_ail_count),
+		       atomic_read(&gl->gl_revokes),
+		       (int)gl->gl_lockref.count, gl->gl_hold_time, nrpages);
 
 	list_for_each_entry(gh, &gl->gl_holders, gh_list)
 		dump_holder(seq, gh, fs_id_buf);
-- 
2.26.2



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [Cluster-devel] [GFS2 PATCH 3/6] gfs2: instrumentation wrt log_flush stuck
  2020-06-05 16:46 [Cluster-devel] [GFS2 PATCH 0/6 v2] Misc Patch Collection Bob Peterson
  2020-06-05 16:46 ` [Cluster-devel] [GFS2 PATCH 1/6] gfs2: Add new sysfs file for gfs2 status Bob Peterson
  2020-06-05 16:46 ` [Cluster-devel] [GFS2 PATCH 2/6] gfs2: print mapping->nrpages in glock dump for address space glocks Bob Peterson
@ 2020-06-05 16:46 ` Bob Peterson
  2020-06-05 16:46 ` [Cluster-devel] [GFS2 PATCH 4/6] gfs2: introduce new gfs2_glock_assert_withdraw Bob Peterson
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Bob Peterson @ 2020-06-05 16:46 UTC (permalink / raw)
  To: cluster-devel.redhat.com

This adds checks for gfs2_log_flush being stuck, similarly to the check
in gfs2_ail1_flush. To faciliate this and make the strings easy to grep
we move the ail1 emptying to its own function, empty_ail1_list.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
 fs/gfs2/log.c | 34 +++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)

diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 0644e58c6191..fcc7f58d74f0 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -145,9 +145,6 @@ static void dump_ail_list(struct gfs2_sbd *sdp)
 	struct gfs2_bufdata *bd;
 	struct buffer_head *bh;
 
-	fs_err(sdp, "Error: In gfs2_ail1_flush for ten minutes! t=%d\n",
-	       current->journal_info ? 1 : 0);
-
 	list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) {
 		list_for_each_entry_reverse(bd, &tr->tr_ail1_list,
 					    bd_ail_st_list) {
@@ -197,6 +194,8 @@ void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc)
 restart:
 	ret = 0;
 	if (time_after(jiffies, flush_start + (HZ * 600))) {
+		fs_err(sdp, "Error: In %s for ten minutes! t=%d\n",
+		       __func__, current->journal_info ? 1 : 0);
 		dump_ail_list(sdp);
 		goto out;
 	}
@@ -876,6 +875,28 @@ static void ail_drain(struct gfs2_sbd *sdp)
 	spin_unlock(&sdp->sd_ail_lock);
 }
 
+/**
+ * empty_ail1_list - try to start IO and empty the ail1 list
+ * @sdp: Pointer to GFS2 superblock
+ */
+static void empty_ail1_list(struct gfs2_sbd *sdp)
+{
+	unsigned long start = jiffies;
+
+	for (;;) {
+		if (time_after(jiffies, start + (HZ * 600))) {
+			fs_err(sdp, "Error: In %s for 10 minutes! t=%d\n",
+			       __func__, current->journal_info ? 1 : 0);
+			dump_ail_list(sdp);
+			return;
+		}
+		gfs2_ail1_start(sdp);
+		gfs2_ail1_wait(sdp);
+		if (gfs2_ail1_empty(sdp, 0))
+			return;
+	}
+}
+
 /**
  * gfs2_log_flush - flush incore transaction(s)
  * @sdp: the filesystem
@@ -965,12 +986,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
 
 	if (!(flags & GFS2_LOG_HEAD_FLUSH_NORMAL)) {
 		if (!sdp->sd_log_idle) {
-			for (;;) {
-				gfs2_ail1_start(sdp);
-				gfs2_ail1_wait(sdp);
-				if (gfs2_ail1_empty(sdp, 0))
-					break;
-			}
+			empty_ail1_list(sdp);
 			if (gfs2_withdrawn(sdp))
 				goto out;
 			atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
-- 
2.26.2



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [Cluster-devel] [GFS2 PATCH 4/6] gfs2: introduce new gfs2_glock_assert_withdraw
  2020-06-05 16:46 [Cluster-devel] [GFS2 PATCH 0/6 v2] Misc Patch Collection Bob Peterson
                   ` (2 preceding siblings ...)
  2020-06-05 16:46 ` [Cluster-devel] [GFS2 PATCH 3/6] gfs2: instrumentation wrt log_flush stuck Bob Peterson
@ 2020-06-05 16:46 ` Bob Peterson
  2020-06-05 16:47 ` [Cluster-devel] [GFS2 PATCH 5/6] gfs2: new slab for transactions Bob Peterson
  2020-06-05 16:47 ` [Cluster-devel] [GFS2 PATCH 6/6] gfs2: fix use-after-free on transaction ail lists Bob Peterson
  5 siblings, 0 replies; 7+ messages in thread
From: Bob Peterson @ 2020-06-05 16:46 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Before this patch, asserts based on glocks did not print the glock with
the error. This patch introduces a new macro, gfs2_glock_assert_withdraw
which first prints the glock, then takes the assert.

This also changes a few glock asserts to the new macro.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
 fs/gfs2/glock.c | 7 ++++---
 fs/gfs2/glock.h | 9 +++++++++
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 9a5dadc93cfc..64541d8bf9ad 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -164,7 +164,7 @@ void gfs2_glock_free(struct gfs2_glock *gl)
 {
 	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
 
-	BUG_ON(atomic_read(&gl->gl_revokes));
+	gfs2_glock_assert_withdraw(gl, atomic_read(&gl->gl_revokes) == 0);
 	rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms);
 	smp_mb();
 	wake_up_glock(gl);
@@ -626,7 +626,8 @@ __acquires(&gl->gl_lockref.lock)
 		 */
 		if ((atomic_read(&gl->gl_ail_count) != 0) &&
 		    (!cmpxchg(&sdp->sd_log_error, 0, -EIO))) {
-			gfs2_assert_warn(sdp, !atomic_read(&gl->gl_ail_count));
+			gfs2_glock_assert_warn(gl,
+					       !atomic_read(&gl->gl_ail_count));
 			gfs2_dump_glock(NULL, gl, true);
 		}
 		glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA);
@@ -1836,7 +1837,7 @@ void gfs2_glock_finish_truncate(struct gfs2_inode *ip)
 	int ret;
 
 	ret = gfs2_truncatei_resume(ip);
-	gfs2_assert_withdraw(gl->gl_name.ln_sbd, ret == 0);
+	gfs2_glock_assert_withdraw(gl, ret == 0);
 
 	spin_lock(&gl->gl_lockref.lock);
 	clear_bit(GLF_LOCK, &gl->gl_flags);
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index b8adaf80e4c5..1c547dff7c57 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -205,6 +205,15 @@ extern void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl,
 #define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) {		\
 			gfs2_dump_glock(NULL, gl, true);	\
 			BUG(); } } while(0)
+#define gfs2_glock_assert_warn(gl, x) do { if (unlikely(!(x))) {	\
+			gfs2_dump_glock(NULL, gl, true);		\
+			gfs2_assert_warn((gl)->gl_name.ln_sbd, (x)); } } \
+	while (0)
+#define gfs2_glock_assert_withdraw(gl, x) do { if (unlikely(!(x))) {	\
+			gfs2_dump_glock(NULL, gl, true);		\
+			gfs2_assert_withdraw((gl)->gl_name.ln_sbd, (x)); } } \
+	while (0)
+
 extern __printf(2, 3)
 void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...);
 
-- 
2.26.2



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [Cluster-devel] [GFS2 PATCH 5/6] gfs2: new slab for transactions
  2020-06-05 16:46 [Cluster-devel] [GFS2 PATCH 0/6 v2] Misc Patch Collection Bob Peterson
                   ` (3 preceding siblings ...)
  2020-06-05 16:46 ` [Cluster-devel] [GFS2 PATCH 4/6] gfs2: introduce new gfs2_glock_assert_withdraw Bob Peterson
@ 2020-06-05 16:47 ` Bob Peterson
  2020-06-05 16:47 ` [Cluster-devel] [GFS2 PATCH 6/6] gfs2: fix use-after-free on transaction ail lists Bob Peterson
  5 siblings, 0 replies; 7+ messages in thread
From: Bob Peterson @ 2020-06-05 16:47 UTC (permalink / raw)
  To: cluster-devel.redhat.com

This patch adds a new slab for gfs2 transactions. That allows us to
have an initialization function and protect against some errors.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
 fs/gfs2/glops.c |  2 ++
 fs/gfs2/log.c   |  9 +++++----
 fs/gfs2/main.c  |  9 +++++++++
 fs/gfs2/trans.c | 22 ++++++++++++++++++----
 fs/gfs2/trans.h |  1 +
 fs/gfs2/util.c  |  1 +
 fs/gfs2/util.h  |  1 +
 7 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 4862dae868a2..224fb3bd503c 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -91,6 +91,8 @@ static int gfs2_ail_empty_gl(struct gfs2_glock *gl)
 	memset(&tr, 0, sizeof(tr));
 	INIT_LIST_HEAD(&tr.tr_buf);
 	INIT_LIST_HEAD(&tr.tr_databuf);
+	INIT_LIST_HEAD(&tr.tr_ail1_list);
+	INIT_LIST_HEAD(&tr.tr_ail2_list);
 	tr.tr_revokes = atomic_read(&gl->gl_ail_count);
 
 	if (!tr.tr_revokes) {
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index fcc7f58d74f0..64efa3f743af 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -30,6 +30,7 @@
 #include "util.h"
 #include "dir.h"
 #include "trace_gfs2.h"
+#include "trans.h"
 
 static void gfs2_log_shutdown(struct gfs2_sbd *sdp);
 
@@ -378,7 +379,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
 		list_del(&tr->tr_list);
 		gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list));
 		gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list));
-		kfree(tr);
+		gfs2_trans_free(sdp, tr);
 	}
 
 	spin_unlock(&sdp->sd_ail_lock);
@@ -863,14 +864,14 @@ static void ail_drain(struct gfs2_sbd *sdp)
 		gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail1_list);
 		gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list);
 		list_del(&tr->tr_list);
-		kfree(tr);
+		gfs2_trans_free(sdp, tr);
 	}
 	while (!list_empty(&sdp->sd_ail2_list)) {
 		tr = list_first_entry(&sdp->sd_ail2_list, struct gfs2_trans,
 				      tr_list);
 		gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list);
 		list_del(&tr->tr_list);
-		kfree(tr);
+		gfs2_trans_free(sdp, tr);
 	}
 	spin_unlock(&sdp->sd_ail_lock);
 }
@@ -1010,7 +1011,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
 	trace_gfs2_log_flush(sdp, 0, flags);
 	up_write(&sdp->sd_log_flush_lock);
 
-	kfree(tr);
+	gfs2_trans_free(sdp, tr);
 }
 
 /**
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index a1a295b739fb..733470ca6be9 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -143,6 +143,12 @@ static int __init init_gfs2_fs(void)
 	if (!gfs2_qadata_cachep)
 		goto fail_cachep7;
 
+	gfs2_trans_cachep = kmem_cache_create("gfs2_trans",
+					       sizeof(struct gfs2_trans),
+					       0, 0, NULL);
+	if (!gfs2_trans_cachep)
+		goto fail_cachep8;
+
 	error = register_shrinker(&gfs2_qd_shrinker);
 	if (error)
 		goto fail_shrinker;
@@ -194,6 +200,8 @@ static int __init init_gfs2_fs(void)
 fail_fs1:
 	unregister_shrinker(&gfs2_qd_shrinker);
 fail_shrinker:
+	kmem_cache_destroy(gfs2_trans_cachep);
+fail_cachep8:
 	kmem_cache_destroy(gfs2_qadata_cachep);
 fail_cachep7:
 	kmem_cache_destroy(gfs2_quotad_cachep);
@@ -236,6 +244,7 @@ static void __exit exit_gfs2_fs(void)
 	rcu_barrier();
 
 	mempool_destroy(gfs2_page_pool);
+	kmem_cache_destroy(gfs2_trans_cachep);
 	kmem_cache_destroy(gfs2_qadata_cachep);
 	kmem_cache_destroy(gfs2_quotad_cachep);
 	kmem_cache_destroy(gfs2_rgrpd_cachep);
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index ffe840505082..aa7cd0abb369 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -37,7 +37,7 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
 	if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
 		return -EROFS;
 
-	tr = kzalloc(sizeof(struct gfs2_trans), GFP_NOFS);
+	tr = kmem_cache_zalloc(gfs2_trans_cachep, GFP_NOFS);
 	if (!tr)
 		return -ENOMEM;
 
@@ -53,6 +53,9 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
 	INIT_LIST_HEAD(&tr->tr_databuf);
 	INIT_LIST_HEAD(&tr->tr_buf);
 
+	INIT_LIST_HEAD(&tr->tr_ail1_list);
+	INIT_LIST_HEAD(&tr->tr_ail2_list);
+
 	sb_start_intwrite(sdp->sd_vfs);
 
 	error = gfs2_log_reserve(sdp, tr->tr_reserved);
@@ -65,7 +68,7 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
 
 fail:
 	sb_end_intwrite(sdp->sd_vfs);
-	kfree(tr);
+	kmem_cache_free(gfs2_trans_cachep, tr);
 
 	return error;
 }
@@ -93,7 +96,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
 	if (!test_bit(TR_TOUCHED, &tr->tr_flags)) {
 		gfs2_log_release(sdp, tr->tr_reserved);
 		if (alloced) {
-			kfree(tr);
+			gfs2_trans_free(sdp, tr);
 			sb_end_intwrite(sdp->sd_vfs);
 		}
 		return;
@@ -109,7 +112,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
 
 	gfs2_log_commit(sdp, tr);
 	if (alloced && !test_bit(TR_ATTACHED, &tr->tr_flags))
-		kfree(tr);
+		gfs2_trans_free(sdp, tr);
 	up_read(&sdp->sd_log_flush_lock);
 
 	if (sdp->sd_vfs->s_flags & SB_SYNCHRONOUS)
@@ -276,3 +279,14 @@ void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len)
 	gfs2_log_unlock(sdp);
 }
 
+void gfs2_trans_free(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
+{
+	if (tr == NULL)
+		return;
+
+	gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list));
+	gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list));
+	gfs2_assert_warn(sdp, list_empty(&tr->tr_databuf));
+	gfs2_assert_warn(sdp, list_empty(&tr->tr_buf));
+	kmem_cache_free(gfs2_trans_cachep, tr);
+}
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index 6071334de035..83199ce5a5c5 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -42,5 +42,6 @@ extern void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh);
 extern void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh);
 extern void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
 extern void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len);
+extern void gfs2_trans_free(struct gfs2_sbd *sdp, struct gfs2_trans *tr);
 
 #endif /* __TRANS_DOT_H__ */
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index aa087a5675af..1cd0328cae20 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -32,6 +32,7 @@ struct kmem_cache *gfs2_bufdata_cachep __read_mostly;
 struct kmem_cache *gfs2_rgrpd_cachep __read_mostly;
 struct kmem_cache *gfs2_quotad_cachep __read_mostly;
 struct kmem_cache *gfs2_qadata_cachep __read_mostly;
+struct kmem_cache *gfs2_trans_cachep __read_mostly;
 mempool_t *gfs2_page_pool __read_mostly;
 
 void gfs2_assert_i(struct gfs2_sbd *sdp)
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index a3542560da6f..6d9157efe16c 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -172,6 +172,7 @@ extern struct kmem_cache *gfs2_bufdata_cachep;
 extern struct kmem_cache *gfs2_rgrpd_cachep;
 extern struct kmem_cache *gfs2_quotad_cachep;
 extern struct kmem_cache *gfs2_qadata_cachep;
+extern struct kmem_cache *gfs2_trans_cachep;
 extern mempool_t *gfs2_page_pool;
 extern struct workqueue_struct *gfs2_control_wq;
 
-- 
2.26.2



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [Cluster-devel] [GFS2 PATCH 6/6] gfs2: fix use-after-free on transaction ail lists
  2020-06-05 16:46 [Cluster-devel] [GFS2 PATCH 0/6 v2] Misc Patch Collection Bob Peterson
                   ` (4 preceding siblings ...)
  2020-06-05 16:47 ` [Cluster-devel] [GFS2 PATCH 5/6] gfs2: new slab for transactions Bob Peterson
@ 2020-06-05 16:47 ` Bob Peterson
  5 siblings, 0 replies; 7+ messages in thread
From: Bob Peterson @ 2020-06-05 16:47 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Before this patch, transactions could be merged into the system
transaction by function gfs2_merge_trans(), but then it is not
considered attached. The caller, log_refund, only sets TR_ATTACHED
if the transaction is not merged. Later, in function, gfs2_trans_end,
if TR_ATTACHED is not set (because it was merged) the transaction is
freed by gfs2_trans_free(). However, by the time this happens, the
log flush mechanism may have queued bd elements to the ail list,
which means tr_ail1_list may not be empty, and the flush may forget
about the bd elements altogether (memory leak, because it focuses
on the sdp transaction instead) or worse, reference them after the
transaction has been freed.

Although I've not seen any serious consequences, the problem becomes
apparent with the previous patch's addition of:

	gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list));

to function gfs2_trans_free().

This patch adds logic into gfs2_merge_trans() to move the merged
transaction's ail lists to the sdp transaction. This prevents the
use-after-free.

To facilitate this, we pass sdp into the function instead of the
transaction itself.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
 fs/gfs2/log.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 64efa3f743af..5a2bc70b0859 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -1020,8 +1020,10 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
  * @new: New transaction to be merged
  */
 
-static void gfs2_merge_trans(struct gfs2_trans *old, struct gfs2_trans *new)
+static void gfs2_merge_trans(struct gfs2_sbd *sdp, struct gfs2_trans *new)
 {
+	struct gfs2_trans *old = sdp->sd_log_tr;
+
 	WARN_ON_ONCE(!test_bit(TR_ATTACHED, &old->tr_flags));
 
 	old->tr_num_buf_new	+= new->tr_num_buf_new;
@@ -1033,6 +1035,11 @@ static void gfs2_merge_trans(struct gfs2_trans *old, struct gfs2_trans *new)
 
 	list_splice_tail_init(&new->tr_databuf, &old->tr_databuf);
 	list_splice_tail_init(&new->tr_buf, &old->tr_buf);
+
+	spin_lock(&sdp->sd_ail_lock);
+	list_splice_tail_init(&new->tr_ail1_list, &old->tr_ail1_list);
+	list_splice_tail_init(&new->tr_ail2_list, &old->tr_ail2_list);
+	spin_unlock(&sdp->sd_ail_lock);
 }
 
 static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
@@ -1044,7 +1051,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 	gfs2_log_lock(sdp);
 
 	if (sdp->sd_log_tr) {
-		gfs2_merge_trans(sdp->sd_log_tr, tr);
+		gfs2_merge_trans(sdp, tr);
 	} else if (tr->tr_num_buf_new || tr->tr_num_databuf_new) {
 		gfs2_assert_withdraw(sdp, test_bit(TR_ALLOCED, &tr->tr_flags));
 		sdp->sd_log_tr = tr;
-- 
2.26.2



^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2020-06-05 16:47 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-05 16:46 [Cluster-devel] [GFS2 PATCH 0/6 v2] Misc Patch Collection Bob Peterson
2020-06-05 16:46 ` [Cluster-devel] [GFS2 PATCH 1/6] gfs2: Add new sysfs file for gfs2 status Bob Peterson
2020-06-05 16:46 ` [Cluster-devel] [GFS2 PATCH 2/6] gfs2: print mapping->nrpages in glock dump for address space glocks Bob Peterson
2020-06-05 16:46 ` [Cluster-devel] [GFS2 PATCH 3/6] gfs2: instrumentation wrt log_flush stuck Bob Peterson
2020-06-05 16:46 ` [Cluster-devel] [GFS2 PATCH 4/6] gfs2: introduce new gfs2_glock_assert_withdraw Bob Peterson
2020-06-05 16:47 ` [Cluster-devel] [GFS2 PATCH 5/6] gfs2: new slab for transactions Bob Peterson
2020-06-05 16:47 ` [Cluster-devel] [GFS2 PATCH 6/6] gfs2: fix use-after-free on transaction ail lists Bob Peterson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.