All of lore.kernel.org
 help / color / mirror / Atom feed
* [Cluster-devel] [RFC RHEL7 GFS2 PATCH 0/3] Trimming the ordered write inode list
@ 2017-12-12 17:22 Abhi Das
  2017-12-12 17:22 ` [Cluster-devel] [RFC RHEL7 GFS2 PATCH 1/3] gfs2: ordered list instrumentation Abhi Das
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Abhi Das @ 2017-12-12 17:22 UTC (permalink / raw)
  To: cluster-devel.redhat.com

This is a RHEL7 patchset to address bz 1511599.

The first patch in this series is just an instrumentation patch to assess
the impact of the subsequent patches on the ordered write list. It is not
intended to be merged into the code base.

The second patch adds a function gfs2_ordered_shrink() which attempts to
remove inodes from the ordered list that don't have any pages in need of
writing. This function is called from gfs2_sync_fs().

The third patch does a few things:
- Call gfs2_ordered_write() in case we didn't trim the ordered write list
  in gfs2_ordered_shrink().
- Have gfs2_ordered_write() trim the list of inodes that don't have pages
  that need to be written. Should happen every time on a log flush when
  gfs2_log_flush() calls gfs2_ordered_write()
- Call gfs2_ordered_del_inode() from gfs2_fsync()

Other ideas:
- Change gfs2_ordered_write() so that it can work on a subset of the
  ordered list. That way, we can call it to write a percentage (or N
  inodes), wait for the writes to complete and then remove those inodes
  from the list.

Abhi Das (3):
  gfs2: ordered list instrumentation
  gfs2: try to free empty mapping inodes from ordered list
  gfs2: ordered write list addendum patch

 fs/gfs2/bmap.c       |  2 +-
 fs/gfs2/file.c       |  5 ++++-
 fs/gfs2/incore.h     | 13 +++++++++++++
 fs/gfs2/log.c        | 36 ++++++++++++++++++++++++++++++++----
 fs/gfs2/log.h        | 38 ++++++++++++++++++++++++++++++++++++--
 fs/gfs2/ops_fstype.c |  1 +
 fs/gfs2/quota.c      |  8 ++++++++
 fs/gfs2/super.c      |  6 ++++--
 8 files changed, 99 insertions(+), 10 deletions(-)

-- 
2.4.11



^ permalink raw reply	[flat|nested] 7+ messages in thread

* [Cluster-devel] [RFC RHEL7 GFS2 PATCH 1/3] gfs2: ordered list instrumentation
  2017-12-12 17:22 [Cluster-devel] [RFC RHEL7 GFS2 PATCH 0/3] Trimming the ordered write inode list Abhi Das
@ 2017-12-12 17:22 ` Abhi Das
  2017-12-12 17:22 ` [Cluster-devel] [RFC RHEL7 GFS2 PATCH 2/3] gfs2: try to free empty mapping inodes from ordered list Abhi Das
  2017-12-12 17:22 ` [Cluster-devel] [RFC RHEL7 GFS2 PATCH 3/3] gfs2: ordered write list addendum patch Abhi Das
  2 siblings, 0 replies; 7+ messages in thread
From: Abhi Das @ 2017-12-12 17:22 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Keep stats on the size of the ordered list and keep track of
where it's added to and removed from
---
 fs/gfs2/bmap.c       |  2 +-
 fs/gfs2/file.c       |  2 +-
 fs/gfs2/incore.h     | 12 ++++++++++++
 fs/gfs2/log.c        |  7 +++++--
 fs/gfs2/log.h        | 35 +++++++++++++++++++++++++++++++++--
 fs/gfs2/ops_fstype.c |  1 +
 fs/gfs2/quota.c      |  8 ++++++++
 fs/gfs2/super.c      |  2 +-
 8 files changed, 62 insertions(+), 7 deletions(-)

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 8569bf3..d9720ce 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1504,7 +1504,7 @@ static int trunc_end(struct gfs2_inode *ip)
 		ip->i_height = 0;
 		ip->i_goal = ip->i_no_addr;
 		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
-		gfs2_ordered_del_inode(ip);
+		gfs2_ordered_del_inode(ip, ORD_WHENCE_TRUNC);
 	}
 	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
 	ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 31b5986..757ec66 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -284,7 +284,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
 		if (error)
 			goto out;
 		if (new_flags & GFS2_DIF_JDATA)
-			gfs2_ordered_del_inode(ip);
+			gfs2_ordered_del_inode(ip, ORD_WHENCE_SETFLAGS);
 	}
 	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
 	if (error)
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 7e78d8a..6fcad2a 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -655,6 +655,17 @@ struct gfs2_pcpu_lkstats {
 	struct gfs2_lkstats lkstats[10];
 };
 
+struct ord_stats {
+	unsigned long os_ct;
+	unsigned long os_add;
+	unsigned long os_rm_trunc;
+	unsigned long os_rm_evict;
+	unsigned long os_rm_wait;
+	unsigned long os_rm_syncfs;
+	unsigned long os_rm_write;
+	unsigned long os_rm_setflags;
+};
+
 struct gfs2_sbd {
 	struct super_block *sd_vfs;
 	struct gfs2_pcpu_lkstats __percpu *sd_lkstats;
@@ -773,6 +784,7 @@ struct gfs2_sbd {
 
 	struct list_head sd_log_le_revoke;
 	struct list_head sd_log_le_ordered;
+	struct ord_stats sd_ord_stats;
 	spinlock_t sd_ordered_lock;
 
 	atomic_t sd_log_thresh1;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index cc3f7d1..0257704 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -526,6 +526,7 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
 	while (!list_empty(&sdp->sd_log_le_ordered)) {
 		ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered);
 		list_del(&ip->i_ordered);
+		ord_stats_adjust(sdp, -1, ORD_WHENCE_WAIT);
 		WARN_ON(!test_and_clear_bit(GIF_ORDERED, &ip->i_flags));
 		if (ip->i_inode.i_mapping->nrpages == 0)
 			continue;
@@ -536,13 +537,15 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
 	spin_unlock(&sdp->sd_ordered_lock);
 }
 
-void gfs2_ordered_del_inode(struct gfs2_inode *ip)
+void gfs2_ordered_del_inode(struct gfs2_inode *ip, int whence)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 
 	spin_lock(&sdp->sd_ordered_lock);
-	if (test_and_clear_bit(GIF_ORDERED, &ip->i_flags))
+	if (test_and_clear_bit(GIF_ORDERED, &ip->i_flags)) {
 		list_del(&ip->i_ordered);
+		ord_stats_adjust(sdp, -1, whence);
+	}
 	spin_unlock(&sdp->sd_ordered_lock);
 }
 
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 3721663..bf7729c 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -48,18 +48,49 @@ static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
 	sdp->sd_log_head = sdp->sd_log_tail = value;
 }
 
+enum {
+	ORD_WHENCE_TRUNC        = 0,
+	ORD_WHENCE_EVICT        = 1,
+	ORD_WHENCE_WAIT         = 2,
+	ORD_WHENCE_SYNCFS       = 3,
+	ORD_WHENCE_ORD_WRITE    = 4,
+	ORD_WHENCE_SETFLAGS     = 5,
+	ORD_WHENCE_ADD          = 6,
+};
+
+static inline void ord_stats_adjust(struct gfs2_sbd *sdp, int count, int whence)
+{
+	struct ord_stats *os = &sdp->sd_ord_stats;
+
+	os->os_ct += count;
+	switch (whence) {
+	case ORD_WHENCE_TRUNC:      os->os_rm_trunc += -(count); break;
+	case ORD_WHENCE_EVICT:      os->os_rm_evict += -(count); break;
+	case ORD_WHENCE_WAIT:       os->os_rm_wait += -(count); break;
+	case ORD_WHENCE_SYNCFS:     os->os_rm_syncfs += -(count); break;
+	case ORD_WHENCE_ORD_WRITE:  os->os_rm_write += -(count); break;
+	case ORD_WHENCE_SETFLAGS:   os->os_rm_setflags += -(count); break;
+
+	case ORD_WHENCE_ADD:        os->os_add += count; break;
+	default: break;
+	}
+}
+
 static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 
 	if (!test_bit(GIF_ORDERED, &ip->i_flags)) {
 		spin_lock(&sdp->sd_ordered_lock);
-		if (!test_and_set_bit(GIF_ORDERED, &ip->i_flags))
+		if (!test_and_set_bit(GIF_ORDERED, &ip->i_flags)) {
 			list_add(&ip->i_ordered, &sdp->sd_log_le_ordered);
+			ord_stats_adjust(sdp, 1, ORD_WHENCE_ADD);
+		}
 		spin_unlock(&sdp->sd_ordered_lock);
 	}
 }
-extern void gfs2_ordered_del_inode(struct gfs2_inode *ip);
+
+extern void gfs2_ordered_del_inode(struct gfs2_inode *ip, int whence);
 extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
 			    unsigned int ssize);
 
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 228f38e..378f961 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -104,6 +104,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
 	atomic_set(&sdp->sd_log_pinned, 0);
 	INIT_LIST_HEAD(&sdp->sd_log_le_revoke);
 	INIT_LIST_HEAD(&sdp->sd_log_le_ordered);
+	memset(&sdp->sd_ord_stats, 0, sizeof(struct ord_stats));
 	spin_lock_init(&sdp->sd_ordered_lock);
 
 	init_waitqueue_head(&sdp->sd_log_waitq);
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 3d5f868..66c5126 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1572,6 +1572,14 @@ int gfs2_quotad(void *data)
 		else
 			t = 0;
 		finish_wait(&sdp->sd_quota_wait, &wait);
+
+		printk(KERN_WARNING "Ord list Size:%lu +[add_inode:%lu] "
+		       "-[trunc:%lu evict:%lu wait:%lu syncfs:%lu ord_write:%lu"
+		       " setflags:%lu]\n", sdp->sd_ord_stats.os_ct,
+		       sdp->sd_ord_stats.os_add, sdp->sd_ord_stats.os_rm_trunc,
+		       sdp->sd_ord_stats.os_rm_evict, sdp->sd_ord_stats.os_rm_wait,
+		       sdp->sd_ord_stats.os_rm_syncfs, sdp->sd_ord_stats.os_rm_write,
+		       sdp->sd_ord_stats.os_rm_setflags);
 	}
 
 	return 0;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 6d7fb54..ee15a50 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1641,7 +1641,7 @@ out:
 	/* Case 3 starts here */
 	truncate_inode_pages_final(&inode->i_data);
 	gfs2_rsqa_delete(ip, NULL);
-	gfs2_ordered_del_inode(ip);
+	gfs2_ordered_del_inode(ip, ORD_WHENCE_EVICT);
 	clear_inode(inode);
 	gfs2_dir_hash_inval(ip);
 	glock_clear_object(ip->i_gl, ip);
-- 
2.4.11



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [Cluster-devel] [RFC RHEL7 GFS2 PATCH 2/3] gfs2: try to free empty mapping inodes from ordered list
  2017-12-12 17:22 [Cluster-devel] [RFC RHEL7 GFS2 PATCH 0/3] Trimming the ordered write inode list Abhi Das
  2017-12-12 17:22 ` [Cluster-devel] [RFC RHEL7 GFS2 PATCH 1/3] gfs2: ordered list instrumentation Abhi Das
@ 2017-12-12 17:22 ` Abhi Das
  2017-12-13 11:25   ` Steven Whitehouse
  2017-12-12 17:22 ` [Cluster-devel] [RFC RHEL7 GFS2 PATCH 3/3] gfs2: ordered write list addendum patch Abhi Das
  2 siblings, 1 reply; 7+ messages in thread
From: Abhi Das @ 2017-12-12 17:22 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Add a new function gfs2_ordered_shrink() that is called when
syncfs is run. This function runs through the ordered list of
inodes and removes the ones that don't have any pages in need
of writing.

Signed-off-by: Abhi Das <adas@redhat.com>
---
 fs/gfs2/log.c   | 16 ++++++++++++++++
 fs/gfs2/log.h   |  1 +
 fs/gfs2/super.c |  4 +++-
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 0257704..6d618a1 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -537,6 +537,22 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
 	spin_unlock(&sdp->sd_ordered_lock);
 }
 
+void gfs2_ordered_shrink(struct gfs2_sbd *sdp, int whence)
+{
+	struct gfs2_inode *ip, *tmp;
+
+	spin_lock(&sdp->sd_ordered_lock);
+	list_for_each_entry_safe(ip, tmp, &sdp->sd_log_le_ordered, i_ordered) {
+		if (ip->i_inode.i_mapping->nrpages != 0)
+			continue;
+		if (test_and_clear_bit(GIF_ORDERED, &ip->i_flags)) {
+			list_del(&ip->i_ordered);
+			ord_stats_adjust(sdp, -1, whence);
+		}
+	}
+	spin_unlock(&sdp->sd_ordered_lock);
+}
+
 void gfs2_ordered_del_inode(struct gfs2_inode *ip, int whence)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index bf7729c..80c8861 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -90,6 +90,7 @@ static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip)
 	}
 }
 
+extern void gfs2_ordered_shrink(struct gfs2_sbd *sdp, int whence);
 extern void gfs2_ordered_del_inode(struct gfs2_inode *ip, int whence);
 extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
 			    unsigned int ssize);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index ee15a50..773e98d 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -948,8 +948,10 @@ static int gfs2_sync_fs(struct super_block *sb, int wait)
 	struct gfs2_sbd *sdp = sb->s_fs_info;
 
 	gfs2_quota_sync(sb, -1);
-	if (wait && sdp)
+	if (wait && sdp) {
 		gfs2_log_flush(sdp, NULL);
+		gfs2_ordered_shrink(sdp, ORD_WHENCE_SYNCFS);
+	}
 	return sdp->sd_log_error;
 }
 
-- 
2.4.11



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [Cluster-devel] [RFC RHEL7 GFS2 PATCH 3/3] gfs2: ordered write list addendum patch
  2017-12-12 17:22 [Cluster-devel] [RFC RHEL7 GFS2 PATCH 0/3] Trimming the ordered write inode list Abhi Das
  2017-12-12 17:22 ` [Cluster-devel] [RFC RHEL7 GFS2 PATCH 1/3] gfs2: ordered list instrumentation Abhi Das
  2017-12-12 17:22 ` [Cluster-devel] [RFC RHEL7 GFS2 PATCH 2/3] gfs2: try to free empty mapping inodes from ordered list Abhi Das
@ 2017-12-12 17:22 ` Abhi Das
  2017-12-13 11:28   ` Steven Whitehouse
  2017-12-13 11:39   ` Steven Whitehouse
  2 siblings, 2 replies; 7+ messages in thread
From: Abhi Das @ 2017-12-12 17:22 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Trim the list in gfs2_ordered_write() as we run through it
to write out inodes.
Also attempt to remove an inode from the list after it is
fsync'ed.
Finally, call gfs2_ordered_write() in case we were not able
to shrink the list in gfs2_ordered_shrink() in the hopes that
it will eventually cause the list to shrink.
---
 fs/gfs2/file.c   |  3 +++
 fs/gfs2/incore.h |  1 +
 fs/gfs2/log.c    | 13 +++++++++++--
 fs/gfs2/log.h    |  4 +++-
 fs/gfs2/quota.c  |  4 ++--
 5 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 757ec66..75f9ac0 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -697,6 +697,9 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
 	if (mapping->nrpages)
 		ret = filemap_fdatawait_range(mapping, start, end);
 
+	if (!ret && !ret1)
+		gfs2_ordered_del_inode(ip, ORD_WHENCE_FSYNC);
+
 	return ret ? ret : ret1;
 }
 
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 6fcad2a..93da360 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -661,6 +661,7 @@ struct ord_stats {
 	unsigned long os_rm_trunc;
 	unsigned long os_rm_evict;
 	unsigned long os_rm_wait;
+	unsigned long os_rm_fsync;
 	unsigned long os_rm_syncfs;
 	unsigned long os_rm_write;
 	unsigned long os_rm_setflags;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 6d618a1..4cfef47 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -507,9 +507,13 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp)
 	list_sort(NULL, &sdp->sd_log_le_ordered, &ip_cmp);
 	while (!list_empty(&sdp->sd_log_le_ordered)) {
 		ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered);
-		list_move(&ip->i_ordered, &written);
-		if (ip->i_inode.i_mapping->nrpages == 0)
+		if (ip->i_inode.i_mapping->nrpages == 0) {
+			test_and_clear_bit(GIF_ORDERED, &ip->i_flags);
+			list_del(&ip->i_ordered);
+			ord_stats_adjust(sdp, -1, ORD_WHENCE_ORD_WRITE);
 			continue;
+		}
+		list_move(&ip->i_ordered, &written);
 		spin_unlock(&sdp->sd_ordered_lock);
 		filemap_fdatawrite(ip->i_inode.i_mapping);
 		spin_lock(&sdp->sd_ordered_lock);
@@ -540,17 +544,22 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
 void gfs2_ordered_shrink(struct gfs2_sbd *sdp, int whence)
 {
 	struct gfs2_inode *ip, *tmp;
+	bool removed;
 
 	spin_lock(&sdp->sd_ordered_lock);
 	list_for_each_entry_safe(ip, tmp, &sdp->sd_log_le_ordered, i_ordered) {
 		if (ip->i_inode.i_mapping->nrpages != 0)
 			continue;
 		if (test_and_clear_bit(GIF_ORDERED, &ip->i_flags)) {
+			removed = true;
 			list_del(&ip->i_ordered);
 			ord_stats_adjust(sdp, -1, whence);
 		}
 	}
 	spin_unlock(&sdp->sd_ordered_lock);
+
+	if (!removed)
+		gfs2_ordered_write(sdp);
 }
 
 void gfs2_ordered_del_inode(struct gfs2_inode *ip, int whence)
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 80c8861..0bc3620 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -55,7 +55,8 @@ enum {
 	ORD_WHENCE_SYNCFS       = 3,
 	ORD_WHENCE_ORD_WRITE    = 4,
 	ORD_WHENCE_SETFLAGS     = 5,
-	ORD_WHENCE_ADD          = 6,
+	ORD_WHENCE_FSYNC        = 6,
+	ORD_WHENCE_ADD          = 7,
 };
 
 static inline void ord_stats_adjust(struct gfs2_sbd *sdp, int count, int whence)
@@ -70,6 +71,7 @@ static inline void ord_stats_adjust(struct gfs2_sbd *sdp, int count, int whence)
 	case ORD_WHENCE_SYNCFS:     os->os_rm_syncfs += -(count); break;
 	case ORD_WHENCE_ORD_WRITE:  os->os_rm_write += -(count); break;
 	case ORD_WHENCE_SETFLAGS:   os->os_rm_setflags += -(count); break;
+	case ORD_WHENCE_FSYNC:      os->os_rm_fsync += -(count); break;
 
 	case ORD_WHENCE_ADD:        os->os_add += count; break;
 	default: break;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 66c5126..63e3afa 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1575,11 +1575,11 @@ int gfs2_quotad(void *data)
 
 		printk(KERN_WARNING "Ord list Size:%lu +[add_inode:%lu] "
 		       "-[trunc:%lu evict:%lu wait:%lu syncfs:%lu ord_write:%lu"
-		       " setflags:%lu]\n", sdp->sd_ord_stats.os_ct,
+		       " setflags:%lu fsync:%lu]\n", sdp->sd_ord_stats.os_ct,
 		       sdp->sd_ord_stats.os_add, sdp->sd_ord_stats.os_rm_trunc,
 		       sdp->sd_ord_stats.os_rm_evict, sdp->sd_ord_stats.os_rm_wait,
 		       sdp->sd_ord_stats.os_rm_syncfs, sdp->sd_ord_stats.os_rm_write,
-		       sdp->sd_ord_stats.os_rm_setflags);
+		       sdp->sd_ord_stats.os_rm_setflags, sdp->sd_ord_stats.os_rm_fsync);
 	}
 
 	return 0;
-- 
2.4.11



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [Cluster-devel] [RFC RHEL7 GFS2 PATCH 2/3] gfs2: try to free empty mapping inodes from ordered list
  2017-12-12 17:22 ` [Cluster-devel] [RFC RHEL7 GFS2 PATCH 2/3] gfs2: try to free empty mapping inodes from ordered list Abhi Das
@ 2017-12-13 11:25   ` Steven Whitehouse
  0 siblings, 0 replies; 7+ messages in thread
From: Steven Whitehouse @ 2017-12-13 11:25 UTC (permalink / raw)
  To: cluster-devel.redhat.com



On 12/12/17 17:22, Abhi Das wrote:
> Add a new function gfs2_ordered_shrink() that is called when
> syncfs is run. This function runs through the ordered list of
> inodes and removes the ones that don't have any pages in need
> of writing.
>
> Signed-off-by: Abhi Das <adas@redhat.com>
> ---
>   fs/gfs2/log.c   | 16 ++++++++++++++++
>   fs/gfs2/log.h   |  1 +
>   fs/gfs2/super.c |  4 +++-
>   3 files changed, 20 insertions(+), 1 deletion(-)
>
> diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
> index 0257704..6d618a1 100644
> --- a/fs/gfs2/log.c
> +++ b/fs/gfs2/log.c
> @@ -537,6 +537,22 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
>   	spin_unlock(&sdp->sd_ordered_lock);
>   }
>   
> +void gfs2_ordered_shrink(struct gfs2_sbd *sdp, int whence)
> +{
> +	struct gfs2_inode *ip, *tmp;
> +
> +	spin_lock(&sdp->sd_ordered_lock);
> +	list_for_each_entry_safe(ip, tmp, &sdp->sd_log_le_ordered, i_ordered) {
> +		if (ip->i_inode.i_mapping->nrpages != 0)
> +			continue;
> +		if (test_and_clear_bit(GIF_ORDERED, &ip->i_flags)) {
> +			list_del(&ip->i_ordered);
> +			ord_stats_adjust(sdp, -1, whence);
> +		}
> +	}
> +	spin_unlock(&sdp->sd_ordered_lock);
> +}
> +
So it is removing inodes with no pages at all.... we should also be able 
to remove inodes which have pages where all the pages are clean too. 
That is more tricky to test for though as there will be a race between 
that test and (re)dirtying the pages. This is a good start however.

Is it possible to add this in to one of the existing loops over the 
ordered inode list? If so then we can get the benefit without needing a 
separate pass over the list... maybe merged into the loop in 
gfs2_ordered_write() for example, so we simply remove items from the 
list at that point when nrpages == 0 ?

Steve.

>   void gfs2_ordered_del_inode(struct gfs2_inode *ip, int whence)
>   {
>   	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
> diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
> index bf7729c..80c8861 100644
> --- a/fs/gfs2/log.h
> +++ b/fs/gfs2/log.h
> @@ -90,6 +90,7 @@ static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip)
>   	}
>   }
>   
> +extern void gfs2_ordered_shrink(struct gfs2_sbd *sdp, int whence);
>   extern void gfs2_ordered_del_inode(struct gfs2_inode *ip, int whence);
>   extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
>   			    unsigned int ssize);
> diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
> index ee15a50..773e98d 100644
> --- a/fs/gfs2/super.c
> +++ b/fs/gfs2/super.c
> @@ -948,8 +948,10 @@ static int gfs2_sync_fs(struct super_block *sb, int wait)
>   	struct gfs2_sbd *sdp = sb->s_fs_info;
>   
>   	gfs2_quota_sync(sb, -1);
> -	if (wait && sdp)
> +	if (wait && sdp) {
>   		gfs2_log_flush(sdp, NULL);
> +		gfs2_ordered_shrink(sdp, ORD_WHENCE_SYNCFS);
> +	}
>   	return sdp->sd_log_error;
>   }
>   



^ permalink raw reply	[flat|nested] 7+ messages in thread

* [Cluster-devel] [RFC RHEL7 GFS2 PATCH 3/3] gfs2: ordered write list addendum patch
  2017-12-12 17:22 ` [Cluster-devel] [RFC RHEL7 GFS2 PATCH 3/3] gfs2: ordered write list addendum patch Abhi Das
@ 2017-12-13 11:28   ` Steven Whitehouse
  2017-12-13 11:39   ` Steven Whitehouse
  1 sibling, 0 replies; 7+ messages in thread
From: Steven Whitehouse @ 2017-12-13 11:28 UTC (permalink / raw)
  To: cluster-devel.redhat.com



On 12/12/17 17:22, Abhi Das wrote:
> Trim the list in gfs2_ordered_write() as we run through it
> to write out inodes.
> Also attempt to remove an inode from the list after it is
> fsync'ed.
> Finally, call gfs2_ordered_write() in case we were not able
> to shrink the list in gfs2_ordered_shrink() in the hopes that
> it will eventually cause the list to shrink.
> ---
Ok. I see that question is answered in this patch... yes, that is a good 
plan to remove items from the list at this point. Again since we are 
iterating the list in ordered wait, we could remove items from the list 
then too, to avoid the additional loop,

Steve.

>   fs/gfs2/file.c   |  3 +++
>   fs/gfs2/incore.h |  1 +
>   fs/gfs2/log.c    | 13 +++++++++++--
>   fs/gfs2/log.h    |  4 +++-
>   fs/gfs2/quota.c  |  4 ++--
>   5 files changed, 20 insertions(+), 5 deletions(-)
>
> diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
> index 757ec66..75f9ac0 100644
> --- a/fs/gfs2/file.c
> +++ b/fs/gfs2/file.c
> @@ -697,6 +697,9 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
>   	if (mapping->nrpages)
>   		ret = filemap_fdatawait_range(mapping, start, end);
>   
> +	if (!ret && !ret1)
> +		gfs2_ordered_del_inode(ip, ORD_WHENCE_FSYNC);
> +
>   	return ret ? ret : ret1;
>   }
>   
> diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
> index 6fcad2a..93da360 100644
> --- a/fs/gfs2/incore.h
> +++ b/fs/gfs2/incore.h
> @@ -661,6 +661,7 @@ struct ord_stats {
>   	unsigned long os_rm_trunc;
>   	unsigned long os_rm_evict;
>   	unsigned long os_rm_wait;
> +	unsigned long os_rm_fsync;
>   	unsigned long os_rm_syncfs;
>   	unsigned long os_rm_write;
>   	unsigned long os_rm_setflags;
> diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
> index 6d618a1..4cfef47 100644
> --- a/fs/gfs2/log.c
> +++ b/fs/gfs2/log.c
> @@ -507,9 +507,13 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp)
>   	list_sort(NULL, &sdp->sd_log_le_ordered, &ip_cmp);
>   	while (!list_empty(&sdp->sd_log_le_ordered)) {
>   		ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered);
> -		list_move(&ip->i_ordered, &written);
> -		if (ip->i_inode.i_mapping->nrpages == 0)
> +		if (ip->i_inode.i_mapping->nrpages == 0) {
> +			test_and_clear_bit(GIF_ORDERED, &ip->i_flags);
> +			list_del(&ip->i_ordered);
> +			ord_stats_adjust(sdp, -1, ORD_WHENCE_ORD_WRITE);
>   			continue;
> +		}
> +		list_move(&ip->i_ordered, &written);
>   		spin_unlock(&sdp->sd_ordered_lock);
>   		filemap_fdatawrite(ip->i_inode.i_mapping);
>   		spin_lock(&sdp->sd_ordered_lock);
> @@ -540,17 +544,22 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
>   void gfs2_ordered_shrink(struct gfs2_sbd *sdp, int whence)
>   {
>   	struct gfs2_inode *ip, *tmp;
> +	bool removed;
>   
>   	spin_lock(&sdp->sd_ordered_lock);
>   	list_for_each_entry_safe(ip, tmp, &sdp->sd_log_le_ordered, i_ordered) {
>   		if (ip->i_inode.i_mapping->nrpages != 0)
>   			continue;
>   		if (test_and_clear_bit(GIF_ORDERED, &ip->i_flags)) {
> +			removed = true;
>   			list_del(&ip->i_ordered);
>   			ord_stats_adjust(sdp, -1, whence);
>   		}
>   	}
>   	spin_unlock(&sdp->sd_ordered_lock);
> +
> +	if (!removed)
> +		gfs2_ordered_write(sdp);
>   }
>   
>   void gfs2_ordered_del_inode(struct gfs2_inode *ip, int whence)
> diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
> index 80c8861..0bc3620 100644
> --- a/fs/gfs2/log.h
> +++ b/fs/gfs2/log.h
> @@ -55,7 +55,8 @@ enum {
>   	ORD_WHENCE_SYNCFS       = 3,
>   	ORD_WHENCE_ORD_WRITE    = 4,
>   	ORD_WHENCE_SETFLAGS     = 5,
> -	ORD_WHENCE_ADD          = 6,
> +	ORD_WHENCE_FSYNC        = 6,
> +	ORD_WHENCE_ADD          = 7,
>   };
>   
>   static inline void ord_stats_adjust(struct gfs2_sbd *sdp, int count, int whence)
> @@ -70,6 +71,7 @@ static inline void ord_stats_adjust(struct gfs2_sbd *sdp, int count, int whence)
>   	case ORD_WHENCE_SYNCFS:     os->os_rm_syncfs += -(count); break;
>   	case ORD_WHENCE_ORD_WRITE:  os->os_rm_write += -(count); break;
>   	case ORD_WHENCE_SETFLAGS:   os->os_rm_setflags += -(count); break;
> +	case ORD_WHENCE_FSYNC:      os->os_rm_fsync += -(count); break;
>   
>   	case ORD_WHENCE_ADD:        os->os_add += count; break;
>   	default: break;
> diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
> index 66c5126..63e3afa 100644
> --- a/fs/gfs2/quota.c
> +++ b/fs/gfs2/quota.c
> @@ -1575,11 +1575,11 @@ int gfs2_quotad(void *data)
>   
>   		printk(KERN_WARNING "Ord list Size:%lu +[add_inode:%lu] "
>   		       "-[trunc:%lu evict:%lu wait:%lu syncfs:%lu ord_write:%lu"
> -		       " setflags:%lu]\n", sdp->sd_ord_stats.os_ct,
> +		       " setflags:%lu fsync:%lu]\n", sdp->sd_ord_stats.os_ct,
>   		       sdp->sd_ord_stats.os_add, sdp->sd_ord_stats.os_rm_trunc,
>   		       sdp->sd_ord_stats.os_rm_evict, sdp->sd_ord_stats.os_rm_wait,
>   		       sdp->sd_ord_stats.os_rm_syncfs, sdp->sd_ord_stats.os_rm_write,
> -		       sdp->sd_ord_stats.os_rm_setflags);
> +		       sdp->sd_ord_stats.os_rm_setflags, sdp->sd_ord_stats.os_rm_fsync);
>   	}
>   
>   	return 0;



^ permalink raw reply	[flat|nested] 7+ messages in thread

* [Cluster-devel] [RFC RHEL7 GFS2 PATCH 3/3] gfs2: ordered write list addendum patch
  2017-12-12 17:22 ` [Cluster-devel] [RFC RHEL7 GFS2 PATCH 3/3] gfs2: ordered write list addendum patch Abhi Das
  2017-12-13 11:28   ` Steven Whitehouse
@ 2017-12-13 11:39   ` Steven Whitehouse
  1 sibling, 0 replies; 7+ messages in thread
From: Steven Whitehouse @ 2017-12-13 11:39 UTC (permalink / raw)
  To: cluster-devel.redhat.com



On 12/12/17 17:22, Abhi Das wrote:
> Trim the list in gfs2_ordered_write() as we run through it
> to write out inodes.
> Also attempt to remove an inode from the list after it is
> fsync'ed.
> Finally, call gfs2_ordered_write() in case we were not able
> to shrink the list in gfs2_ordered_shrink() in the hopes that
> it will eventually cause the list to shrink.
> ---
>   fs/gfs2/file.c   |  3 +++
>   fs/gfs2/incore.h |  1 +
>   fs/gfs2/log.c    | 13 +++++++++++--
>   fs/gfs2/log.h    |  4 +++-
>   fs/gfs2/quota.c  |  4 ++--
>   5 files changed, 20 insertions(+), 5 deletions(-)
A further thought... eventually we'll probably need to build the bios 
ourselves in order that we can hook the I/O completion functions. That 
would be the best way to get notification that the I/O has completed. So 
something like a copy of the existing filemap/writepage functions but 
allowing us custom completions.

It would also be worth looking at the existing pagecache/mm code too, so 
see how the inode dirty flags get cleared after an fsync for example. 
That might be a useful reference for how to check that the inode is 
clean in a race free manner,

Steve.

>
> diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
> index 757ec66..75f9ac0 100644
> --- a/fs/gfs2/file.c
> +++ b/fs/gfs2/file.c
> @@ -697,6 +697,9 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
>   	if (mapping->nrpages)
>   		ret = filemap_fdatawait_range(mapping, start, end);
>   
> +	if (!ret && !ret1)
> +		gfs2_ordered_del_inode(ip, ORD_WHENCE_FSYNC);
> +
>   	return ret ? ret : ret1;
>   }
>   
> diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
> index 6fcad2a..93da360 100644
> --- a/fs/gfs2/incore.h
> +++ b/fs/gfs2/incore.h
> @@ -661,6 +661,7 @@ struct ord_stats {
>   	unsigned long os_rm_trunc;
>   	unsigned long os_rm_evict;
>   	unsigned long os_rm_wait;
> +	unsigned long os_rm_fsync;
>   	unsigned long os_rm_syncfs;
>   	unsigned long os_rm_write;
>   	unsigned long os_rm_setflags;
> diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
> index 6d618a1..4cfef47 100644
> --- a/fs/gfs2/log.c
> +++ b/fs/gfs2/log.c
> @@ -507,9 +507,13 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp)
>   	list_sort(NULL, &sdp->sd_log_le_ordered, &ip_cmp);
>   	while (!list_empty(&sdp->sd_log_le_ordered)) {
>   		ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered);
> -		list_move(&ip->i_ordered, &written);
> -		if (ip->i_inode.i_mapping->nrpages == 0)
> +		if (ip->i_inode.i_mapping->nrpages == 0) {
> +			test_and_clear_bit(GIF_ORDERED, &ip->i_flags);
> +			list_del(&ip->i_ordered);
> +			ord_stats_adjust(sdp, -1, ORD_WHENCE_ORD_WRITE);
>   			continue;
> +		}
> +		list_move(&ip->i_ordered, &written);
>   		spin_unlock(&sdp->sd_ordered_lock);
>   		filemap_fdatawrite(ip->i_inode.i_mapping);
>   		spin_lock(&sdp->sd_ordered_lock);
> @@ -540,17 +544,22 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
>   void gfs2_ordered_shrink(struct gfs2_sbd *sdp, int whence)
>   {
>   	struct gfs2_inode *ip, *tmp;
> +	bool removed;
>   
>   	spin_lock(&sdp->sd_ordered_lock);
>   	list_for_each_entry_safe(ip, tmp, &sdp->sd_log_le_ordered, i_ordered) {
>   		if (ip->i_inode.i_mapping->nrpages != 0)
>   			continue;
>   		if (test_and_clear_bit(GIF_ORDERED, &ip->i_flags)) {
> +			removed = true;
>   			list_del(&ip->i_ordered);
>   			ord_stats_adjust(sdp, -1, whence);
>   		}
>   	}
>   	spin_unlock(&sdp->sd_ordered_lock);
> +
> +	if (!removed)
> +		gfs2_ordered_write(sdp);
>   }
>   
>   void gfs2_ordered_del_inode(struct gfs2_inode *ip, int whence)
> diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
> index 80c8861..0bc3620 100644
> --- a/fs/gfs2/log.h
> +++ b/fs/gfs2/log.h
> @@ -55,7 +55,8 @@ enum {
>   	ORD_WHENCE_SYNCFS       = 3,
>   	ORD_WHENCE_ORD_WRITE    = 4,
>   	ORD_WHENCE_SETFLAGS     = 5,
> -	ORD_WHENCE_ADD          = 6,
> +	ORD_WHENCE_FSYNC        = 6,
> +	ORD_WHENCE_ADD          = 7,
>   };
>   
>   static inline void ord_stats_adjust(struct gfs2_sbd *sdp, int count, int whence)
> @@ -70,6 +71,7 @@ static inline void ord_stats_adjust(struct gfs2_sbd *sdp, int count, int whence)
>   	case ORD_WHENCE_SYNCFS:     os->os_rm_syncfs += -(count); break;
>   	case ORD_WHENCE_ORD_WRITE:  os->os_rm_write += -(count); break;
>   	case ORD_WHENCE_SETFLAGS:   os->os_rm_setflags += -(count); break;
> +	case ORD_WHENCE_FSYNC:      os->os_rm_fsync += -(count); break;
>   
>   	case ORD_WHENCE_ADD:        os->os_add += count; break;
>   	default: break;
> diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
> index 66c5126..63e3afa 100644
> --- a/fs/gfs2/quota.c
> +++ b/fs/gfs2/quota.c
> @@ -1575,11 +1575,11 @@ int gfs2_quotad(void *data)
>   
>   		printk(KERN_WARNING "Ord list Size:%lu +[add_inode:%lu] "
>   		       "-[trunc:%lu evict:%lu wait:%lu syncfs:%lu ord_write:%lu"
> -		       " setflags:%lu]\n", sdp->sd_ord_stats.os_ct,
> +		       " setflags:%lu fsync:%lu]\n", sdp->sd_ord_stats.os_ct,
>   		       sdp->sd_ord_stats.os_add, sdp->sd_ord_stats.os_rm_trunc,
>   		       sdp->sd_ord_stats.os_rm_evict, sdp->sd_ord_stats.os_rm_wait,
>   		       sdp->sd_ord_stats.os_rm_syncfs, sdp->sd_ord_stats.os_rm_write,
> -		       sdp->sd_ord_stats.os_rm_setflags);
> +		       sdp->sd_ord_stats.os_rm_setflags, sdp->sd_ord_stats.os_rm_fsync);
>   	}
>   
>   	return 0;



^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2017-12-13 11:39 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-12-12 17:22 [Cluster-devel] [RFC RHEL7 GFS2 PATCH 0/3] Trimming the ordered write inode list Abhi Das
2017-12-12 17:22 ` [Cluster-devel] [RFC RHEL7 GFS2 PATCH 1/3] gfs2: ordered list instrumentation Abhi Das
2017-12-12 17:22 ` [Cluster-devel] [RFC RHEL7 GFS2 PATCH 2/3] gfs2: try to free empty mapping inodes from ordered list Abhi Das
2017-12-13 11:25   ` Steven Whitehouse
2017-12-12 17:22 ` [Cluster-devel] [RFC RHEL7 GFS2 PATCH 3/3] gfs2: ordered write list addendum patch Abhi Das
2017-12-13 11:28   ` Steven Whitehouse
2017-12-13 11:39   ` Steven Whitehouse

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.