[Cluster-devel] [GFS2 PATCH] GFS2: Separate lru lists by glock type, use with glops

* [Cluster-devel] [GFS2 PATCH] GFS2: Separate lru lists by glock type, use with glops
       [not found] <2010554939.31866585.1500384039170.JavaMail.zimbra@redhat.com>
@ 2017-07-21 15:19 ` Bob Peterson
  2017-07-21 15:52   ` Bob Peterson
  2017-07-22 16:45   ` Andreas Gruenbacher
  0 siblings, 2 replies; 3+ messages in thread
From: Bob Peterson @ 2017-07-21 15:19 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

On 03 July I posted a patch called:

"GFS2: Convert LRU list to use a hash table to reduce contention"

Steve Whitehouse didn't like it for several reasons, one of them
being that the information was no longer age-ordered. Point taken.

The following is a new patch I wrote as a potential replacement.

The idea here is simply to reduce LRU contention by breaking
the lru_list into different lists, by glock type. All lru lists
are kept in order within the glock type. Thus, all inode glocks
are ordered properly.

The logic to manage the LRU lists is basically the same, except:

1. There are now multiple lists
2. The lists are managed by functions hooked into the glock ops
   (glops).
3. The GLOF_LRU bit is no longer needed, since we can now simply
   check whether the glops has the appropriate lru list function.
4. In function gfs2_clear_rgrpd, when the rgrps are being
   dismantled for unmount, it was calling gfs2_glock_add_to_lru()
   for the rgrp glocks. This is incorrect, since rgrps are never
   added to the lru lists (They didn't even have the GLOF_LRU bit).
   In the pre-patch kernel, it was treated as a no-op, but calling
   the new gl_op is an error. So this line was removed.
5. Another notable exception: In gfs2_evict_inode, if this is a
   "final" glock reference "put" there's no point in putting the
   glock onto the lru list only to remove it again a few
   microseconds later. This is a new optimization.

I asked one of our performance testing guys to see whether this
patch speeds things up. As the attached graph shows, yes it does.
The specsfs test runs anywhere from 9 to 20 percent faster than
the same (pre-release rhel7.4) kernel without the patch.

So given that the LRU logic is preserved, this is well worth doing.
---
Patch description:

This patch breaks the glock lru list into individual lists, one
for each glock type. The glops are then used to access them.
This is to reduce contention.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index c38ab6c81898..7bc511588a5f 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -64,9 +64,19 @@ static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int
 static struct dentry *gfs2_root;
 static struct workqueue_struct *glock_workqueue;
 struct workqueue_struct *gfs2_delete_workqueue;
-static LIST_HEAD(lru_list);
-static atomic_t lru_count = ATOMIC_INIT(0);
-static DEFINE_SPINLOCK(lru_lock);
+
+LIST_HEAD(lru_list_inode);
+LIST_HEAD(lru_list_iopen);
+LIST_HEAD(lru_list_flock);
+LIST_HEAD(lru_list_quota);
+atomic_t lru_count_inode = ATOMIC_INIT(0);
+atomic_t lru_count_iopen = ATOMIC_INIT(0);
+atomic_t lru_count_flock = ATOMIC_INIT(0);
+atomic_t lru_count_quota = ATOMIC_INIT(0);
+DEFINE_SPINLOCK(lru_lock_inode);
+DEFINE_SPINLOCK(lru_lock_iopen);
+DEFINE_SPINLOCK(lru_lock_flock);
+DEFINE_SPINLOCK(lru_lock_quota);
 
 #define GFS2_GL_HASH_SHIFT      15
 #define GFS2_GL_HASH_SIZE       BIT(GFS2_GL_HASH_SHIFT)
@@ -133,32 +143,6 @@ static int demote_ok(const struct gfs2_glock *gl)
 	return 1;
 }
 
-
-void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
-{
-	spin_lock(&lru_lock);
-
-	if (!list_empty(&gl->gl_lru))
-		list_del_init(&gl->gl_lru);
-	else
-		atomic_inc(&lru_count);
-
-	list_add_tail(&gl->gl_lru, &lru_list);
-	set_bit(GLF_LRU, &gl->gl_flags);
-	spin_unlock(&lru_lock);
-}
-
-static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
-{
-	spin_lock(&lru_lock);
-	if (!list_empty(&gl->gl_lru)) {
-		list_del_init(&gl->gl_lru);
-		atomic_dec(&lru_count);
-		clear_bit(GLF_LRU, &gl->gl_flags);
-	}
-	spin_unlock(&lru_lock);
-}
-
 /*
  * Enqueue the glock on the work queue.  Passes one glock reference on to the
  * work queue.
@@ -189,7 +173,8 @@ static void __gfs2_glock_put(struct gfs2_glock *gl)
 
 	lockref_mark_dead(&gl->gl_lockref);
 
-	gfs2_glock_remove_from_lru(gl);
+	if (gl->gl_ops->go_remove_from_lru)
+		gl->gl_ops->go_remove_from_lru(gl);
 	spin_unlock(&gl->gl_lockref.lock);
 	rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms);
 	GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
@@ -1018,7 +1003,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh)
 		return -EIO;
 
 	if (test_bit(GLF_LRU, &gl->gl_flags))
-		gfs2_glock_remove_from_lru(gl);
+		gl->gl_ops->go_remove_from_lru(gl);
 
 	spin_lock(&gl->gl_lockref.lock);
 	add_to_queue(gh);
@@ -1081,9 +1066,9 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
 		    !test_bit(GLF_DEMOTE, &gl->gl_flags))
 			fast_path = 1;
 	}
-	if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl) &&
-	    (glops->go_flags & GLOF_LRU))
-		gfs2_glock_add_to_lru(gl);
+	if (glops->go_add_to_lru &&
+	    !test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl))
+		glops->go_add_to_lru(gl);
 
 	trace_gfs2_glock_queue(gh, 0);
 	if (unlikely(!fast_path)) {
@@ -1352,6 +1337,8 @@ static int glock_cmp(void *priv, struct list_head *a, struct list_head *b)
 /**
  * gfs2_dispose_glock_lru - Demote a list of glocks
  * @list: The list to dispose of
+ * @lru_list: The lru_list from which the list came
+ * @lru_count: The lru_list count
  *
  * Disposing of glocks may involve disk accesses, so that here we sort
  * the glocks by number (i.e. disk location of the inodes) so that if
@@ -1363,9 +1350,9 @@ static int glock_cmp(void *priv, struct list_head *a, struct list_head *b)
  * private)
  */
 
-static void gfs2_dispose_glock_lru(struct list_head *list)
-__releases(&lru_lock)
-__acquires(&lru_lock)
+static void __dispose_glock_lru(struct list_head *list, spinlock_t *lru_spin,
+				struct list_head *lru_list,
+				atomic_t *lru_count)
 {
 	struct gfs2_glock *gl;
 
@@ -1376,8 +1363,8 @@ __acquires(&lru_lock)
 		list_del_init(&gl->gl_lru);
 		if (!spin_trylock(&gl->gl_lockref.lock)) {
 add_back_to_lru:
-			list_add(&gl->gl_lru, &lru_list);
-			atomic_inc(&lru_count);
+			list_add(&gl->gl_lru, lru_list);
+			atomic_inc(lru_count);
 			continue;
 		}
 		if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
@@ -1391,44 +1378,48 @@ __acquires(&lru_lock)
 		WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags));
 		__gfs2_glock_queue_work(gl, 0);
 		spin_unlock(&gl->gl_lockref.lock);
-		cond_resched_lock(&lru_lock);
+		cond_resched_lock(lru_spin);
 	}
 }
 
 /**
  * gfs2_scan_glock_lru - Scan the LRU looking for locks to demote
  * @nr: The number of entries to scan
+ * @lru_list: The lru_list from which the list came
+ * @lru_count: The lru_list count
  *
  * This function selects the entries on the LRU which are able to
  * be demoted, and then kicks off the process by calling
  * gfs2_dispose_glock_lru() above.
  */
 
-static long gfs2_scan_glock_lru(int nr)
+unsigned long gfs2_scan_glock_lru(unsigned long *nr, struct list_head *lru_list,
+				  atomic_t *lru_count, spinlock_t *lru_spin)
 {
 	struct gfs2_glock *gl;
 	LIST_HEAD(skipped);
 	LIST_HEAD(dispose);
-	long freed = 0;
+	unsigned long freed = 0;
 
-	spin_lock(&lru_lock);
-	while ((nr-- >= 0) && !list_empty(&lru_list)) {
-		gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
+	spin_lock(lru_spin);
+	while (*nr && !list_empty(lru_list)) {
+		gl = list_entry(lru_list->next, struct gfs2_glock, gl_lru);
 
 		/* Test for being demotable */
 		if (!test_bit(GLF_LOCK, &gl->gl_flags)) {
 			list_move(&gl->gl_lru, &dispose);
-			atomic_dec(&lru_count);
+			atomic_dec(lru_count);
+			(*nr)--;
 			freed++;
 			continue;
 		}
 
 		list_move(&gl->gl_lru, &skipped);
 	}
-	list_splice(&skipped, &lru_list);
+	list_splice(&skipped, lru_list);
 	if (!list_empty(&dispose))
-		gfs2_dispose_glock_lru(&dispose);
-	spin_unlock(&lru_lock);
+		__dispose_glock_lru(&dispose, lru_spin, lru_list, lru_count);
+	spin_unlock(lru_spin);
 
 	return freed;
 }
@@ -1436,15 +1427,30 @@ static long gfs2_scan_glock_lru(int nr)
 static unsigned long gfs2_glock_shrink_scan(struct shrinker *shrink,
 					    struct shrink_control *sc)
 {
+	int x;
+	unsigned long lru_count = 0;
+
 	if (!(sc->gfp_mask & __GFP_FS))
 		return SHRINK_STOP;
-	return gfs2_scan_glock_lru(sc->nr_to_scan);
+
+	for (x = 0; sc->nr_to_scan && gfs2_glops_list[x]; x++) {
+		if (gfs2_glops_list[x]->go_scan_lru == NULL)
+			continue;
+		lru_count += gfs2_glops_list[x]->go_scan_lru(&sc->nr_to_scan);
+	}
+	return lru_count;
 }
 
 static unsigned long gfs2_glock_shrink_count(struct shrinker *shrink,
 					     struct shrink_control *sc)
 {
-	return vfs_pressure_ratio(atomic_read(&lru_count));
+	int x;
+	unsigned long lru_count = 0;
+
+	for (x = 0; gfs2_glops_list[x]; x++)
+		if (gfs2_glops_list[x]->go_lru_count)
+			lru_count += gfs2_glops_list[x]->go_lru_count();
+	return vfs_pressure_ratio(lru_count);
 }
 
 static struct shrinker glock_shrinker = {
@@ -1511,7 +1517,8 @@ static void thaw_glock(struct gfs2_glock *gl)
 
 static void clear_glock(struct gfs2_glock *gl)
 {
-	gfs2_glock_remove_from_lru(gl);
+	if (gl->gl_ops->go_remove_from_lru)
+		gl->gl_ops->go_remove_from_lru(gl);
 
 	spin_lock(&gl->gl_lockref.lock);
 	if (gl->gl_state != LM_ST_UNLOCKED)
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 9ad4a6ac6c84..ce436a83b630 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -135,6 +135,20 @@ struct lm_lockops {
 };
 
 extern struct workqueue_struct *gfs2_delete_workqueue;
+
+extern struct list_head lru_list_inode;
+extern struct list_head lru_list_iopen;
+extern struct list_head lru_list_flock;
+extern struct list_head lru_list_quota;
+extern atomic_t lru_count_inode;
+extern atomic_t lru_count_iopen;
+extern atomic_t lru_count_flock;
+extern atomic_t lru_count_quota;
+extern spinlock_t lru_lock_inode;
+extern spinlock_t lru_lock_iopen;
+extern spinlock_t lru_lock_flock;
+extern spinlock_t lru_lock_quota;
+
 static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl)
 {
 	struct gfs2_holder *gh;
@@ -200,6 +214,11 @@ extern int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number,
 extern int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
 extern void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
 extern void gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl);
+extern unsigned long gfs2_scan_glock_lru(unsigned long *nr,
+					 struct list_head *lru_list,
+					 atomic_t *lru_count,
+					 spinlock_t *lru_spin);
+
 #define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { gfs2_dump_glock(NULL, gl); BUG(); } } while(0)
 extern __printf(2, 3)
 void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...);
@@ -234,7 +253,6 @@ extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
 extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
 extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
 extern void gfs2_glock_thaw(struct gfs2_sbd *sdp);
-extern void gfs2_glock_add_to_lru(struct gfs2_glock *gl);
 extern void gfs2_glock_free(struct gfs2_glock *gl);
 
 extern int __init gfs2_glock_init(void);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 5e69636d4dd3..ea7aa70c1aaf 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -591,6 +591,121 @@ static void iopen_go_callback(struct gfs2_glock *gl, bool remote)
 	}
 }
 
+static void __go_add_to_lru(struct gfs2_glock *gl, spinlock_t *lru_spin,
+			    atomic_t *lru_count, struct list_head *lru_list)
+{
+	spin_lock(lru_spin);
+
+	if (!list_empty(&gl->gl_lru))
+		list_del_init(&gl->gl_lru);
+	else
+		atomic_inc(lru_count);
+
+	list_add_tail(&gl->gl_lru, lru_list);
+	set_bit(GLF_LRU, &gl->gl_flags);
+	spin_unlock(lru_spin);
+}
+
+static void inode_go_add_to_lru(struct gfs2_glock *gl)
+{
+	__go_add_to_lru(gl, &lru_lock_inode, &lru_count_inode,
+			&lru_list_inode);
+}
+
+static void iopen_go_add_to_lru(struct gfs2_glock *gl)
+{
+	__go_add_to_lru(gl, &lru_lock_iopen, &lru_count_iopen,
+			&lru_list_iopen);
+}
+
+static void flock_go_add_to_lru(struct gfs2_glock *gl)
+{
+	__go_add_to_lru(gl, &lru_lock_flock, &lru_count_flock,
+			&lru_list_flock);
+}
+
+static void quota_go_add_to_lru(struct gfs2_glock *gl)
+{
+	__go_add_to_lru(gl, &lru_lock_quota, &lru_count_quota,
+			&lru_list_quota);
+}
+
+static void __go_remove_from_lru(struct gfs2_glock *gl, spinlock_t *lru_spin,
+				 atomic_t *lru_count)
+{
+	spin_lock(lru_spin);
+	if (!list_empty(&gl->gl_lru)) {
+		list_del_init(&gl->gl_lru);
+		atomic_dec(lru_count);
+		clear_bit(GLF_LRU, &gl->gl_flags);
+	}
+	spin_unlock(lru_spin);
+}
+
+static void inode_go_remove_from_lru(struct gfs2_glock *gl)
+{
+	__go_remove_from_lru(gl, &lru_lock_inode, &lru_count_inode);
+}
+
+static void iopen_go_remove_from_lru(struct gfs2_glock *gl)
+{
+	__go_remove_from_lru(gl, &lru_lock_iopen, &lru_count_iopen);
+}
+
+static void flock_go_remove_from_lru(struct gfs2_glock *gl)
+{
+	__go_remove_from_lru(gl, &lru_lock_flock, &lru_count_flock);
+}
+
+static void quota_go_remove_from_lru(struct gfs2_glock *gl)
+{
+	__go_remove_from_lru(gl, &lru_lock_quota, &lru_count_quota);
+}
+
+static unsigned long go_scan_lru_inode(unsigned long *nr)
+{
+	return gfs2_scan_glock_lru(nr, &lru_list_inode, &lru_count_inode,
+				   &lru_lock_inode);
+}
+
+static unsigned long go_scan_lru_iopen(unsigned long *nr)
+{
+	return gfs2_scan_glock_lru(nr, &lru_list_iopen, &lru_count_iopen,
+				   &lru_lock_iopen);
+}
+
+static unsigned long go_scan_lru_flock(unsigned long *nr)
+{
+	return gfs2_scan_glock_lru(nr, &lru_list_flock, &lru_count_flock,
+				   &lru_lock_flock);
+}
+
+static unsigned long go_scan_lru_quota(unsigned long *nr)
+{
+	return gfs2_scan_glock_lru(nr, &lru_list_quota, &lru_count_quota,
+				   &lru_lock_quota);
+}
+
+static unsigned long go_lru_count_inode(void)
+{
+	return atomic_read(&lru_count_inode);
+}
+
+static unsigned long go_lru_count_iopen(void)
+{
+	return atomic_read(&lru_count_iopen);
+}
+
+static unsigned long go_lru_count_flock(void)
+{
+	return atomic_read(&lru_count_flock);
+}
+
+static unsigned long go_lru_count_quota(void)
+{
+	return atomic_read(&lru_count_quota);
+}
+
 const struct gfs2_glock_operations gfs2_meta_glops = {
 	.go_type = LM_TYPE_META,
 };
@@ -601,8 +716,12 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
 	.go_demote_ok = inode_go_demote_ok,
 	.go_lock = inode_go_lock,
 	.go_dump = inode_go_dump,
+	.go_add_to_lru = inode_go_add_to_lru,
+	.go_remove_from_lru = inode_go_remove_from_lru,
+	.go_scan_lru = go_scan_lru_inode,
+	.go_lru_count = go_lru_count_inode,
 	.go_type = LM_TYPE_INODE,
-	.go_flags = GLOF_ASPACE | GLOF_LRU,
+	.go_flags = GLOF_ASPACE,
 };
 
 const struct gfs2_glock_operations gfs2_rgrp_glops = {
@@ -625,12 +744,18 @@ const struct gfs2_glock_operations gfs2_freeze_glops = {
 const struct gfs2_glock_operations gfs2_iopen_glops = {
 	.go_type = LM_TYPE_IOPEN,
 	.go_callback = iopen_go_callback,
-	.go_flags = GLOF_LRU,
+	.go_add_to_lru = iopen_go_add_to_lru,
+	.go_remove_from_lru = iopen_go_remove_from_lru,
+	.go_scan_lru = go_scan_lru_iopen,
+	.go_lru_count = go_lru_count_iopen,
 };
 
 const struct gfs2_glock_operations gfs2_flock_glops = {
 	.go_type = LM_TYPE_FLOCK,
-	.go_flags = GLOF_LRU,
+	.go_add_to_lru = flock_go_add_to_lru,
+	.go_remove_from_lru = flock_go_remove_from_lru,
+	.go_scan_lru = go_scan_lru_flock,
+	.go_lru_count = go_lru_count_flock,
 };
 
 const struct gfs2_glock_operations gfs2_nondisk_glops = {
@@ -639,7 +764,11 @@ const struct gfs2_glock_operations gfs2_nondisk_glops = {
 
 const struct gfs2_glock_operations gfs2_quota_glops = {
 	.go_type = LM_TYPE_QUOTA,
-	.go_flags = GLOF_LVB | GLOF_LRU,
+	.go_add_to_lru = quota_go_add_to_lru,
+	.go_remove_from_lru = quota_go_remove_from_lru,
+	.go_scan_lru = go_scan_lru_quota,
+	.go_lru_count = go_lru_count_quota,
+	.go_flags = GLOF_LVB,
 };
 
 const struct gfs2_glock_operations gfs2_journal_glops = {
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 73fce76e67ee..743616cca819 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -228,11 +228,14 @@ struct gfs2_glock_operations {
 	void (*go_unlock) (struct gfs2_holder *gh);
 	void (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl);
 	void (*go_callback)(struct gfs2_glock *gl, bool remote);
+	void (*go_add_to_lru)(struct gfs2_glock *gl);
+	void (*go_remove_from_lru)(struct gfs2_glock *gl);
+	unsigned long (*go_scan_lru)(unsigned long *nr);
+	unsigned long (*go_lru_count)(void);
 	const int go_type;
 	const unsigned long go_flags;
 #define GLOF_ASPACE 1
 #define GLOF_LVB    2
-#define GLOF_LRU    4
 };
 
 enum {
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 836e38ba5d0a..29fbeee36fa6 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -706,7 +706,6 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
 
 		if (gl) {
 			glock_set_object(gl, NULL);
-			gfs2_glock_add_to_lru(gl);
 			gfs2_glock_put(gl);
 		}
 
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index fdedec379b78..bf1be5432f65 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1642,7 +1642,10 @@ static void gfs2_evict_inode(struct inode *inode)
 	gfs2_dir_hash_inval(ip);
 	glock_set_object(ip->i_gl, NULL);
 	wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE);
-	gfs2_glock_add_to_lru(ip->i_gl);
+	/* If gl_lockref.count is 1, the following glock_put would just need
+	   to pull it back off again. */
+	if (ip->i_gl->gl_lockref.count > 1)
+		ip->i_gl->gl_ops->go_add_to_lru(ip->i_gl);
 	gfs2_glock_put(ip->i_gl);
 	ip->i_gl = NULL;
 	if (gfs2_holder_initialized(&ip->i_iopen_gh)) {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: specsfs.results.graph.jpg
Type: image/jpeg
Size: 28278 bytes
Desc: not available
URL: <http://listman.redhat.com/archives/cluster-devel/attachments/20170721/4546f538/attachment.jpg>

^ permalink raw reply related	[flat|nested] 3+ messages in thread