Re: [PATCH RFC tip/core/rcu] Add shrinker to shift to fast/inefficient GP mode

* Re: [PATCH RFC tip/core/rcu] Add shrinker to shift to fast/inefficient GP mode
       [not found] <20200507004240.GA9156@paulmck-ThinkPad-P72>
@ 2020-05-07  9:36 ` Hillf Danton
  2020-05-07 15:49   ` Paul E. McKenney
  0 siblings, 1 reply; 4+ messages in thread
From: Hillf Danton @ 2020-05-07  9:36 UTC (permalink / raw)
  To: Paul E. McKenney; +Cc: linux-kernel, rcu, akpm, Hillf Danton, linux-mm


Hello Paul

On Wed, 6 May 2020 17:42:40 Paul E. McKenney wrote:
> 
> This commit adds a shrinker so as to inform RCU when memory is scarce.

A simpler hook is added in the logic of kswapd for subscribing the info
that memory pressure is high, and then on top of it make rcu a subscriber
by copying your code for the shrinker, wishing it makes a sense to you.

What's not yet included is to make the hook per node to help make every
reviewer convinced that memory is becoming tight. Of course without the
cost of making subscribers node aware.

Hillf

--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -49,6 +49,16 @@ static inline void set_max_mapnr(unsigne
 static inline void set_max_mapnr(unsigned long limit) { }
 #endif
 
+/* subscriber of kswapd's memory_pressure_high signal */
+struct mph_subscriber {
+	struct list_head node;
+	void (*info) (void *data);
+	void *data;
+};
+
+int mph_subscribe(struct mph_subscriber *ms);
+void mph_unsubscribe(struct mph_subscriber *ms);
+
 extern atomic_long_t _totalram_pages;
 static inline unsigned long totalram_pages(void)
 {
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3536,6 +3536,40 @@ static bool kswapd_shrink_node(pg_data_t
 }
 
 /*
+ * subscribers of kswapd's signal that memory pressure is high
+ */
+static LIST_HEAD(mph_subs);
+static DEFINE_MUTEX(mph_lock);
+
+int mph_subscribe(struct mph_subscriber *ms)
+{
+	if (!ms->info)
+		return -EAGAIN;
+
+	mutex_lock(&mph_lock);
+	list_add_tail(&ms->node, &mph_subs);
+	mutex_unlock(&mph_lock);
+	return 0;
+}
+
+void mph_unsubscribe(struct mph_subscriber *ms)
+{
+	mutex_lock(&mph_lock);
+	list_del(&ms->node);
+	mutex_unlock(&mph_lock);
+}
+
+static void kswapd_bbc_mph(void)
+{
+	struct mph_subscriber *ms;
+
+	mutex_lock(&mph_lock);
+	list_for_each_entry(ms, &mph_subs, node)
+		ms->info(ms->data);
+	mutex_unlock(&mph_lock);
+}
+
+/*
  * For kswapd, balance_pgdat() will reclaim pages across a node from zones
  * that are eligible for use by the caller until at least one zone is
  * balanced.
@@ -3663,8 +3697,11 @@ restart:
 		 * If we're getting trouble reclaiming, start doing writepage
 		 * even in laptop mode.
 		 */
-		if (sc.priority < DEF_PRIORITY - 2)
+		if (sc.priority < DEF_PRIORITY - 2) {
 			sc.may_writepage = 1;
+			if (sc.priority == DEF_PRIORITY - 3)
+				kswapd_bbc_mph();
+		}
 
 		/* Call soft limit reclaim before calling shrink_node. */
 		sc.nr_scanned = 0;
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -325,6 +325,8 @@ struct rcu_state {
 	int ncpus_snap;				/* # CPUs seen last time. */
 	u8 cbovld;				/* Callback overload now? */
 	u8 cbovldnext;				/* ^        ^  next time? */
+	u8 mph;					/* mm pressure high signal from kswapd */
+	unsigned long mph_end;			/* time stamp in jiffies */
 
 	unsigned long jiffies_force_qs;		/* Time at which to invoke */
 						/*  force_quiescent_state(). */
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -52,6 +52,7 @@
 #include <linux/kprobes.h>
 #include <linux/gfp.h>
 #include <linux/oom.h>
+#include <linux/mm.h>
 #include <linux/smpboot.h>
 #include <linux/jiffies.h>
 #include <linux/slab.h>
@@ -2314,8 +2315,15 @@ static void force_qs_rnp(int (*f)(struct
 	struct rcu_data *rdp;
 	struct rcu_node *rnp;
 
-	rcu_state.cbovld = rcu_state.cbovldnext;
+	rcu_state.cbovld = smp_load_acquire(&rcu_state.mph) ||
+					rcu_state.cbovldnext;
 	rcu_state.cbovldnext = false;
+
+	if (READ_ONCE(rcu_state.mph) &&
+	    time_after(jiffies, READ_ONCE(rcu_state.mph_end))) {
+		WRITE_ONCE(rcu_state.mph, false);
+		pr_info("%s: Ending OOM-mode grace periods.\n", __func__);
+	}
 	rcu_for_each_leaf_node(rnp) {
 		cond_resched_tasks_rcu_qs();
 		mask = 0;
@@ -2643,6 +2651,20 @@ static void check_cb_ovld(struct rcu_dat
 	raw_spin_unlock_rcu_node(rnp);
 }
 
+static void rcu_mph_info(void *data)
+{
+	struct rcu_state *state = data;
+
+	WRITE_ONCE(state->mph_end, jiffies + HZ / 10);
+	smp_store_release(&state->mph, true);
+	rcu_force_quiescent_state();
+}
+
+static struct mph_subscriber rcu_mph_subscriber = {
+	.info = rcu_mph_info,
+	.data = &rcu_state,
+};
+
 /* Helper function for call_rcu() and friends.  */
 static void
 __call_rcu(struct rcu_head *head, rcu_callback_t func)
@@ -4036,6 +4058,8 @@ void __init rcu_init(void)
 		qovld_calc = DEFAULT_RCU_QOVLD_MULT * qhimark;
 	else
 		qovld_calc = qovld;
+
+	mph_subscribe(&rcu_mph_subscriber);
 }
 
 #include "tree_stall.h"



^ permalink raw reply	[flat|nested] 4+ messages in thread