linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
To: LKML <linux-kernel@vger.kernel.org>, linux-mm@kvack.org
Cc: Andrew Morton <akpm@linux-foundation.org>,
	"Paul E . McKenney" <paulmck@kernel.org>,
	"Theodore Y . Ts'o" <tytso@mit.edu>,
	Matthew Wilcox <willy@infradead.org>,
	Joel Fernandes <joel@joelfernandes.org>,
	RCU <rcu@vger.kernel.org>, Uladzislau Rezki <urezki@gmail.com>,
	Oleksiy Avramchenko <oleksiy.avramchenko@sonymobile.com>
Subject: [PATCH 19/24] rcu/tree: Support reclaim for head-less object
Date: Tue, 28 Apr 2020 22:58:58 +0200	[thread overview]
Message-ID: <20200428205903.61704-20-urezki@gmail.com> (raw)
In-Reply-To: <20200428205903.61704-1-urezki@gmail.com>

Update the kvfree_call_rcu() with head-less support, it
means an object without any rcu_head structure can be
reclaimed after GP.

To store pointers there are two chain-arrays maintained
one for SLAB and another one is for vmalloc. Both types
of objects(head-less variant and regular one) are placed
there based on the type.

It can be that maintaining of arrays becomes impossible
due to high memory pressure. For such reason there is an
emergency path. In that case objects with rcu_head inside
are just queued building one way list. Later on that list
is drained.

As for head-less variant. Such objects do not have any
rcu_head helper inside. Thus it is dynamically attached.
As a result an object consists of back-pointer and regular
rcu_head. It implies that emergency path can detect such
object type, therefore they are tagged. So a back-pointer
could be freed as well as dynamically attached wrapper.

Even though such approach requires dynamic memory it needs
only sizeof(unsigned long *) + sizeof(struct rcu_head) bytes,
thus SLAB is used to obtain it. Finally if attaching of the
rcu_head and queuing get failed, the current context has
to follow might_sleep() annotation, thus below steps could
be applied:
   a) wait until a grace period has elapsed;
   b) direct inlining of the kvfree() call.

Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Co-developed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 kernel/rcu/tree.c | 102 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 98 insertions(+), 4 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 51726e4c3b4d..501cac02146d 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3072,15 +3072,31 @@ static void kfree_rcu_work(struct work_struct *work)
 	 */
 	for (; head; head = next) {
 		unsigned long offset = (unsigned long)head->func;
-		void *ptr = (void *)head - offset;
+		bool headless;
+		void *ptr;
 
 		next = head->next;
+
+		/* We tag the headless object, if so adjust offset. */
+		headless = (((unsigned long) head - offset) & BIT(0));
+		if (headless)
+			offset -= 1;
+
+		ptr = (void *) head - offset;
+
 		debug_rcu_head_unqueue((struct rcu_head *)ptr);
 		rcu_lock_acquire(&rcu_callback_map);
 		trace_rcu_invoke_kvfree_callback(rcu_state.name, head, offset);
 
-		if (!WARN_ON_ONCE(!__is_kvfree_rcu_offset(offset)))
+		if (!WARN_ON_ONCE(!__is_kvfree_rcu_offset(offset))) {
+			/*
+			 * If headless free the back-pointer first.
+			 */
+			if (headless)
+				kvfree((void *) *((unsigned long *) ptr));
+
 			kvfree(ptr);
+		}
 
 		rcu_lock_release(&rcu_callback_map);
 		cond_resched_tasks_rcu_qs();
@@ -3221,6 +3237,13 @@ kvfree_call_rcu_add_ptr_to_bulk(struct kfree_rcu_cpu *krcp, void *ptr)
 			if (IS_ENABLED(CONFIG_PREEMPT_RT))
 				return false;
 
+			/*
+			 * TODO: For one argument of kvfree_rcu() we can
+			 * drop the lock and get the page in sleepable
+			 * context. That would allow to maintain an array
+			 * for the CONFIG_PREEMPT_RT as well. Thus we could
+			 * get rid of dynamic rcu_head attaching code.
+			 */
 			bnode = (struct kvfree_rcu_bulk_data *)
 				__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
 		}
@@ -3244,6 +3267,23 @@ kvfree_call_rcu_add_ptr_to_bulk(struct kfree_rcu_cpu *krcp, void *ptr)
 	return true;
 }
 
+static inline struct rcu_head *
+attach_rcu_head_to_object(void *obj)
+{
+	unsigned long *ptr;
+
+	ptr = kmalloc(sizeof(unsigned long *) +
+			sizeof(struct rcu_head), GFP_NOWAIT |
+				__GFP_RECLAIM |	/* can do direct reclaim. */
+				__GFP_NORETRY |	/* only lightweight one.  */
+				__GFP_NOWARN);	/* no failure reports. */
+	if (!ptr)
+		return NULL;
+
+	ptr[0] = (unsigned long) obj;
+	return ((struct rcu_head *) ++ptr);
+}
+
 /*
  * Queue a request for lazy invocation of appropriate free routine after a
  * grace period. Please note there are three paths are maintained, two are the
@@ -3260,16 +3300,34 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 {
 	unsigned long flags;
 	struct kfree_rcu_cpu *krcp;
+	bool success;
 	void *ptr;
 
+	if (head) {
+		ptr = (void *) head - (unsigned long) func;
+	} else {
+		/*
+		 * Please note there is a limitation for the head-less
+		 * variant, that is why there is a clear rule for such
+		 * objects:
+		 *
+		 * it can be used from might_sleep() context only. For
+		 * other places please embed an rcu_head to your data.
+		 */
+		might_sleep();
+		ptr = (unsigned long *) func;
+	}
+
 	krcp = krc_this_cpu_lock(&flags);
-	ptr = (void *)head - (unsigned long)func;
 
 	/* Queue the object but don't yet schedule the batch. */
 	if (debug_rcu_head_queue(ptr)) {
 		/* Probable double kfree_rcu(), just leak. */
 		WARN_ONCE(1, "%s(): Double-freed call. rcu_head %p\n",
 			  __func__, head);
+
+		/* Mark as success and leave. */
+		success = true;
 		goto unlock_return;
 	}
 
@@ -3277,10 +3335,34 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 	 * Under high memory pressure GFP_NOWAIT can fail,
 	 * in that case the emergency path is maintained.
 	 */
-	if (unlikely(!kvfree_call_rcu_add_ptr_to_bulk(krcp, ptr))) {
+	success = kvfree_call_rcu_add_ptr_to_bulk(krcp, ptr);
+	if (!success) {
+		if (head == NULL) {
+			/*
+			 * Headless(one argument kvfree_rcu()) can sleep.
+			 * Drop the lock and tack it back. So it can do
+			 * direct lightweight reclaim.
+			 */
+			krc_this_cpu_unlock(krcp, flags);
+			head = attach_rcu_head_to_object(ptr);
+			krcp = krc_this_cpu_lock(&flags);
+
+			if (head == NULL)
+				goto unlock_return;
+
+			/*
+			 * Tag the headless object. Such objects have a
+			 * back-pointer to the original allocated memory,
+			 * that has to be freed as well as dynamically
+			 * attached wrapper/head.
+			 */
+			func = (rcu_callback_t) (sizeof(unsigned long *) + 1);
+		}
+
 		head->func = func;
 		head->next = krcp->head;
 		krcp->head = head;
+		success = true;
 	}
 
 	WRITE_ONCE(krcp->count, krcp->count + 1);
@@ -3294,6 +3376,18 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 
 unlock_return:
 	krc_this_cpu_unlock(krcp, flags);
+
+	/*
+	 * High memory pressure, so inline kvfree() after
+	 * synchronize_rcu(). We can do it from might_sleep()
+	 * context only, so the current CPU can pass the QS
+	 * state.
+	 */
+	if (!success) {
+		debug_rcu_head_unqueue(ptr);
+		synchronize_rcu();
+		kvfree(ptr);
+	}
 }
 EXPORT_SYMBOL_GPL(kvfree_call_rcu);
 
-- 
2.20.1


  parent reply	other threads:[~2020-04-28 21:00 UTC|newest]

Thread overview: 78+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-04-28 20:58 [PATCH 00/24] Introduce kvfree_rcu(1 or 2 arguments) Uladzislau Rezki (Sony)
2020-04-28 20:58 ` [PATCH 01/24] rcu/tree: Keep kfree_rcu() awake during lock contention Uladzislau Rezki (Sony)
2020-04-28 20:58 ` [PATCH 02/24] rcu/tree: Skip entry into the page allocator for PREEMPT_RT Uladzislau Rezki (Sony)
2020-04-28 20:58 ` [PATCH 03/24] rcu/tree: Use consistent style for comments Uladzislau Rezki (Sony)
2020-05-01 19:05   ` Paul E. McKenney
2020-05-01 20:52     ` Joe Perches
2020-05-03 23:44       ` Joel Fernandes
2020-05-04  0:23         ` Paul E. McKenney
2020-05-04  0:34           ` Joe Perches
2020-05-04  0:41           ` Joel Fernandes
2020-05-03 23:52     ` Joel Fernandes
2020-05-04  0:26       ` Paul E. McKenney
2020-05-04  0:39         ` Joel Fernandes
2020-04-28 20:58 ` [PATCH 04/24] rcu/tree: Repeat the monitor if any free channel is busy Uladzislau Rezki (Sony)
2020-04-28 20:58 ` [PATCH 05/24] rcu/tree: Simplify debug_objects handling Uladzislau Rezki (Sony)
2020-04-28 20:58 ` [PATCH 06/24] rcu/tree: Simplify KFREE_BULK_MAX_ENTR macro Uladzislau Rezki (Sony)
2020-04-28 20:58 ` [PATCH 07/24] rcu/tree: move locking/unlocking to separate functions Uladzislau Rezki (Sony)
2020-04-28 20:58 ` [PATCH 08/24] rcu/tree: Use static initializer for krc.lock Uladzislau Rezki (Sony)
2020-05-01 21:17   ` Paul E. McKenney
2020-05-04 12:10     ` Uladzislau Rezki
2020-04-28 20:58 ` [PATCH 09/24] rcu/tree: cache specified number of objects Uladzislau Rezki (Sony)
2020-05-01 21:27   ` Paul E. McKenney
2020-05-04 12:43     ` Uladzislau Rezki
2020-05-04 15:24       ` Paul E. McKenney
2020-05-04 17:48         ` Uladzislau Rezki
2020-05-04 18:07           ` Paul E. McKenney
2020-05-04 18:08           ` Joel Fernandes
2020-05-04 19:01             ` Paul E. McKenney
2020-05-04 19:37               ` Joel Fernandes
2020-05-04 19:51                 ` Uladzislau Rezki
2020-05-04 20:15                   ` joel
2020-05-04 20:16                   ` Paul E. McKenney
2020-05-05 11:03                     ` Uladzislau Rezki
2020-04-28 20:58 ` [PATCH 10/24] rcu/tree: add rcutree.rcu_min_cached_objs description Uladzislau Rezki (Sony)
2020-05-01 22:25   ` Paul E. McKenney
2020-05-04 12:44     ` Uladzislau Rezki
2020-04-28 20:58 ` [PATCH 11/24] rcu/tree: Maintain separate array for vmalloc ptrs Uladzislau Rezki (Sony)
2020-05-01 21:37   ` Paul E. McKenney
2020-05-03 23:42     ` Joel Fernandes
2020-05-04  0:20       ` Paul E. McKenney
2020-05-04  0:58         ` Joel Fernandes
2020-05-04  2:20           ` Paul E. McKenney
2020-05-04 14:25     ` Uladzislau Rezki
2020-04-28 20:58 ` [PATCH 12/24] rcu/tiny: support vmalloc in tiny-RCU Uladzislau Rezki (Sony)
2020-04-28 20:58 ` [PATCH 13/24] rcu: Rename rcu_invoke_kfree_callback/rcu_kfree_callback Uladzislau Rezki (Sony)
2020-04-28 20:58 ` [PATCH 14/24] rcu: Rename __is_kfree_rcu_offset() macro Uladzislau Rezki (Sony)
2020-04-28 20:58 ` [PATCH 15/24] rcu: Rename kfree_call_rcu() to the kvfree_call_rcu() Uladzislau Rezki (Sony)
2020-04-28 20:58 ` [PATCH 16/24] mm/list_lru.c: Rename kvfree_rcu() to local variant Uladzislau Rezki (Sony)
2020-04-28 20:58 ` [PATCH 17/24] rcu: Introduce 2 arg kvfree_rcu() interface Uladzislau Rezki (Sony)
2020-04-28 20:58 ` [PATCH 18/24] mm/list_lru.c: Remove kvfree_rcu_local() function Uladzislau Rezki (Sony)
2020-04-28 20:58 ` Uladzislau Rezki (Sony) [this message]
2020-05-01 22:39   ` [PATCH 19/24] rcu/tree: Support reclaim for head-less object Paul E. McKenney
2020-05-04  0:12     ` Joel Fernandes
2020-05-04  0:28       ` Paul E. McKenney
2020-05-04  0:32         ` Joel Fernandes
2020-05-04 14:21           ` Uladzislau Rezki
2020-05-04 15:31             ` Paul E. McKenney
2020-05-04 16:56               ` Uladzislau Rezki
2020-05-04 17:08                 ` Paul E. McKenney
2020-05-04 12:57     ` Uladzislau Rezki
2020-04-28 20:58 ` [PATCH 20/24] rcu/tree: Make kvfree_rcu() tolerate any alignment Uladzislau Rezki (Sony)
2020-05-01 23:00   ` Paul E. McKenney
2020-05-04  0:24     ` Joel Fernandes
2020-05-04  0:29       ` Paul E. McKenney
2020-05-04  0:31         ` Joel Fernandes
2020-05-04 12:56           ` Uladzislau Rezki
2020-04-28 20:59 ` [PATCH 21/24] rcu/tiny: move kvfree_call_rcu() out of header Uladzislau Rezki (Sony)
2020-05-01 23:03   ` Paul E. McKenney
2020-05-04 12:45     ` Uladzislau Rezki
2020-05-06 18:29     ` Uladzislau Rezki
2020-05-06 18:45       ` Paul E. McKenney
2020-05-07 17:34         ` Uladzislau Rezki
2020-04-28 20:59 ` [PATCH 22/24] rcu/tiny: support reclaim for head-less object Uladzislau Rezki (Sony)
2020-05-01 23:06   ` Paul E. McKenney
2020-05-04  0:27     ` Joel Fernandes
2020-05-04 12:45       ` Uladzislau Rezki
2020-04-28 20:59 ` [PATCH 23/24] rcu: Introduce 1 arg kvfree_rcu() interface Uladzislau Rezki (Sony)
2020-04-28 20:59 ` [PATCH 24/24] lib/test_vmalloc.c: Add test cases for kvfree_rcu() Uladzislau Rezki (Sony)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200428205903.61704-20-urezki@gmail.com \
    --to=urezki@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=joel@joelfernandes.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=oleksiy.avramchenko@sonymobile.com \
    --cc=paulmck@kernel.org \
    --cc=rcu@vger.kernel.org \
    --cc=tytso@mit.edu \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).