RCU Archive on lore.kernel.org
 help / color / Atom feed
From: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
To: LKML <linux-kernel@vger.kernel.org>,
	"Paul E . McKenney" <paulmck@kernel.org>,
	Joel Fernandes <joel@joelfernandes.org>
Cc: RCU <rcu@vger.kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Uladzislau Rezki <urezki@gmail.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Oleksiy Avramchenko <oleksiy.avramchenko@sonymobile.com>
Subject: [PATCH 4/7] rcu/tree: support reclaim for head-less object
Date: Mon, 23 Mar 2020 12:36:18 +0100
Message-ID: <20200323113621.12048-5-urezki@gmail.com> (raw)
In-Reply-To: <20200323113621.12048-1-urezki@gmail.com>

Update kvfree_call_rcu() with head-less support, meaning
that an object without any rcu_head structure can be
reclaimed after a grace period (GP).

To store pointers, two chain-arrays are maintained: one
for SLAB and another one for vmalloc. Both kinds of
objects (the head-less variant and the regular one) are
placed there based on the type.

Maintaining the arrays can become impossible under high
memory pressure. For that reason there is an emergency
path. In that case objects with an rcu_head inside are
simply queued onto a singly linked list. Later on that
list is drained.

As for the head-less variant, such objects do not have any
rcu_head inside, thus one is dynamically attached. As a
result, the attached wrapper consists of a back-pointer to
the object and a regular rcu_head. The emergency path has
to be able to detect such objects, therefore they are
tagged, so that the object referenced by the back-pointer
can be freed as well as the dynamically attached wrapper.

Even though such an approach requires dynamic memory, it
needs only sizeof(unsigned long *) + sizeof(struct rcu_head)
bytes, thus SLAB is used to obtain it. Finally, if attaching
the rcu_head and queuing both fail, the current context is
required to follow the might_sleep() annotation, thus the
steps below can be applied:
   a) wait until a grace period has elapsed;
   b) directly inline the kvfree() call.

Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
---
 kernel/rcu/tree.c | 94 +++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 86 insertions(+), 8 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 869a72e25d38..5a64c92feafc 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2892,19 +2892,34 @@ static void kfree_rcu_work(struct work_struct *work)
 	 * when we could not allocate a bulk array.
 	 *
 	 * Under that condition an object is queued to the
-	 * list instead.
+	 * list instead. Please note that head-less objects
+	 * have dynamically attached rcu_head, so they also
+	 * contain a back-pointer that has to be freed.
 	 */
 	for (; head; head = next) {
 		unsigned long offset = (unsigned long)head->func;
-		void *ptr = (void *)head - offset;
+		bool headless;
+		void *ptr;
 
 		next = head->next;
+
+		/* We tag the headless object, if so adjust offset. */
+		headless = (((unsigned long) head - offset) & BIT(0));
+		if (headless)
+			offset -= 1;
+
+		ptr = (void *) head - offset;
 		debug_rcu_head_unqueue((struct rcu_head *)ptr);
+
 		rcu_lock_acquire(&rcu_callback_map);
 		trace_rcu_invoke_kvfree_callback(rcu_state.name, head, offset);
 
-		if (!WARN_ON_ONCE(!__is_kvfree_rcu_offset(offset)))
+		if (!WARN_ON_ONCE(!__is_kvfree_rcu_offset(offset))) {
+			if (headless)
+				kvfree((void *) *((unsigned long *) ptr));
+
 			kvfree(ptr);
+		}
 
 		rcu_lock_release(&rcu_callback_map);
 		cond_resched_tasks_rcu_qs();
@@ -3053,6 +3068,25 @@ kvfree_call_rcu_add_ptr_to_bulk(struct kfree_rcu_cpu *krcp, void *ptr)
 	return true;
 }
 
+static inline struct rcu_head *
+attach_rcu_head_to_object(void *obj)
+{
+	unsigned long *ptr;
+
+	ptr = kmalloc(sizeof(unsigned long *) +
+			sizeof(struct rcu_head), GFP_NOWAIT | __GFP_NOWARN);
+
+	if (!ptr)
+		ptr = kmalloc(sizeof(unsigned long *) +
+				sizeof(struct rcu_head), GFP_ATOMIC | __GFP_NOWARN);
+
+	if (!ptr)
+		return NULL;
+
+	ptr[0] = (unsigned long) obj;
+	return ((struct rcu_head *) ++ptr);
+}
+
 /*
  * Queue a request for lazy invocation of appropriate free routine after a
  * grace period. Please note there are three paths are maintained, two are the
@@ -3071,20 +3105,37 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 	unsigned long flags;
 	struct kfree_rcu_cpu *krcp;
 	bool expedited_drain = false;
+	bool success;
 	void *ptr;
 
+	if (head) {
+		ptr = (void *) head - (unsigned long) func;
+	} else {
+		/*
+		 * Please note there is a limitation for the head-less
+		 * variant, that is why there is a clear rule for such
+		 * objects:
+		 *
+		 * use it from might_sleep() context only. For other
+		 * places please embed an rcu_head to your structures.
+		 */
+		might_sleep();
+		ptr = (unsigned long *) func;
+	}
+
 	local_irq_save(flags);	// For safely calling this_cpu_ptr().
 	krcp = this_cpu_ptr(&krc);
 	if (krcp->initialized)
 		spin_lock(&krcp->lock);
 
-	ptr = (void *)head - (unsigned long)func;
-
 	// Queue the object but don't yet schedule the batch.
 	if (debug_rcu_head_queue(ptr)) {
 		// Probable double kfree_rcu(), just leak.
 		WARN_ONCE(1, "%s(): Double-freed call. rcu_head %p\n",
 			  __func__, head);
+
+		/* Mark as success and leave. */
+		success = true;
 		goto unlock_return;
 	}
 
@@ -3092,7 +3143,22 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 	 * Under high memory pressure GFP_NOWAIT can fail,
 	 * in that case the emergency path is maintained.
 	 */
-	if (!kvfree_call_rcu_add_ptr_to_bulk(krcp, ptr)) {
+	success = kvfree_call_rcu_add_ptr_to_bulk(krcp, ptr);
+	if (!success) {
+		/* Is headless object? */
+		if (head == NULL) {
+			head = attach_rcu_head_to_object(ptr);
+			if (head == NULL)
+				goto unlock_return;
+
+			/*
+			 * Tag the headless object. Such objects have a back-pointer
+			 * to the original allocated memory, that has to be freed as
+			 * well as dynamically attached wrapper/head.
+			 */
+			func = (rcu_callback_t) (sizeof(unsigned long *) + 1);
+		}
+
 		head->func = func;
 		head->next = krcp->head;
 		krcp->head = head;
@@ -3104,15 +3170,15 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 		 * appropriate free calls.
 		 */
 		expedited_drain = true;
+		success = true;
 	}
 
 	WRITE_ONCE(krcp->count, krcp->count + 1);
 
 	// Set timer to drain after KFREE_DRAIN_JIFFIES.
 	if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
-	    !krcp->monitor_todo) {
+			!krcp->monitor_todo) {
 		krcp->monitor_todo = true;
-
 		schedule_delayed_work(&krcp->monitor_work,
 			expedited_drain ? 0:KFREE_DRAIN_JIFFIES);
 	}
@@ -3121,6 +3187,18 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 	if (krcp->initialized)
 		spin_unlock(&krcp->lock);
 	local_irq_restore(flags);
+
+	/*
+	 * High memory pressure, so inline kvfree() after
+	 * synchronize_rcu(). We can do it from might_sleep()
+	 * context only, so the current CPU can pass the QS
+	 * state.
+	 */
+	if (!success) {
+		debug_rcu_head_unqueue(ptr);
+		synchronize_rcu();
+		kvfree(ptr);
+	}
 }
 EXPORT_SYMBOL_GPL(kvfree_call_rcu);
 
-- 
2.20.1


  parent reply index

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-03-23 11:36 [PATCH 0/7] Headless support in the kvfree_rcu() Uladzislau Rezki (Sony)
2020-03-23 11:36 ` [PATCH 1/7] rcu/tree: simplify KFREE_BULK_MAX_ENTR macro Uladzislau Rezki (Sony)
2020-03-23 11:36 ` [PATCH 2/7] rcu/tree: maintain separate array for vmalloc ptrs Uladzislau Rezki (Sony)
2020-03-23 11:36 ` [PATCH 3/7] rcu/tree: introduce expedited_drain flag Uladzislau Rezki (Sony)
2020-03-23 11:36 ` Uladzislau Rezki (Sony) [this message]
2020-03-29 22:56   ` [PATCH 4/7] rcu/tree: support reclaim for head-less object Joel Fernandes
2020-03-30 12:48     ` Uladzislau Rezki
2020-03-23 11:36 ` [PATCH 5/7] rcu/tiny: move kvfree_call_rcu() out of header Uladzislau Rezki (Sony)
2020-03-23 11:36 ` [PATCH 6/7] rcu/tiny: support reclaim for head-less object Uladzislau Rezki (Sony)
2020-03-30  0:56   ` Joel Fernandes
2020-03-30 14:42     ` Uladzislau Rezki
2020-03-23 11:36 ` [PATCH 7/7] rcu: support headless variant in the kvfree_rcu() Uladzislau Rezki (Sony)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200323113621.12048-5-urezki@gmail.com \
    --to=urezki@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=joel@joelfernandes.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=oleksiy.avramchenko@sonymobile.com \
    --cc=paulmck@kernel.org \
    --cc=rcu@vger.kernel.org \
    --cc=rostedt@goodmis.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

RCU Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/rcu/0 rcu/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 rcu rcu/ https://lore.kernel.org/rcu \
		rcu@vger.kernel.org
	public-inbox-index rcu

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.rcu


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git