RCU Archive on lore.kernel.org
 help / color / Atom feed
From: Joel Fernandes <joel@joelfernandes.org>
To: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
Cc: LKML <linux-kernel@vger.kernel.org>,
	"Paul E . McKenney" <paulmck@kernel.org>,
	RCU <rcu@vger.kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Steven Rostedt <rostedt@goodmis.org>,
	Oleksiy Avramchenko <oleksiy.avramchenko@sonymobile.com>
Subject: Re: [PATCH 6/7] rcu/tiny: support reclaim for head-less object
Date: Sun, 29 Mar 2020 20:56:37 -0400
Message-ID: <20200330005637.GA138004@google.com> (raw)
In-Reply-To: <20200323113621.12048-7-urezki@gmail.com>

On Mon, Mar 23, 2020 at 12:36:20PM +0100, Uladzislau Rezki (Sony) wrote:
> Make a kvfree_call_rcu() function to support head-less
> freeing. Same as for tree-RCU, for such purpose we store
> pointers in array. SLAB and vmalloc ptrs. are mixed and
> coexist together.
> 
> Under high memory pressure it can be that maintaining of
> arrays becomes impossible. Objects with an rcu_head are
> released via call_rcu(). When it comes to the head-less
> variant, the kvfree() call is directly inlined, i.e. we
> do the same as for tree-RCU:
>     a) wait until a grace period has elapsed;
>     b) direct inlining of the kvfree() call.
> 
> Thus the current context has to follow might_sleep()
> annotation. Also please note that for tiny-RCU any
> call of synchronize_rcu() is actually a quiescent
> state, therefore (a) does nothing.

Hmm, Ok. So on -tiny, if there's any allocation failure ever, we immediately
revert to call_rcu(). I guess we could also create a regular (non-array)
queue for objects with an rcu_head and queue it on that (since it does not
need allocation) in case of array allocation failure, however that may not be
worth it. So this LGTM. Thanks!

For entire series:
Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
(I will submit a follow-up to fix the tagging, please let me submit Vlad's
entire series with some patches on top -- I also did a bit of wordsmithing in
the commit messages of this series).

Loved the might_sleep() idea btw, I suppose if atomic context wants to do
kvfree_rcu(), then we could also have kfree_rcu() defer the kvfree_rcu() to
execute from a workqueue. Thoughts? We can then allow poor insomniacs from
calling this API :)

thanks,

 - Joel


> Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
> ---
>  kernel/rcu/tiny.c | 157 +++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 156 insertions(+), 1 deletion(-)
> 
> diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
> index 508c82faa45c..b1c31a935db9 100644
> --- a/kernel/rcu/tiny.c
> +++ b/kernel/rcu/tiny.c
> @@ -40,6 +40,29 @@ static struct rcu_ctrlblk rcu_ctrlblk = {
>  	.curtail	= &rcu_ctrlblk.rcucblist,
>  };
>  
> +/* Can be common with tree-RCU. */
> +#define KVFREE_DRAIN_JIFFIES (HZ / 50)
> +
> +/* Can be common with tree-RCU. */
> +struct kvfree_rcu_bulk_data {
> +	unsigned long nr_records;
> +	struct kvfree_rcu_bulk_data *next;
> +	void *records[];
> +};
> +
> +/* Can be common with tree-RCU. */
> +#define KVFREE_BULK_MAX_ENTR \
> +	((PAGE_SIZE - sizeof(struct kvfree_rcu_bulk_data)) / sizeof(void *))
> +
> +static struct kvfree_rcu_bulk_data *kvhead;
> +static struct kvfree_rcu_bulk_data *kvhead_free;
> +static struct kvfree_rcu_bulk_data *kvcache;
> +
> +static DEFINE_STATIC_KEY_FALSE(rcu_init_done);
> +static struct delayed_work monitor_work;
> +static struct rcu_work rcu_work;
> +static bool monitor_todo;
> +
>  void rcu_barrier(void)
>  {
>  	wait_rcu_gp(call_rcu);
> @@ -177,9 +200,137 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func)
>  }
>  EXPORT_SYMBOL_GPL(call_rcu);
>  
> +static inline bool
> +kvfree_call_rcu_add_ptr_to_bulk(void *ptr)
> +{
> +	struct kvfree_rcu_bulk_data *bnode;
> +
> +	if (!kvhead || kvhead->nr_records == KVFREE_BULK_MAX_ENTR) {
> +		bnode = xchg(&kvcache, NULL);
> +		if (!bnode)
> +			bnode = (struct kvfree_rcu_bulk_data *)
> +				__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
> +
> +		if (unlikely(!bnode))
> +			return false;
> +
> +		/* Initialize the new block. */
> +		bnode->nr_records = 0;
> +		bnode->next = kvhead;
> +
> +		/* Attach it to the bvhead. */
> +		kvhead = bnode;
> +	}
> +
> +	/* Done. */
> +	kvhead->records[kvhead->nr_records++] = ptr;
> +	return true;
> +}
> +
> +static void
> +kvfree_rcu_work(struct work_struct *work)
> +{
> +	struct kvfree_rcu_bulk_data *kvhead_tofree, *next;
> +	unsigned long flags;
> +	int i;
> +
> +	local_irq_save(flags);
> +	kvhead_tofree = kvhead_free;
> +	kvhead_free = NULL;
> +	local_irq_restore(flags);
> +
> +	/* Reclaim process. */
> +	for (; kvhead_tofree; kvhead_tofree = next) {
> +		next = kvhead_tofree->next;
> +
> +		for (i = 0; i < kvhead_tofree->nr_records; i++) {
> +			debug_rcu_head_unqueue((struct rcu_head *)
> +				kvhead_tofree->records[i]);
> +			kvfree(kvhead_tofree->records[i]);
> +		}
> +
> +		if (cmpxchg(&kvcache, NULL, kvhead_tofree))
> +			free_page((unsigned long) kvhead_tofree);
> +	}
> +}
> +
> +static inline bool
> +queue_kvfree_rcu_work(void)
> +{
> +	/* Check if the free channel is available. */
> +	if (kvhead_free)
> +		return false;
> +
> +	kvhead_free = kvhead;
> +	kvhead = NULL;
> +
> +	/*
> +	 * Queue the job for memory reclaim after GP.
> +	 */
> +	queue_rcu_work(system_wq, &rcu_work);
> +	return true;
> +}
> +
> +static void kvfree_rcu_monitor(struct work_struct *work)
> +{
> +	unsigned long flags;
> +	bool queued;
> +
> +	local_irq_save(flags);
> +	queued = queue_kvfree_rcu_work();
> +	if (queued)
> +		/* Success. */
> +		monitor_todo = false;
> +	local_irq_restore(flags);
> +
> +	/*
> +	 * If previous RCU reclaim process is still in progress,
> +	 * schedule the work one more time to try again later.
> +	 */
> +	if (monitor_todo)
> +		schedule_delayed_work(&monitor_work,
> +			KVFREE_DRAIN_JIFFIES);
> +}
> +
>  void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
>  {
> -	call_rcu(head, func);
> +	unsigned long flags;
> +	bool success;
> +	void *ptr;
> +
> +	if (head) {
> +		ptr = (void *) head - (unsigned long) func;
> +	} else {
> +		might_sleep();
> +		ptr = (void *) func;
> +	}
> +
> +	if (debug_rcu_head_queue(ptr)) {
> +		/* Probable double free, just leak. */
> +		WARN_ONCE(1, "%s(): Double-freed call. rcu_head %p\n",
> +			  __func__, head);
> +		return;
> +	}
> +
> +	local_irq_save(flags);
> +	success = kvfree_call_rcu_add_ptr_to_bulk(ptr);
> +	if (static_branch_likely(&rcu_init_done)) {
> +		if (success && !monitor_todo) {
> +			monitor_todo = true;
> +			schedule_delayed_work(&monitor_work,
> +				KVFREE_DRAIN_JIFFIES);
> +		}
> +	}
> +	local_irq_restore(flags);
> +
> +	if (!success) {
> +		if (!head) {
> +			synchronize_rcu();
> +			kvfree(ptr);
> +		} else {
> +			call_rcu(head, func);
> +		}
> +	}
>  }
>  EXPORT_SYMBOL_GPL(kvfree_call_rcu);
>  
> @@ -188,4 +339,8 @@ void __init rcu_init(void)
>  	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
>  	rcu_early_boot_tests();
>  	srcu_init();
> +
> +	INIT_DELAYED_WORK(&monitor_work, kvfree_rcu_monitor);
> +	INIT_RCU_WORK(&rcu_work, kvfree_rcu_work);
> +	static_branch_enable(&rcu_init_done);
>  }
> -- 
> 2.20.1
> 

  reply index

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-03-23 11:36 [PATCH 0/7] Headless support in the kvfree_rcu() Uladzislau Rezki (Sony)
2020-03-23 11:36 ` [PATCH 1/7] rcu/tree: simplify KFREE_BULK_MAX_ENTR macro Uladzislau Rezki (Sony)
2020-03-23 11:36 ` [PATCH 2/7] rcu/tree: maintain separate array for vmalloc ptrs Uladzislau Rezki (Sony)
2020-03-23 11:36 ` [PATCH 3/7] rcu/tree: introduce expedited_drain flag Uladzislau Rezki (Sony)
2020-03-23 11:36 ` [PATCH 4/7] rcu/tree: support reclaim for head-less object Uladzislau Rezki (Sony)
2020-03-29 22:56   ` Joel Fernandes
2020-03-30 12:48     ` Uladzislau Rezki
2020-03-23 11:36 ` [PATCH 5/7] rcu/tiny: move kvfree_call_rcu() out of header Uladzislau Rezki (Sony)
2020-03-23 11:36 ` [PATCH 6/7] rcu/tiny: support reclaim for head-less object Uladzislau Rezki (Sony)
2020-03-30  0:56   ` Joel Fernandes [this message]
2020-03-30 14:42     ` Uladzislau Rezki
2020-03-23 11:36 ` [PATCH 7/7] rcu: support headless variant in the kvfree_rcu() Uladzislau Rezki (Sony)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200330005637.GA138004@google.com \
    --to=joel@joelfernandes.org \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=oleksiy.avramchenko@sonymobile.com \
    --cc=paulmck@kernel.org \
    --cc=rcu@vger.kernel.org \
    --cc=rostedt@goodmis.org \
    --cc=urezki@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

RCU Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/rcu/0 rcu/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 rcu rcu/ https://lore.kernel.org/rcu \
		rcu@vger.kernel.org
	public-inbox-index rcu

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.rcu


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git