linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
To: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <peterz@infradead.org>,
	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"linux-mm@kvack.org" <linux-mm@kvack.org>,
	"minchan.kim@gmail.com" <minchan.kim@gmail.com>,
	cl@linux-foundation.org,
	"hugh.dickins" <hugh.dickins@tiscali.co.uk>,
	Nick Piggin <nickpiggin@yahoo.com.au>,
	Ingo Molnar <mingo@elte.hu>,
	Linus Torvalds <torvalds@linux-foundation.org>
Subject: Re: [RFC][PATCH 4/8] mm: RCU free vmas
Date: Mon, 4 Jan 2010 18:43:36 -0800	[thread overview]
Message-ID: <20100105024336.GQ6748@linux.vnet.ibm.com> (raw)
In-Reply-To: <20100104182813.479668508@chello.nl>

On Mon, Jan 04, 2010 at 07:24:33PM +0100, Peter Zijlstra wrote:
> TODO:
>  - should be SRCU, lack of call_srcu()
> 
> In order to allow speculative vma lookups, RCU free the struct
> vm_area_struct.
> 
> We use two means of detecting a vma is still valid:
>  - firstly, we set RB_CLEAR_NODE once we remove a vma from the tree.
>  - secondly, we check the vma sequence number.
> 
> These two things combined will guarantee that 1) the vma is still
> present and 2) it still covers the same range as when we looked it
> up.

OK, I think I see what you are up to here.  I could get you a very crude
throw-away call_srcu() fairly quickly.  I don't yet have a good estimate
of how long it will take me to merge SRCU into the treercu infrastructure,
but am getting there.

So, which release are you thinking in terms of?

							Thanx, Paul

> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> ---
>  include/linux/mm.h       |   12 ++++++++++++
>  include/linux/mm_types.h |    2 ++
>  init/Kconfig             |   34 +++++++++++++++++-----------------
>  kernel/sched.c           |    9 ++++++++-
>  mm/mmap.c                |   33 +++++++++++++++++++++++++++++++--
>  5 files changed, 70 insertions(+), 20 deletions(-)
> 
> Index: linux-2.6/include/linux/mm.h
> ===================================================================
> --- linux-2.6.orig/include/linux/mm.h
> +++ linux-2.6/include/linux/mm.h
> @@ -765,6 +765,18 @@ unsigned long unmap_vmas(struct mmu_gath
>  		unsigned long end_addr, unsigned long *nr_accounted,
>  		struct zap_details *);
> 
> +static inline int vma_is_dead(struct vm_area_struct *vma, unsigned int sequence)
> +{
> +	int ret = RB_EMPTY_NODE(&vma->vm_rb);
> +	unsigned seq = vma->vm_sequence.sequence;
> +	/*
> +	 * Matches both the wmb in write_seqlock_begin/end() and
> +	 * the wmb in detach_vmas_to_be_unmapped()/__unlink_vma().
> +	 */
> +	smp_rmb();
> +	return ret || seq != sequence;
> +}
> +
>  /**
>   * mm_walk - callbacks for walk_page_range
>   * @pgd_entry: if set, called for each non-empty PGD (top-level) entry
> Index: linux-2.6/include/linux/mm_types.h
> ===================================================================
> --- linux-2.6.orig/include/linux/mm_types.h
> +++ linux-2.6/include/linux/mm_types.h
> @@ -13,6 +13,7 @@
>  #include <linux/cpumask.h>
>  #include <linux/page-debug-flags.h>
>  #include <linux/seqlock.h>
> +#include <linux/rcupdate.h>
>  #include <asm/page.h>
>  #include <asm/mmu.h>
> 
> @@ -188,6 +189,7 @@ struct vm_area_struct {
>  	struct mempolicy *vm_policy;	/* NUMA policy for the VMA */
>  #endif
>  	seqcount_t vm_sequence;
> +	struct rcu_head vm_rcu_head;
>  };
> 
>  struct core_thread {
> Index: linux-2.6/mm/mmap.c
> ===================================================================
> --- linux-2.6.orig/mm/mmap.c
> +++ linux-2.6/mm/mmap.c
> @@ -222,6 +222,19 @@ void unlink_file_vma(struct vm_area_stru
>  	}
>  }
> 
> +static void free_vma_rcu(struct rcu_head *head)
> +{
> +	struct vm_area_struct *vma =
> +		container_of(head, struct vm_area_struct, vm_rcu_head);
> +
> +	kmem_cache_free(vm_area_cachep, vma);
> +}
> +
> +static void free_vma(struct vm_area_struct *vma)
> +{
> +	call_rcu(&vma->vm_rcu_head, free_vma_rcu);
> +}
> +
>  /*
>   * Close a vm structure and free it, returning the next.
>   */
> @@ -238,7 +251,7 @@ static struct vm_area_struct *remove_vma
>  			removed_exe_file_vma(vma->vm_mm);
>  	}
>  	mpol_put(vma_policy(vma));
> -	kmem_cache_free(vm_area_cachep, vma);
> +	free_vma(vma);
>  	return next;
>  }
> 
> @@ -488,6 +501,14 @@ __vma_unlink(struct mm_struct *mm, struc
>  {
>  	prev->vm_next = vma->vm_next;
>  	rb_erase(&vma->vm_rb, &mm->mm_rb);
> +	/*
> +	 * Ensure the removal is completely committed to memory
> +	 * before clearing the node.
> +	 *
> +	 * Matched by vma_is_dead()/handle_speculative_fault().
> +	 */
> +	smp_wmb();
> +	RB_CLEAR_NODE(&vma->vm_rb);
>  	if (mm->mmap_cache == vma)
>  		mm->mmap_cache = prev;
>  }
> @@ -644,7 +665,7 @@ again:			remove_next = 1 + (end > next->
>  		}
>  		mm->map_count--;
>  		mpol_put(vma_policy(next));
> -		kmem_cache_free(vm_area_cachep, next);
> +		free_vma(next);
>  		/*
>  		 * In mprotect's case 6 (see comments on vma_merge),
>  		 * we must remove another next too. It would clutter
> @@ -1858,6 +1879,14 @@ detach_vmas_to_be_unmapped(struct mm_str
>  	insertion_point = (prev ? &prev->vm_next : &mm->mmap);
>  	do {
>  		rb_erase(&vma->vm_rb, &mm->mm_rb);
> +		/*
> +		 * Ensure the removal is completely committed to memory
> +		 * before clearing the node.
> +		 *
> +		 * Matched by vma_is_dead()/handle_speculative_fault().
> +		 */
> +		smp_wmb();
> +		RB_CLEAR_NODE(&vma->vm_rb);
>  		mm->map_count--;
>  		tail_vma = vma;
>  		vma = vma->vm_next;
> Index: linux-2.6/init/Kconfig
> ===================================================================
> --- linux-2.6.orig/init/Kconfig
> +++ linux-2.6/init/Kconfig
> @@ -314,19 +314,19 @@ menu "RCU Subsystem"
> 
>  choice
>  	prompt "RCU Implementation"
> -	default TREE_RCU
> +	default TREE_PREEMPT_RCU
> 
> -config TREE_RCU
> -	bool "Tree-based hierarchical RCU"
> -	help
> -	  This option selects the RCU implementation that is
> -	  designed for very large SMP system with hundreds or
> -	  thousands of CPUs.  It also scales down nicely to
> -	  smaller systems.
> +#config TREE_RCU
> +#	bool "Tree-based hierarchical RCU"
> +#	help
> +#	  This option selects the RCU implementation that is
> +#	  designed for very large SMP system with hundreds or
> +#	  thousands of CPUs.  It also scales down nicely to
> +#	  smaller systems.
> 
>  config TREE_PREEMPT_RCU
>  	bool "Preemptable tree-based hierarchical RCU"
> -	depends on PREEMPT
> +#	depends on PREEMPT
>  	help
>  	  This option selects the RCU implementation that is
>  	  designed for very large SMP systems with hundreds or
> @@ -334,14 +334,14 @@ config TREE_PREEMPT_RCU
>  	  is also required.  It also scales down nicely to
>  	  smaller systems.
> 
> -config TINY_RCU
> -	bool "UP-only small-memory-footprint RCU"
> -	depends on !SMP
> -	help
> -	  This option selects the RCU implementation that is
> -	  designed for UP systems from which real-time response
> -	  is not required.  This option greatly reduces the
> -	  memory footprint of RCU.
> +#config TINY_RCU
> +#	bool "UP-only small-memory-footprint RCU"
> +#	depends on !SMP
> +#	help
> +#	  This option selects the RCU implementation that is
> +#	  designed for UP systems from which real-time response
> +#	  is not required.  This option greatly reduces the
> +#	  memory footprint of RCU.
> 
>  endchoice
> 
> Index: linux-2.6/kernel/sched.c
> ===================================================================
> --- linux-2.6.orig/kernel/sched.c
> +++ linux-2.6/kernel/sched.c
> @@ -9689,7 +9689,14 @@ void __init sched_init(void)
>  #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
>  static inline int preempt_count_equals(int preempt_offset)
>  {
> -	int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth();
> +	int nested = (preempt_count() & ~PREEMPT_ACTIVE)
> +		/*
> +		 * remove this for we need preemptible RCU
> +		 * exactly because it needs to sleep..
> +		 *
> +		 + rcu_preempt_depth()
> +		 */
> +		;
> 
>  	return (nested == PREEMPT_INATOMIC_BASE + preempt_offset);
>  }
> 
> -- 
> 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2010-01-05  2:43 UTC|newest]

Thread overview: 121+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-01-04 18:24 [RFC][PATCH 0/8] Speculative pagefault -v3 Peter Zijlstra
2010-01-04 18:24 ` [RFC][PATCH 1/8] mm: Remove pte reference from fault path Peter Zijlstra
2010-01-04 18:24 ` [RFC][PATCH 2/8] mm: Speculative pagefault infrastructure Peter Zijlstra
2010-01-04 18:24 ` [RFC][PATCH 3/8] mm: Add vma sequence count Peter Zijlstra
2010-01-04 18:24 ` [RFC][PATCH 4/8] mm: RCU free vmas Peter Zijlstra
2010-01-05  2:43   ` Paul E. McKenney [this message]
2010-01-05  8:28     ` Peter Zijlstra
2010-01-05 16:05       ` Paul E. McKenney
2010-01-04 18:24 ` [RFC][PATCH 5/8] mm: Speculative pte_map_lock() Peter Zijlstra
2010-01-04 18:24 ` [RFC][PATCH 6/8] mm: handle_speculative_fault() Peter Zijlstra
2010-01-05  0:25   ` KAMEZAWA Hiroyuki
2010-01-05  3:13     ` Linus Torvalds
2010-01-05  8:17       ` Peter Zijlstra
2010-01-05  8:57       ` Peter Zijlstra
2010-01-05 15:34         ` Linus Torvalds
2010-01-05 15:40           ` Al Viro
2010-01-05 16:10             ` Linus Torvalds
2010-01-06 15:41               ` Peter Zijlstra
2010-01-05  9:37       ` Peter Zijlstra
2010-01-05 23:35         ` Linus Torvalds
2010-01-05  4:29     ` Minchan Kim
2010-01-05  4:43       ` KAMEZAWA Hiroyuki
2010-01-05  5:10         ` Linus Torvalds
2010-01-05  5:30           ` KAMEZAWA Hiroyuki
2010-01-05  7:39             ` KAMEZAWA Hiroyuki
2010-01-05 15:26               ` Linus Torvalds
2010-01-05 16:14                 ` Linus Torvalds
2010-01-05 17:25                   ` Andi Kleen
2010-01-05 17:47                     ` Christoph Lameter
2010-01-05 18:00                       ` Andi Kleen
2010-01-05 17:55                     ` Linus Torvalds
2010-01-05 18:13                       ` Christoph Lameter
2010-01-05 18:25                         ` Linus Torvalds
2010-01-05 18:46                           ` Christoph Lameter
2010-01-05 18:56                             ` Linus Torvalds
2010-01-05 19:15                               ` Christoph Lameter
2010-01-05 19:28                                 ` Linus Torvalds
2010-01-05 18:55                           ` Paul E. McKenney
2010-01-05 19:08                             ` Linus Torvalds
2010-01-05 19:23                               ` Paul E. McKenney
2010-01-05 20:29                           ` Peter Zijlstra
2010-01-05 20:46                             ` Linus Torvalds
2010-01-05 21:00                               ` Linus Torvalds
2010-01-05 23:29                             ` Paul E. McKenney
2010-01-06  0:22                 ` KAMEZAWA Hiroyuki
2010-01-06  1:37                   ` Linus Torvalds
2010-01-06  2:52                     ` KAMEZAWA Hiroyuki
2010-01-06  3:27                       ` Linus Torvalds
2010-01-06  3:56                         ` KAMEZAWA Hiroyuki
2010-01-06  4:20                           ` Linus Torvalds
2010-01-06  7:06                             ` KAMEZAWA Hiroyuki
2010-01-06  7:49                               ` Minchan Kim
2010-01-06  9:39                               ` Linus Torvalds
2010-01-07  1:00                                 ` KAMEZAWA Hiroyuki
2010-01-08 16:53                             ` Peter Zijlstra
2010-01-08 17:22                               ` Linus Torvalds
2010-01-08 17:43                                 ` Christoph Lameter
2010-01-08 17:52                                   ` Linus Torvalds
2010-01-08 18:33                                     ` Christoph Lameter
2010-01-08 18:46                                   ` Andi Kleen
2010-01-08 18:56                                     ` Christoph Lameter
2010-01-08 19:10                                       ` Andi Kleen
2010-01-08 19:11                                       ` Linus Torvalds
2010-01-08 19:28                                         ` Andi Kleen
2010-01-08 19:39                                           ` Linus Torvalds
2010-01-08 19:42                                             ` Linus Torvalds
2010-01-08 21:36                                   ` Linus Torvalds
2010-01-08 21:46                                     ` Christoph Lameter
2010-01-08 22:43                                       ` Linus Torvalds
2010-01-08 22:43                                       ` Linus Torvalds
2010-01-09 14:47                               ` Ed Tomlinson
2010-01-10  5:27                                 ` Nitin Gupta
2010-01-05 15:14             ` Christoph Lameter
2010-01-05  8:18           ` Peter Zijlstra
2010-01-05  6:00         ` Minchan Kim
2010-01-05  4:48       ` Linus Torvalds
2010-01-05  6:09         ` Minchan Kim
2010-01-05  6:09           ` KAMEZAWA Hiroyuki
2010-01-05  6:24             ` Minchan Kim
2010-01-05  8:35           ` Peter Zijlstra
2010-01-05 13:45   ` Arjan van de Ven
2010-01-05 14:15     ` Andi Kleen
2010-01-05 15:17     ` Christoph Lameter
2010-01-06  3:22       ` Arjan van de Ven
2010-01-07 16:11         ` Christoph Lameter
2010-01-07 16:19           ` Linus Torvalds
2010-01-07 16:31             ` Linus Torvalds
2010-01-07 16:34             ` Paul E. McKenney
2010-01-07 16:36             ` Christoph Lameter
2010-01-08  4:49               ` Arjan van de Ven
2010-01-08  5:00                 ` Linus Torvalds
2010-01-08 15:51                 ` Christoph Lameter
2010-01-09 15:55                   ` Arjan van de Ven
2010-01-07 17:22             ` Peter Zijlstra
2010-01-07 17:36               ` Linus Torvalds
2010-01-07 17:49                 ` Linus Torvalds
2010-01-07 18:00                   ` Peter Zijlstra
2010-01-07 18:15                     ` Linus Torvalds
2010-01-07 21:49                       ` Peter Zijlstra
2010-01-07 18:44                   ` Linus Torvalds
2010-01-07 19:20                     ` Paul E. McKenney
2010-01-07 20:06                       ` Linus Torvalds
2010-01-07 20:25                         ` Paul E. McKenney
2010-01-07 19:24                     ` Christoph Lameter
2010-01-07 20:08                       ` Linus Torvalds
2010-01-07 20:13                         ` Linus Torvalds
2010-01-07 21:44                     ` Peter Zijlstra
2010-01-07 22:33                       ` Linus Torvalds
2010-01-08  0:23                         ` KAMEZAWA Hiroyuki
2010-01-08  0:25                           ` KAMEZAWA Hiroyuki
2010-01-08  0:39                           ` Linus Torvalds
2010-01-08  0:41                             ` Linus Torvalds
2010-01-07 23:51                 ` Rik van Riel
2010-01-04 18:24 ` [RFC][PATCH 7/8] mm,x86: speculative pagefault support Peter Zijlstra
2010-01-04 18:24 ` [RFC][PATCH 8/8] mm: Optimize pte_map_lock() Peter Zijlstra
2010-01-04 21:41 ` [RFC][PATCH 0/8] Speculative pagefault -v3 Rik van Riel
2010-01-04 21:46   ` Peter Zijlstra
2010-01-04 23:20     ` Rik van Riel
2010-01-04 21:59   ` Christoph Lameter
2010-01-05  0:28     ` KAMEZAWA Hiroyuki
2010-01-05  2:26 ` Minchan Kim

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100105024336.GQ6748@linux.vnet.ibm.com \
    --to=paulmck@linux.vnet.ibm.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=cl@linux-foundation.org \
    --cc=hugh.dickins@tiscali.co.uk \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=minchan.kim@gmail.com \
    --cc=mingo@elte.hu \
    --cc=nickpiggin@yahoo.com.au \
    --cc=peterz@infradead.org \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).