* [PATCH] memcg: remove unneeded preempt_disable
From: Greg Thelen @ 2011-08-18  6:50 UTC
  To: Andrew Morton
  Cc: linux-kernel, linux-mm, KAMEZAWA Hiroyuki, Balbir Singh,
	Daisuke Nishimura, Greg Thelen

Both mem_cgroup_charge_statistics() and mem_cgroup_move_account() were
unnecessarily disabling preemption when adjusting per-cpu counters:
    preempt_disable()
    __this_cpu_xxx()
    __this_cpu_yyy()
    preempt_enable()

This change does not disable preemption and thus CPU switch is possible
within these routines.  This does not cause a problem because the total
of all cpu counters is summed when reporting stats.  Now both
mem_cgroup_charge_statistics() and mem_cgroup_move_account() look like:
    this_cpu_xxx()
    this_cpu_yyy()

Reported-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Greg Thelen <gthelen@google.com>
---
 mm/memcontrol.c |   20 +++++++-------------
 1 files changed, 7 insertions(+), 13 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c6faa32..048b205 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -664,24 +664,20 @@ static unsigned long mem_cgroup_read_events(struct mem_cgroup *mem,
 static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
 					 bool file, int nr_pages)
 {
-	preempt_disable();
-
 	if (file)
-		__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], nr_pages);
+		this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], nr_pages);
 	else
-		__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_RSS], nr_pages);
+		this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_RSS], nr_pages);
 
 	/* pagein of a big page is an event. So, ignore page size */
 	if (nr_pages > 0)
-		__this_cpu_inc(mem->stat->events[MEM_CGROUP_EVENTS_PGPGIN]);
+		this_cpu_inc(mem->stat->events[MEM_CGROUP_EVENTS_PGPGIN]);
 	else {
-		__this_cpu_inc(mem->stat->events[MEM_CGROUP_EVENTS_PGPGOUT]);
+		this_cpu_inc(mem->stat->events[MEM_CGROUP_EVENTS_PGPGOUT]);
 		nr_pages = -nr_pages; /* for event */
 	}
 
-	__this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_COUNT], nr_pages);
-
-	preempt_enable();
+	this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_COUNT], nr_pages);
 }
 
 unsigned long
@@ -2713,10 +2709,8 @@ static int mem_cgroup_move_account(struct page *page,
 
 	if (PageCgroupFileMapped(pc)) {
 		/* Update mapped_file data for mem_cgroup */
-		preempt_disable();
-		__this_cpu_dec(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
-		__this_cpu_inc(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
-		preempt_enable();
+		this_cpu_dec(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
+		this_cpu_inc(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
 	}
 	mem_cgroup_charge_statistics(from, PageCgroupCache(pc), -nr_pages);
 	if (uncharge)
-- 
1.7.3.1


* Re: [PATCH] memcg: remove unneeded preempt_disable
From: KAMEZAWA Hiroyuki @ 2011-08-18  6:52 UTC
  To: Greg Thelen
  Cc: Andrew Morton, linux-kernel, linux-mm, Balbir Singh, Daisuke Nishimura

On Wed, 17 Aug 2011 23:50:53 -0700
Greg Thelen <gthelen@google.com> wrote:

> Both mem_cgroup_charge_statistics() and mem_cgroup_move_account() were
> unnecessarily disabling preemption when adjusting per-cpu counters:
>     preempt_disable()
>     __this_cpu_xxx()
>     __this_cpu_yyy()
>     preempt_enable()
> 
> This change does not disable preemption and thus CPU switch is possible
> within these routines.  This does not cause a problem because the total
> of all cpu counters is summed when reporting stats.  Now both
> mem_cgroup_charge_statistics() and mem_cgroup_move_account() look like:
>     this_cpu_xxx()
>     this_cpu_yyy()
> 
> Reported-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> Signed-off-by: Greg Thelen <gthelen@google.com>

Thank you.
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>


* Re: [PATCH] memcg: remove unneeded preempt_disable
From: Johannes Weiner @ 2011-08-18  9:38 UTC
  To: Greg Thelen
  Cc: Andrew Morton, linux-kernel, linux-mm, KAMEZAWA Hiroyuki,
	Balbir Singh, Daisuke Nishimura

On Wed, Aug 17, 2011 at 11:50:53PM -0700, Greg Thelen wrote:
> Both mem_cgroup_charge_statistics() and mem_cgroup_move_account() were
> unnecessarily disabling preemption when adjusting per-cpu counters:
>     preempt_disable()
>     __this_cpu_xxx()
>     __this_cpu_yyy()
>     preempt_enable()
> 
> This change does not disable preemption and thus CPU switch is possible
> within these routines.  This does not cause a problem because the total
> of all cpu counters is summed when reporting stats.  Now both
> mem_cgroup_charge_statistics() and mem_cgroup_move_account() look like:
>     this_cpu_xxx()
>     this_cpu_yyy()

Note that on non-x86, these operations themselves actually disable and
reenable preemption each time, so you trade a pair of add and sub on
x86

-	preempt_disable()
	__this_cpu_xxx()
	__this_cpu_yyy()
-	preempt_enable()

with

	preempt_disable()
	__this_cpu_xxx()
+	preempt_enable()
+	preempt_disable()
	__this_cpu_yyy()
	preempt_enable()

everywhere else.
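
For reference, the generic !x86 fallback in include/linux/percpu.h expands
each this_cpu op to a preempt-protected RMW, roughly like this (simplified
sketch, not verbatim kernel source):

    #define _this_cpu_generic_to_op(pcp, val, op)    \
    do {                                             \
        preempt_disable();                           \
        *__this_cpu_ptr(&(pcp)) op val;              \
        preempt_enable();                            \
    } while (0)

    /* e.g. this_cpu_add(pcp, val) maps to
     * _this_cpu_generic_to_op((pcp), (val), +=), so every call
     * pays its own preempt_disable()/preempt_enable() pair. */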

* Re: [PATCH] memcg: remove unneeded preempt_disable
From: Valdis.Kletnieks @ 2011-08-18 14:26 UTC
  To: Johannes Weiner
  Cc: Greg Thelen, Andrew Morton, linux-kernel, linux-mm,
	KAMEZAWA Hiroyuki, Balbir Singh, Daisuke Nishimura

On Thu, 18 Aug 2011 11:38:00 +0200, Johannes Weiner said:

> Note that on non-x86, these operations themselves actually disable and
> reenable preemption each time, so you trade a pair of add and sub on
> x86
> 
> -	preempt_disable()
> 	__this_cpu_xxx()
> 	__this_cpu_yyy()
> -	preempt_enable()
> 
> with
> 
> 	preempt_disable()
> 	__this_cpu_xxx()
> +	preempt_enable()
> +	preempt_disable()
> 	__this_cpu_yyy()
> 	preempt_enable()
> 
> everywhere else.

That would be an unexpected race condition on non-x86, if you expected _xxx and
_yyy to be done together without a preempt between them. Would take mere
mortals forever to figure that one out. :)


* Re: [PATCH] memcg: remove unneeded preempt_disable
From: Johannes Weiner @ 2011-08-18 14:41 UTC
  To: Valdis.Kletnieks
  Cc: Greg Thelen, Andrew Morton, linux-kernel, linux-mm,
	KAMEZAWA Hiroyuki, Balbir Singh, Daisuke Nishimura

On Thu, Aug 18, 2011 at 10:26:58AM -0400, Valdis.Kletnieks@vt.edu wrote:
> On Thu, 18 Aug 2011 11:38:00 +0200, Johannes Weiner said:
> 
> > Note that on non-x86, these operations themselves actually disable and
> > reenable preemption each time, so you trade a pair of add and sub on
> > x86
> > 
> > -	preempt_disable()
> > 	__this_cpu_xxx()
> > 	__this_cpu_yyy()
> > -	preempt_enable()
> > 
> > with
> > 
> > 	preempt_disable()
> > 	__this_cpu_xxx()
> > +	preempt_enable()
> > +	preempt_disable()
> > 	__this_cpu_yyy()
> > 	preempt_enable()
> > 
> > everywhere else.
> 
> That would be an unexpected race condition on non-x86, if you expected _xxx and
> _yyy to be done together without a preempt between them. Would take mere
> mortals forever to figure that one out. :)

That should be fine, we don't require the two counters to be perfectly
coherent with respect to each other, which is the justification for
this optimization in the first place.

But on non-x86, the operation to increase a single per-cpu counter
(read-modify-write) itself is made atomic by disabling preemption.


* Re: [PATCH] memcg: remove unneeded preempt_disable
From: Valdis.Kletnieks @ 2011-08-18 18:27 UTC
  To: Johannes Weiner
  Cc: Greg Thelen, Andrew Morton, linux-kernel, linux-mm,
	KAMEZAWA Hiroyuki, Balbir Singh, Daisuke Nishimura

On Thu, 18 Aug 2011 16:41:53 +0200, Johannes Weiner said:
> On Thu, Aug 18, 2011 at 10:26:58AM -0400, Valdis.Kletnieks@vt.edu wrote:
> > On Thu, 18 Aug 2011 11:38:00 +0200, Johannes Weiner said:
> > 
> > > Note that on non-x86, these operations themselves actually disable and
> > > reenable preemption each time, so you trade a pair of add and sub on
> > > x86
> > > 
> > > -	preempt_disable()
> > > 	__this_cpu_xxx()
> > > 	__this_cpu_yyy()
> > > -	preempt_enable()
> > > 
> > > with
> > > 
> > > 	preempt_disable()
> > > 	__this_cpu_xxx()
> > > +	preempt_enable()
> > > +	preempt_disable()
> > > 	__this_cpu_yyy()
> > > 	preempt_enable()
> > > 
> > > everywhere else.
> > 
> > That would be an unexpected race condition on non-x86, if you expected _xxx and
> > _yyy to be done together without a preempt between them. Would take mere
> > mortals forever to figure that one out. :)
> 
> That should be fine, we don't require the two counters to be perfectly
> coherent with respect to each other, which is the justification for
> this optimization in the first place.

I meant the general case - when reviewing code, I wouldn't expect 2 lines of code
wrapped in preempt disable/enable to have a preempt window in the middle. ;)

* Re: [PATCH] memcg: remove unneeded preempt_disable
From: Andrew Morton @ 2011-08-18 21:40 UTC
  To: Greg Thelen
  Cc: linux-kernel, linux-mm, KAMEZAWA Hiroyuki, Balbir Singh,
	Daisuke Nishimura, linux-arch

(cc linux-arch)

On Wed, 17 Aug 2011 23:50:53 -0700
Greg Thelen <gthelen@google.com> wrote:

> Both mem_cgroup_charge_statistics() and mem_cgroup_move_account() were
> unnecessarily disabling preemption when adjusting per-cpu counters:
>     preempt_disable()
>     __this_cpu_xxx()
>     __this_cpu_yyy()
>     preempt_enable()
> 
> This change does not disable preemption and thus CPU switch is possible
> within these routines.  This does not cause a problem because the total
> of all cpu counters is summed when reporting stats.  Now both
> mem_cgroup_charge_statistics() and mem_cgroup_move_account() look like:
>     this_cpu_xxx()
>     this_cpu_yyy()
> 
> ...
>
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -664,24 +664,20 @@ static unsigned long mem_cgroup_read_events(struct mem_cgroup *mem,
>  static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
>  					 bool file, int nr_pages)
>  {
> -	preempt_disable();
> -
>  	if (file)
> -		__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], nr_pages);
> +		this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], nr_pages);
>  	else
> -		__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_RSS], nr_pages);
> +		this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_RSS], nr_pages);
>  
>  	/* pagein of a big page is an event. So, ignore page size */
>  	if (nr_pages > 0)
> -		__this_cpu_inc(mem->stat->events[MEM_CGROUP_EVENTS_PGPGIN]);
> +		this_cpu_inc(mem->stat->events[MEM_CGROUP_EVENTS_PGPGIN]);
>  	else {
> -		__this_cpu_inc(mem->stat->events[MEM_CGROUP_EVENTS_PGPGOUT]);
> +		this_cpu_inc(mem->stat->events[MEM_CGROUP_EVENTS_PGPGOUT]);
>  		nr_pages = -nr_pages; /* for event */
>  	}
>  
> -	__this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_COUNT], nr_pages);
> -
> -	preempt_enable();
> +	this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_COUNT], nr_pages);
>  }

On non-x86 architectures this_cpu_add() internally does
preempt_disable() and preempt_enable().  So the patch is a small
optimisation for x86 and a larger deoptimisation for non-x86.

I think I'll apply it, as the call frequency is low (correct?) and the
problem will correct itself as other architectures implement their
atomic this_cpu_foo() operations.

* Re: [PATCH] memcg: remove unneeded preempt_disable
From: Greg Thelen @ 2011-08-19  0:00 UTC
  To: Andrew Morton
  Cc: linux-kernel, linux-mm, KAMEZAWA Hiroyuki, Balbir Singh,
	Daisuke Nishimura, linux-arch, Valdis.Kletnieks, jweiner

On Thu, Aug 18, 2011 at 2:40 PM, Andrew Morton
<akpm@linux-foundation.org> wrote:
> (cc linux-arch)
>
> On Wed, 17 Aug 2011 23:50:53 -0700
> Greg Thelen <gthelen@google.com> wrote:
>
>> Both mem_cgroup_charge_statistics() and mem_cgroup_move_account() were
>> unnecessarily disabling preemption when adjusting per-cpu counters:
>>     preempt_disable()
>>     __this_cpu_xxx()
>>     __this_cpu_yyy()
>>     preempt_enable()
>>
>> This change does not disable preemption and thus CPU switch is possible
>> within these routines.  This does not cause a problem because the total
>> of all cpu counters is summed when reporting stats.  Now both
>> mem_cgroup_charge_statistics() and mem_cgroup_move_account() look like:
>>     this_cpu_xxx()
>>     this_cpu_yyy()
>>
>> ...
>>
>> --- a/mm/memcontrol.c
>> +++ b/mm/memcontrol.c
>> @@ -664,24 +664,20 @@ static unsigned long mem_cgroup_read_events(struct mem_cgroup *mem,
>>  static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
>>                                        bool file, int nr_pages)
>>  {
>> -     preempt_disable();
>> -
>>       if (file)
>> -             __this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], nr_pages);
>> +             this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], nr_pages);
>>       else
>> -             __this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_RSS], nr_pages);
>> +             this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_RSS], nr_pages);
>>
>>       /* pagein of a big page is an event. So, ignore page size */
>>       if (nr_pages > 0)
>> -             __this_cpu_inc(mem->stat->events[MEM_CGROUP_EVENTS_PGPGIN]);
>> +             this_cpu_inc(mem->stat->events[MEM_CGROUP_EVENTS_PGPGIN]);
>>       else {
>> -             __this_cpu_inc(mem->stat->events[MEM_CGROUP_EVENTS_PGPGOUT]);
>> +             this_cpu_inc(mem->stat->events[MEM_CGROUP_EVENTS_PGPGOUT]);
>>               nr_pages = -nr_pages; /* for event */
>>       }
>>
>> -     __this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_COUNT], nr_pages);
>> -
>> -     preempt_enable();
>> +     this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_COUNT], nr_pages);
>>  }
>
> On non-x86 architectures this_cpu_add() internally does
> preempt_disable() and preempt_enable().  So the patch is a small
> optimisation for x86 and a larger deoptimisation for non-x86.
>
> I think I'll apply it, as the call frequency is low (correct?) and the
> problem will correct itself as other architectures implement their
> atomic this_cpu_foo() operations.

mem_cgroup_charge_statistics() is a common operation, which is called
on each memcg page charge and uncharge.

The per arch/config effects of this patch:
* non-preemptible kernels: there's no difference before/after this patch.
* preemptible x86: this patch helps by removing an unnecessary
preempt_disable/enable.
* preemptible non-x86: this patch hurts by adding implicit
preempt_disable/enable around each operation.

So I am uncomfortable with this patch's unmeasured impact on archs that
do not have atomic this_cpu_foo() operations.  Please drop the patch
from mmotm.  Sorry for the noise.

* Re: [PATCH] memcg: remove unneeded preempt_disable
From: Peter Zijlstra @ 2011-08-25 14:57 UTC
  To: Andrew Morton
  Cc: Greg Thelen, linux-kernel, linux-mm, KAMEZAWA Hiroyuki,
	Balbir Singh, Daisuke Nishimura, linux-arch

On Thu, 2011-08-18 at 14:40 -0700, Andrew Morton wrote:
> 
> I think I'll apply it, as the call frequency is low (correct?) and the
> problem will correct itself as other architectures implement their
> atomic this_cpu_foo() operations. 

Which leads me to wonder, can anything but x86 implement that this_cpu_*
muck? I doubt any of the risk chips can actually do all this.
Maybe Itanic, but then that seems to be dying fast.

* Re: [PATCH] memcg: remove unneeded preempt_disable
From: Christoph Lameter @ 2011-08-25 15:11 UTC
  To: Peter Zijlstra
  Cc: Andrew Morton, Greg Thelen, linux-kernel, linux-mm,
	KAMEZAWA Hiroyuki, Balbir Singh, Daisuke Nishimura, linux-arch

On Thu, 25 Aug 2011, Peter Zijlstra wrote:

> On Thu, 2011-08-18 at 14:40 -0700, Andrew Morton wrote:
> >
> > I think I'll apply it, as the call frequency is low (correct?) and the
> > problem will correct itself as other architectures implement their
> > atomic this_cpu_foo() operations.
>
> Which leads me to wonder, can anything but x86 implement that this_cpu_*
> muck? I doubt any of the risk chips can actually do all this.
> Maybe Itanic, but then that seems to be dying fast.

The cpu needs to have an RMW instruction that does something to a
variable relative to a register that points to the per cpu base.

That's generally possible.  The problem is how expensive the RMW is going to
be.
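
On x86 it is cheap because the per cpu base lives in the %gs segment
register, so the whole RMW is one instruction and preemption-safe by
construction.  A simplified sketch of the idea (not the exact
arch/x86/include/asm/percpu.h macros, which dispatch on operand size;
the macro name here is made up):

    /* single-instruction per-cpu add: an interrupt (and hence a
     * migration) lands entirely before or after it, never inside */
    #define sketch_this_cpu_add_4(pcp, val) \
        asm("addl %1, %%gs:%0" : "+m" (pcp) : "ri" (val))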


* Re: [PATCH] memcg: remove unneeded preempt_disable
From: James Bottomley @ 2011-08-25 16:20 UTC
  To: Christoph Lameter
  Cc: Peter Zijlstra, Andrew Morton, Greg Thelen, linux-kernel,
	linux-mm, KAMEZAWA Hiroyuki, Balbir Singh, Daisuke Nishimura,
	linux-arch

On Thu, 2011-08-25 at 10:11 -0500, Christoph Lameter wrote:
> On Thu, 25 Aug 2011, Peter Zijlstra wrote:
> 
> > On Thu, 2011-08-18 at 14:40 -0700, Andrew Morton wrote:
> > >
> > > I think I'll apply it, as the call frequency is low (correct?) and the
> > > problem will correct itself as other architectures implement their
> > > atomic this_cpu_foo() operations.
> >
> > Which leads me to wonder, can anything but x86 implement that this_cpu_*
> > muck? I doubt any of the risk chips can actually do all this.
> > Maybe Itanic, but then that seems to be dying fast.
> 
> The cpu needs to have an RMW instruction that does something to a
> variable relative to a register that points to the per cpu base.
> 
> Thats generally possible. The problem is how expensive the RMW is going to
> be.

Risc systems generally don't have a single instruction for this, that's
correct.  Obviously we can do it as a non atomic sequence: read
variable, compute relative, read, modify, write ... but there's
absolutely no point hand crafting that in asm since the compiler can
usually work it out nicely.  And, of course, to have this atomic, we
have to use locks, which ends up being very expensive.

James



* Re: [PATCH] memcg: remove unneeded preempt_disable
From: Christoph Lameter @ 2011-08-25 16:31 UTC
  To: James Bottomley
  Cc: Peter Zijlstra, Andrew Morton, Greg Thelen, linux-kernel,
	linux-mm, KAMEZAWA Hiroyuki, Balbir Singh, Daisuke Nishimura,
	linux-arch

On Thu, 25 Aug 2011, James Bottomley wrote:

> On Thu, 2011-08-25 at 10:11 -0500, Christoph Lameter wrote:
> > On Thu, 25 Aug 2011, Peter Zijlstra wrote:
> >
> > > On Thu, 2011-08-18 at 14:40 -0700, Andrew Morton wrote:
> > > >
> > > > I think I'll apply it, as the call frequency is low (correct?) and the
> > > > problem will correct itself as other architectures implement their
> > > > atomic this_cpu_foo() operations.
> > >
> > > Which leads me to wonder, can anything but x86 implement that this_cpu_*
> > > muck? I doubt any of the risk chips can actually do all this.
> > > Maybe Itanic, but then that seems to be dying fast.
> >
> > The cpu needs to have an RMW instruction that does something to a
> > variable relative to a register that points to the per cpu base.
> >
> > Thats generally possible. The problem is how expensive the RMW is going to
> > be.
>
> Risc systems generally don't have a single instruction for this, that's
> correct.  Obviously we can do it as a non atomic sequence: read
> variable, compute relative, read, modify, write ... but there's
> absolutely no point hand crafting that in asm since the compiler can
> usually work it out nicely.  And, of course, to have this atomic, we
> have to use locks, which ends up being very expensive.

ARM seems to have these LDREX/STREX instructions for that purpose which
seem to be used for generating atomic instructions without locks. I guess
other RISC architectures have similar means of doing it?
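
For illustration, ARM's atomic_add() of that era was built exactly this
way (condensed from arch/arm/include/asm/atomic.h, ARMv6+; slightly
simplified):

    static inline void atomic_add(int i, atomic_t *v)
    {
        unsigned long tmp;
        int result;

        __asm__ __volatile__(
    "1: ldrex   %0, [%3]\n"     /* load-exclusive v->counter */
    "   add     %0, %0, %4\n"
    "   strex   %1, %0, [%3]\n" /* store fails if exclusivity was lost */
    "   teq     %1, #0\n"
    "   bne     1b"             /* retry until the store sticks */
        : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter)
        : "r" (&v->counter), "Ir" (i)
        : "cc");
    }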

* Re: [PATCH] memcg: remove unneeded preempt_disable
From: James Bottomley @ 2011-08-25 16:34 UTC
  To: Christoph Lameter
  Cc: Peter Zijlstra, Andrew Morton, Greg Thelen, linux-kernel,
	linux-mm, KAMEZAWA Hiroyuki, Balbir Singh, Daisuke Nishimura,
	linux-arch



Christoph Lameter <cl@linux.com> wrote:

>On Thu, 25 Aug 2011, James Bottomley wrote:
>
>> On Thu, 2011-08-25 at 10:11 -0500, Christoph Lameter wrote:
>> > On Thu, 25 Aug 2011, Peter Zijlstra wrote:
>> >
>> > > On Thu, 2011-08-18 at 14:40 -0700, Andrew Morton wrote:
>> > > >
>> > > > I think I'll apply it, as the call frequency is low (correct?)
>and the
>> > > > problem will correct itself as other architectures implement
>their
>> > > > atomic this_cpu_foo() operations.
>> > >
>> > > Which leads me to wonder, can anything but x86 implement that
>this_cpu_*
>> > > muck? I doubt any of the risk chips can actually do all this.
>> > > Maybe Itanic, but then that seems to be dying fast.
>> >
>> > The cpu needs to have an RMW instruction that does something to a
>> > variable relative to a register that points to the per cpu base.
>> >
>> > Thats generally possible. The problem is how expensive the RMW is
>going to
>> > be.
>>
>> Risc systems generally don't have a single instruction for this,
>that's
>> correct.  Obviously we can do it as a non atomic sequence: read
>> variable, compute relative, read, modify, write ... but there's
>> absolutely no point hand crafting that in asm since the compiler can
>> usually work it out nicely.  And, of course, to have this atomic, we
>> have to use locks, which ends up being very expensive.
>
>ARM seems to have these LDREX/STREX instructions for that purpose which
>seem to be used for generating atomic instructions without locks. I
>guess
>other RISC architectures have similar means of doing it?

Arm isn't really risc.  Most don't.  However even with ldrex/strex you need two instructions for rmw.

James
-- 
Sent from my Android phone with K-9 Mail. Please excuse my brevity and top posting.

* Re: [PATCH] memcg: remove unneeded preempt_disable
From: Christoph Lameter @ 2011-08-25 17:07 UTC
  To: James Bottomley
  Cc: Peter Zijlstra, Andrew Morton, Greg Thelen, linux-kernel,
	linux-mm, KAMEZAWA Hiroyuki, Balbir Singh, Daisuke Nishimura,
	linux-arch

On Thu, 25 Aug 2011, James Bottomley wrote:

> >ARM seems to have these LDREX/STREX instructions for that purpose which
> >seem to be used for generating atomic instructions without locks. I
> >guess
> >other RISC architectures have similar means of doing it?
>
> Arm isn't really risc.  Most don't.  However even with ldrex/strex you need two instructions for rmw.

Well then what is "really risc"? RISC is an old beaten down marketing term
AFAICT and ARM claims it too.


* Re: [PATCH] memcg: remove unneeded preempt_disable
From: James Bottomley @ 2011-08-25 18:34 UTC
  To: Christoph Lameter
  Cc: Peter Zijlstra, Andrew Morton, Greg Thelen, linux-kernel,
	linux-mm, KAMEZAWA Hiroyuki, Balbir Singh, Daisuke Nishimura,
	linux-arch



Christoph Lameter <cl@linux.com> wrote:

>On Thu, 25 Aug 2011, James Bottomley wrote:
>
>> >ARM seems to have these LDREX/STREX instructions for that purpose
>which
>> >seem to be used for generating atomic instructions without locks. I
>> >guess
>> >other RISC architectures have similar means of doing it?
>>
>> Arm isn't really risc.  Most don't.  However even with ldrex/strex
>you need two instructions for rmw.
>
>Well then what is "really risc"? RISC is an old beaten down marketing
>term
>AFAICT and ARM claims it too.

Reduced Instruction Set Computer.  This is why we're unlikely to have complex atomic instructions: the principle of risc is that you build them up from basic ones.

James 
-- 
Sent from my Android phone with K-9 Mail. Please excuse my brevity and top posting.

* Re: [PATCH] memcg: remove unneeded preempt_disable
From: Peter Zijlstra @ 2011-08-25 18:40 UTC
  To: Christoph Lameter
  Cc: James Bottomley, Andrew Morton, Greg Thelen, linux-kernel,
	linux-mm, KAMEZAWA Hiroyuki, Balbir Singh, Daisuke Nishimura,
	linux-arch

On Thu, 2011-08-25 at 11:31 -0500, Christoph Lameter wrote:
> On Thu, 25 Aug 2011, James Bottomley wrote:
> 
> > On Thu, 2011-08-25 at 10:11 -0500, Christoph Lameter wrote:
> > > On Thu, 25 Aug 2011, Peter Zijlstra wrote:
> > >
> > > > On Thu, 2011-08-18 at 14:40 -0700, Andrew Morton wrote:
> > > > >
> > > > > I think I'll apply it, as the call frequency is low (correct?) and the
> > > > > problem will correct itself as other architectures implement their
> > > > > atomic this_cpu_foo() operations.
> > > >
> > > > Which leads me to wonder, can anything but x86 implement that this_cpu_*
> > > > muck? I doubt any of the risk chips can actually do all this.
> > > > Maybe Itanic, but then that seems to be dying fast.
> > >
> > > The cpu needs to have an RMW instruction that does something to a
> > > variable relative to a register that points to the per cpu base.
> > >
> > > Thats generally possible. The problem is how expensive the RMW is going to
> > > be.
> >
> > Risc systems generally don't have a single instruction for this, that's
> > correct.  Obviously we can do it as a non atomic sequence: read
> > variable, compute relative, read, modify, write ... but there's
> > absolutely no point hand crafting that in asm since the compiler can
> > usually work it out nicely.  And, of course, to have this atomic, we
> > have to use locks, which ends up being very expensive.
> 
> ARM seems to have these LDREX/STREX instructions for that purpose which
> seem to be used for generating atomic instructions without locks. I guess
> other RISC architectures have similar means of doing it?

Even with LL/SC and the CPU base in a register you need to do something
like:

again:
	LL $target-reg, $cpubase-reg + offset
	<foo>
	SC $ret, $target-reg, $cpubase-reg + offset
	if !$ret goto again

It's the +offset that's problematic: it either doesn't exist or is very
limited (a quick look at the MIPS instruction set gives a limit of 64k).

Without the +offset you need:

again:
	$tmp-reg = $cpubase-reg
	$tmp-reg += offset;

	LL $target-reg, $tmp-reg
	<foo>
	SC $ret, $target-reg, $tmp-reg
	if !$ret goto again


Which is wide open to migration races. Also, very often there are
constraints on LL/SC that mandate we use preempt_disable/enable around
its use, which pretty much voids the whole purpose, since if we disable
preemption we might as well just use C (ARM belongs in this class).

It does look like POWERPC's lwarx/stwcx is sane enough, although the
instruction reference I found doesn't list what happens if the LL/SC
doesn't use the same effective address or has other loads/stores in
between; if it's ok with those and simply fails the SC it should be good.

Still, creating atomic ops for per-cpu ops might be more expensive than
simply doing the preempt-disable/rmw/enable dance; dunno, I don't know
these archs that well.


* Re: [PATCH] memcg: remove unneeded preempt_disable
  2011-08-25 18:34                 ` James Bottomley
@ 2011-08-25 18:46                   ` Christoph Lameter
  -1 siblings, 0 replies; 56+ messages in thread
From: Christoph Lameter @ 2011-08-25 18:46 UTC (permalink / raw)
  To: James Bottomley
  Cc: Peter Zijlstra, Andrew Morton, Greg Thelen, linux-kernel,
	linux-mm, KAMEZAWA Hiroyuki, Balbir Singh, Daisuke Nishimura,
	linux-arch

On Thu, 25 Aug 2011, James Bottomley wrote:

> >Well then what is "really risc"? RISC is an old beaten down
> >marketing term AFAICT and ARM claims it too.
>
> Reduced Instruction Set Computer.  This is why we're unlikely to have
> complex atomic instructions: the principle of risc is that you build
> them up from basic ones.

RISC cpus have instructions to construct complex atomic actions,
as I have shown before for ARM.

Principles always have exceptions to them.

(That statement in itself is a principle that should have an exception I
guess. But then language often only makes sense when it contains
contradictions.)


^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH] memcg: remove unneeded preempt_disable
  2011-08-25 18:46                   ` Christoph Lameter
@ 2011-08-25 18:49                     ` Peter Zijlstra
  -1 siblings, 0 replies; 56+ messages in thread
From: Peter Zijlstra @ 2011-08-25 18:49 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: James Bottomley, Andrew Morton, Greg Thelen, linux-kernel,
	linux-mm, KAMEZAWA Hiroyuki, Balbir Singh, Daisuke Nishimura,
	linux-arch

On Thu, 2011-08-25 at 13:46 -0500, Christoph Lameter wrote:
> 
> RISC cpus have instructions to construct complex atomic actions,
> as I have shown before for ARM. 

Right, but it only makes sense if the whole thing remains cheaper than
the trivial implementation already available.

For instance, the ARM LL/SC constraints pretty much mandate we do
preempt_disable()/preempt_enable() around them, at which point the
benefit of doing LL/SC is gone (except maybe for the irqsafe_this_cpu_*
stuff).
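
(Their generic fallback is an irq-disable dance; a simplified sketch,
not the exact macro:

	/* sketch: what irqsafe_cpu_add() falls back to generically */
	#define irqsafe_cpu_add_sketch(pcp, val)		\
	do {							\
		unsigned long flags;				\
		local_irq_save(flags);				\
		__this_cpu_add(pcp, val);			\
		local_irq_restore(flags);			\
	} while (0)

so LL/SC only has to beat disabling interrupts there, not a plain
preempt_disable().)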

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH] memcg: remove unneeded preempt_disable
  2011-08-25 18:46                   ` Christoph Lameter
@ 2011-08-25 18:53                     ` Peter Zijlstra
  -1 siblings, 0 replies; 56+ messages in thread
From: Peter Zijlstra @ 2011-08-25 18:53 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: James Bottomley, Andrew Morton, Greg Thelen, linux-kernel,
	linux-mm, KAMEZAWA Hiroyuki, Balbir Singh, Daisuke Nishimura,
	linux-arch

On Thu, 2011-08-25 at 20:49 +0200, Peter Zijlstra wrote:
> the ARM LL/SC constraints pretty much mandate we do
> preempt_disable()/preempt_enable() around them, 

My bad, that was MIPS; its manual states that portable programs should
not issue load/store/prefetch insns inside an LL/SC region or the
behaviour is undefined, or somesuch.

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH] memcg: remove unneeded preempt_disable
  2011-08-25 18:46                   ` Christoph Lameter
@ 2011-08-25 19:05                     ` Peter Zijlstra
  -1 siblings, 0 replies; 56+ messages in thread
From: Peter Zijlstra @ 2011-08-25 19:05 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: James Bottomley, Andrew Morton, Greg Thelen, linux-kernel,
	linux-mm, KAMEZAWA Hiroyuki, Balbir Singh, Daisuke Nishimura,
	linux-arch

On Thu, 2011-08-25 at 13:46 -0500, Christoph Lameter wrote:
> 
> RISC cpus have instructions to construct complex atomic actions,
> as I have shown before for ARM. 

Also, I thought this_cpu things were at best locally atomic. If you
make them full blown atomic ops then even __this_cpu ops will have to be
full atomic ops, otherwise:


CPU0			CPU1

this_cpu_inc(&foo);	preempt_disable();
			__this_cpu_inc(&foo);
			preempt_enable();

might step on each other's toes.

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH] memcg: remove unneeded preempt_disable
  2011-08-25 19:05                     ` Peter Zijlstra
@ 2011-08-25 19:19                       ` Christoph Lameter
  -1 siblings, 0 replies; 56+ messages in thread
From: Christoph Lameter @ 2011-08-25 19:19 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: James Bottomley, Andrew Morton, Greg Thelen, linux-kernel,
	linux-mm, KAMEZAWA Hiroyuki, Balbir Singh, Daisuke Nishimura,
	linux-arch

On Thu, 25 Aug 2011, Peter Zijlstra wrote:

> Also, I thought this_cpu things were at best locally atomic. If you
> make them full blown atomic ops then even __this_cpu ops will have to be
> full atomic ops, otherwise:
>
>
> CPU0			CPU1
>
> this_cpu_inc(&foo);	preempt_disable();
> 			__this_cpu_inc(&foo);
> 			preempt_enable();
>
> might step on each other's toes.

They would both have their own instance of "foo". Per-cpu atomicity is
only one requirement of this_cpu_ops. The other is the ability to
relocate accesses relative to the current per-cpu area.

Full-blown atomicity is almost a superset of per-cpu atomicity, but
it's only usable if the full atomic instructions can also relocate
accesses relative to some base.
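
On x86 the relocation comes from the segment override, so the whole RMW
is one instruction that preemption cannot split; a simplified sketch of
the idea (x86-64 flavour, not the exact kernel macro):

	/* sketch: %gs holds the per cpu base; relocation plus
	 * read-modify-write happen in a single instruction */
	#define this_cpu_add_4_sketch(pcp, val)			\
		asm("addl %1, %%gs:%0" : "+m" (pcp) : "ri" (val))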


^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH] memcg: remove unneeded preempt_disable
  2011-08-25 18:46                   ` Christoph Lameter
@ 2011-08-25 19:29                     ` James Bottomley
  -1 siblings, 0 replies; 56+ messages in thread
From: James Bottomley @ 2011-08-25 19:29 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Peter Zijlstra, Andrew Morton, Greg Thelen, linux-kernel,
	linux-mm, KAMEZAWA Hiroyuki, Balbir Singh, Daisuke Nishimura,
	linux-arch

On Thu, 2011-08-25 at 13:46 -0500, Christoph Lameter wrote:
> On Thu, 25 Aug 2011, James Bottomley wrote:
> 
> > >Well then what is "really risc"? RISC is an old beaten down
> > >marketing term AFAICT and ARM claims it too.
> >
> > Reduced Instruction Set Computer.  This is why we're unlikely to have
> > complex atomic instructions: the principle of risc is that you build
> > them up from basic ones.
> 
> RISC cpus have instructions to construct complex atomic actions,
> as I have shown before for ARM.
> 
> Principles always have exceptions to them.
> 
> (That statement in itself is a principle that should have an exception I
> guess. But then language often only makes sense when it contains
> contradictions.)

We seem to be talking at cross purposes.  I'm not saying a RISC system
can't do this ... of course the RISC primitives can build into whatever
you want.  To make it atomic, you simply add locking.  What I'm saying
is that open coding asm in a macro makes no sense because the compiler
will do it better from C.  Plus, since the net purpose of this patch is
to require us to lock around each op instead of taking a global lock (or
in this case disabling preemption), you're making us less efficient at
executing it.

Therefore, from the RISC point of view, most of the this_cpu_xxx
operations are things that we don't really care about, except that the
result would be easier to read in C.

James



^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH] memcg: remove unneeded preempt_disable
  2011-08-25 19:19                       ` Christoph Lameter
@ 2011-08-25 22:56                         ` Peter Zijlstra
  -1 siblings, 0 replies; 56+ messages in thread
From: Peter Zijlstra @ 2011-08-25 22:56 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: James Bottomley, Andrew Morton, Greg Thelen, linux-kernel,
	linux-mm, KAMEZAWA Hiroyuki, Balbir Singh, Daisuke Nishimura,
	linux-arch

On Thu, 2011-08-25 at 14:19 -0500, Christoph Lameter wrote:
> On Thu, 25 Aug 2011, Peter Zijlstra wrote:
> 
> > Also, I thought this_cpu things were at best locally atomic. If you
> > make them full blown atomic ops then even __this_cpu ops will have to be
> > full atomic ops, otherwise:
> >
> >
> > CPU0			CPU1
> >
> > this_cpu_inc(&foo);	preempt_disable();
> > 			__this_cpu_inc(&foo);
> > 			preempt_enable();
> >
> > might step on each other's toes.
> 
> They would both have their own instance of "foo". Per-cpu atomicity is
> only one requirement of this_cpu_ops. The other is the ability to
> relocate accesses relative to the current per-cpu area.

Ah, but not if the this_cpu_inc() thing ends up being more than a
single instruction; then you have preemption/migration windows. Only
when the LL/SC can deal with the SC having a different EA from the LL,
and supports a big enough offset, could this possibly work.


^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH] memcg: remove unneeded preempt_disable
  2011-08-25 19:29                     ` James Bottomley
@ 2011-08-25 23:01                       ` Peter Zijlstra
  -1 siblings, 0 replies; 56+ messages in thread
From: Peter Zijlstra @ 2011-08-25 23:01 UTC (permalink / raw)
  To: James Bottomley
  Cc: Christoph Lameter, Andrew Morton, Greg Thelen, linux-kernel,
	linux-mm, KAMEZAWA Hiroyuki, Balbir Singh, Daisuke Nishimura,
	linux-arch

On Thu, 2011-08-25 at 12:29 -0700, James Bottomley wrote:
> 
> Therefore, from the RISC point of view, most of the this_cpu_xxx
> operations are things that we don't really care about, except that the
> result would be easier to read in C. 

Right, so the current fallback case is pretty much the optimal case for
the RISC machines, which means generic code is better off not using
this_cpu much, and instead preferring __this_cpu when there's more than
one op.
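
I.e. on those archs generic code wants the batched form:

	/* sketch: stat_a/stat_b stand in for any two per-cpu counters */
	preempt_disable();
	__this_cpu_inc(stat_a);
	__this_cpu_inc(stat_b);
	preempt_enable();

rather than multiple this_cpu ops that each hide their own
disable/enable pair.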

I mean, it's absolutely awesome that these things are 1 instruction on
x86, but if we pessimize all the other 20-odd architectures it's just
not cool.


^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH] memcg: remove unneeded preempt_disable
  2011-08-18  6:50 ` Greg Thelen
@ 2011-09-06  9:58   ` Johannes Weiner
  -1 siblings, 0 replies; 56+ messages in thread
From: Johannes Weiner @ 2011-09-06  9:58 UTC (permalink / raw)
  To: Greg Thelen
  Cc: Andrew Morton, linux-kernel, linux-mm, KAMEZAWA Hiroyuki,
	Balbir Singh, Daisuke Nishimura

On Wed, Aug 17, 2011 at 11:50:53PM -0700, Greg Thelen wrote:
> Both mem_cgroup_charge_statistics() and mem_cgroup_move_account() were
> unnecessarily disabling preemption when adjusting per-cpu counters:
>     preempt_disable()
>     __this_cpu_xxx()
>     __this_cpu_yyy()
>     preempt_enable()
> 
> This change does not disable preemption and thus CPU switch is possible
> within these routines.  This does not cause a problem because the total
> of all cpu counters is summed when reporting stats.  Now both
> mem_cgroup_charge_statistics() and mem_cgroup_move_account() look like:
>     this_cpu_xxx()
>     this_cpu_yyy()
> 
> Reported-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> Signed-off-by: Greg Thelen <gthelen@google.com>

I just noticed that both cases have preemption disabled anyway because
of the page_cgroup bit spinlock.

So removing the preempt_disable() is fine but we can even keep the
non-atomic __this_cpu operations.
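
lock_page_cgroup() is a bit spinlock, and bit_spin_lock() keeps
preemption disabled for as long as the lock is held; roughly, a
simplified sketch of include/linux/bit_spinlock.h:

	static inline void bit_spin_lock(int bitnum, unsigned long *addr)
	{
		/* preemption stays off until bit_spin_unlock() */
		preempt_disable();
		while (unlikely(test_and_set_bit_lock(bitnum, addr))) {
			preempt_enable();
			do {
				cpu_relax();
			} while (test_bit(bitnum, addr));
			preempt_disable();
		}
	}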

Something like this instead?

---
From: Johannes Weiner <jweiner@redhat.com>
Subject: mm: memcg: remove needless recursive preemption disabling

Callsites of mem_cgroup_charge_statistics() hold the page_cgroup bit
spinlock, which implies disabled preemption.

The same goes for the explicit preemption disabling to account mapped
file pages in mem_cgroup_move_account().

The explicit disabling of preemption in both cases is redundant.

Signed-off-by: Johannes Weiner <jweiner@redhat.com>
---
 mm/memcontrol.c |    6 ------
 1 file changed, 6 deletions(-)

--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -618,8 +618,6 @@ static unsigned long mem_cgroup_read_eve
 static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
 					 bool file, int nr_pages)
 {
-	preempt_disable();
-
 	if (file)
 		__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], nr_pages);
 	else
@@ -634,8 +632,6 @@ static void mem_cgroup_charge_statistics
 	}
 
 	__this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_COUNT], nr_pages);
-
-	preempt_enable();
 }
 
 unsigned long
@@ -2582,10 +2578,8 @@ static int mem_cgroup_move_account(struc
 
 	if (PageCgroupFileMapped(pc)) {
 		/* Update mapped_file data for mem_cgroup */
-		preempt_disable();
 		__this_cpu_dec(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
 		__this_cpu_inc(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
-		preempt_enable();
 	}
 	mem_cgroup_charge_statistics(from, PageCgroupCache(pc), -nr_pages);
 	if (uncharge)

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH] memcg: remove unneeded preempt_disable
  2011-09-06  9:58   ` Johannes Weiner
@ 2011-09-06 10:04     ` KAMEZAWA Hiroyuki
  -1 siblings, 0 replies; 56+ messages in thread
From: KAMEZAWA Hiroyuki @ 2011-09-06 10:04 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: Greg Thelen, Andrew Morton, linux-kernel, linux-mm, Balbir Singh,
	Daisuke Nishimura

On Tue, 6 Sep 2011 11:58:52 +0200
Johannes Weiner <jweiner@redhat.com> wrote:

> On Wed, Aug 17, 2011 at 11:50:53PM -0700, Greg Thelen wrote:
> > Both mem_cgroup_charge_statistics() and mem_cgroup_move_account() were
> > unnecessarily disabling preemption when adjusting per-cpu counters:
> >     preempt_disable()
> >     __this_cpu_xxx()
> >     __this_cpu_yyy()
> >     preempt_enable()
> > 
> > This change does not disable preemption and thus CPU switch is possible
> > within these routines.  This does not cause a problem because the total
> > of all cpu counters is summed when reporting stats.  Now both
> > mem_cgroup_charge_statistics() and mem_cgroup_move_account() look like:
> >     this_cpu_xxx()
> >     this_cpu_yyy()
> > 
> > Reported-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> > Signed-off-by: Greg Thelen <gthelen@google.com>
> 
> I just noticed that both cases have preemption disabled anyway because
> of the page_cgroup bit spinlock.
> 
> So removing the preempt_disable() is fine but we can even keep the
> non-atomic __this_cpu operations.
> 
> Something like this instead?
> 
> ---
> From: Johannes Weiner <jweiner@redhat.com>
> Subject: mm: memcg: remove needless recursive preemption disabling
> 
> Callsites of mem_cgroup_charge_statistics() hold the page_cgroup bit
> spinlock, which implies disabled preemption.
> 
> The same goes for the explicit preemption disabling to account mapped
> file pages in mem_cgroup_move_account().
> 
> The explicit disabling of preemption in both cases is redundant.
> 
> Signed-off-by: Johannes Weiner <jweiner@redhat.com>

Could you add a comment like
"This operation is called under the bit spin lock!"?

Nice catch.

Acked-by: KAMEZAWA Hiroyuki <kamezawa.hioryu@jp.fujitsu.com>


^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH] memcg: remove unneeded preempt_disable
  2011-09-06 10:49       ` Johannes Weiner
@ 2011-09-06 10:45         ` KAMEZAWA Hiroyuki
  -1 siblings, 0 replies; 56+ messages in thread
From: KAMEZAWA Hiroyuki @ 2011-09-06 10:45 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: Greg Thelen, Andrew Morton, linux-kernel, linux-mm, Balbir Singh,
	Daisuke Nishimura

On Tue, 6 Sep 2011 12:49:15 +0200
Johannes Weiner <jweiner@redhat.com> wrote:

> On Tue, Sep 06, 2011 at 07:04:24PM +0900, KAMEZAWA Hiroyuki wrote:
> > On Tue, 6 Sep 2011 11:58:52 +0200
> > Johannes Weiner <jweiner@redhat.com> wrote:
> > 
> > > On Wed, Aug 17, 2011 at 11:50:53PM -0700, Greg Thelen wrote:
> > > > Both mem_cgroup_charge_statistics() and mem_cgroup_move_account() were
> > > > unnecessarily disabling preemption when adjusting per-cpu counters:
> > > >     preempt_disable()
> > > >     __this_cpu_xxx()
> > > >     __this_cpu_yyy()
> > > >     preempt_enable()
> > > > 
> > > > This change does not disable preemption and thus CPU switch is possible
> > > > within these routines.  This does not cause a problem because the total
> > > > of all cpu counters is summed when reporting stats.  Now both
> > > > mem_cgroup_charge_statistics() and mem_cgroup_move_account() look like:
> > > >     this_cpu_xxx()
> > > >     this_cpu_yyy()
> > > > 
> > > > Reported-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> > > > Signed-off-by: Greg Thelen <gthelen@google.com>
> > > 
> > > I just noticed that both cases have preemption disabled anyway because
> > > of the page_cgroup bit spinlock.
> > > 
> > > So removing the preempt_disable() is fine but we can even keep the
> > > non-atomic __this_cpu operations.
> > > 
> > > Something like this instead?
> > > 
> > > ---
> > > From: Johannes Weiner <jweiner@redhat.com>
> > > Subject: mm: memcg: remove needless recursive preemption disabling
> > > 
> > > Callsites of mem_cgroup_charge_statistics() hold the page_cgroup bit
> > > spinlock, which implies disabled preemption.
> > > 
> > > The same goes for the explicit preemption disabling to account mapped
> > > file pages in mem_cgroup_move_account().
> > > 
> > > The explicit disabling of preemption in both cases is redundant.
> > > 
> > > Signed-off-by: Johannes Weiner <jweiner@redhat.com>
> > 
> > Could you add a comment like
> > "This operation is called under the bit spin lock!"?
> 
> I left it as is in the file-mapped case, because if you read the
> __this_cpu ops and go looking for who disables preemption, you see the
> lock_page_cgroup() immediately a few lines above.
> 
> But I agree that the charge_statistics() is much less obvious, so I
> added a line there.
> 
> Is that okay?
> 

Seems nice.

Thanks,
-Kame

> > Nice catch.
> > 
> > Acked-by: KAMEZAWA Hiroyuki <kamezawa.hioryu@jp.fujitsu.com>
> 
> Thanks!
> 
> ---
> 
> Signed-off-by: Johannes Weiner <jweiner@redhat.com>
> ---
>  mm/memcontrol.c |    1 +
>  1 file changed, 1 insertion(+)
> 
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -615,6 +615,7 @@ static unsigned long mem_cgroup_read_eve
>  	return val;
>  }
>  
> +/* Must be called with preemption disabled */
>  static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
>  					 bool file, int nr_pages)
>  {
> 


^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH] memcg: remove unneeded preempt_disable
  2011-09-06 10:04     ` KAMEZAWA Hiroyuki
@ 2011-09-06 10:49       ` Johannes Weiner
  -1 siblings, 0 replies; 56+ messages in thread
From: Johannes Weiner @ 2011-09-06 10:49 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: Greg Thelen, Andrew Morton, linux-kernel, linux-mm, Balbir Singh,
	Daisuke Nishimura

On Tue, Sep 06, 2011 at 07:04:24PM +0900, KAMEZAWA Hiroyuki wrote:
> On Tue, 6 Sep 2011 11:58:52 +0200
> Johannes Weiner <jweiner@redhat.com> wrote:
> 
> > On Wed, Aug 17, 2011 at 11:50:53PM -0700, Greg Thelen wrote:
> > > Both mem_cgroup_charge_statistics() and mem_cgroup_move_account() were
> > > unnecessarily disabling preemption when adjusting per-cpu counters:
> > >     preempt_disable()
> > >     __this_cpu_xxx()
> > >     __this_cpu_yyy()
> > >     preempt_enable()
> > > 
> > > This change does not disable preemption and thus CPU switch is possible
> > > within these routines.  This does not cause a problem because the total
> > > of all cpu counters is summed when reporting stats.  Now both
> > > mem_cgroup_charge_statistics() and mem_cgroup_move_account() look like:
> > >     this_cpu_xxx()
> > >     this_cpu_yyy()
> > > 
> > > Reported-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> > > Signed-off-by: Greg Thelen <gthelen@google.com>
> > 
> > I just noticed that both cases have preemption disabled anyway because
> > of the page_cgroup bit spinlock.
> > 
> > So removing the preempt_disable() is fine but we can even keep the
> > non-atomic __this_cpu operations.
> > 
> > Something like this instead?
> > 
> > ---
> > From: Johannes Weiner <jweiner@redhat.com>
> > Subject: mm: memcg: remove needless recursive preemption disabling
> > 
> > Callsites of mem_cgroup_charge_statistics() hold the page_cgroup bit
> > spinlock, which implies disabled preemption.
> > 
> > The same goes for the explicit preemption disabling to account mapped
> > file pages in mem_cgroup_move_account().
> > 
> > The explicit disabling of preemption in both cases is redundant.
> > 
> > Signed-off-by: Johannes Weiner <jweiner@redhat.com>
> 
> Could you add a comment like
> "This operation is called under the bit spin lock!"?

I left it as is in the file-mapped case, because if you read the
__this_cpu ops and go looking for who disables preemption, you see the
lock_page_cgroup() immediately a few lines above.

But I agree that the charge_statistics() is much less obvious, so I
added a line there.

Is that okay?

> Nice catch.
> 
> Acked-by: KAMEZAWA Hiroyuki <kamezawa.hioryu@jp.fujitsu.com>

Thanks!

---

Signed-off-by: Johannes Weiner <jweiner@redhat.com>
---
 mm/memcontrol.c |    1 +
 1 file changed, 1 insertion(+)

--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -615,6 +615,7 @@ static unsigned long mem_cgroup_read_eve
 	return val;
 }
 
+/* Must be called with preemption disabled */
 static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
 					 bool file, int nr_pages)
 {

^ permalink raw reply	[flat|nested] 56+ messages in thread

* Re: [PATCH] memcg: remove unneeded preempt_disable
  2011-09-06  9:58   ` Johannes Weiner
@ 2011-09-06 18:04     ` Greg Thelen
  -1 siblings, 0 replies; 56+ messages in thread
From: Greg Thelen @ 2011-09-06 18:04 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: Andrew Morton, linux-kernel, linux-mm, KAMEZAWA Hiroyuki,
	Balbir Singh, Daisuke Nishimura

On Tue, Sep 6, 2011 at 2:58 AM, Johannes Weiner <jweiner@redhat.com> wrote:
> On Wed, Aug 17, 2011 at 11:50:53PM -0700, Greg Thelen wrote:
>> Both mem_cgroup_charge_statistics() and mem_cgroup_move_account() were
>> unnecessarily disabling preemption when adjusting per-cpu counters:
>>     preempt_disable()
>>     __this_cpu_xxx()
>>     __this_cpu_yyy()
>>     preempt_enable()
>>
>> This change does not disable preemption and thus CPU switch is possible
>> within these routines.  This does not cause a problem because the total
>> of all cpu counters is summed when reporting stats.  Now both
>> mem_cgroup_charge_statistics() and mem_cgroup_move_account() look like:
>>     this_cpu_xxx()
>>     this_cpu_yyy()
>>
>> Reported-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
>> Signed-off-by: Greg Thelen <gthelen@google.com>
>
> I just noticed that both cases have preemption disabled anyway because
> of the page_cgroup bit spinlock.
>
> So removing the preempt_disable() is fine but we can even keep the
> non-atomic __this_cpu operations.
>
> Something like this instead?
>
> ---
> From: Johannes Weiner <jweiner@redhat.com>
> Subject: mm: memcg: remove needless recursive preemption disabling
>
> Callsites of mem_cgroup_charge_statistics() hold the page_cgroup bit
> spinlock, which implies disabled preemption.
>
> The same goes for the explicit preemption disabling to account mapped
> file pages in mem_cgroup_move_account().
>
> The explicit disabling of preemption in both cases is redundant.
>
> Signed-off-by: Johannes Weiner <jweiner@redhat.com>

Looks good, thanks.

Reviewed-by: Greg Thelen <gthelen@google.com>

> ---
>  mm/memcontrol.c |    6 ------
>  1 file changed, 6 deletions(-)
>
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -618,8 +618,6 @@ static unsigned long mem_cgroup_read_eve
>  static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
>                                         bool file, int nr_pages)
>  {
> -       preempt_disable();
> -
>        if (file)
>                __this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], nr_pages);
>        else
> @@ -634,8 +632,6 @@ static void mem_cgroup_charge_statistics
>        }
>
>        __this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_COUNT], nr_pages);
> -
> -       preempt_enable();
>  }
>
>  unsigned long
> @@ -2582,10 +2578,8 @@ static int mem_cgroup_move_account(struc
>
>        if (PageCgroupFileMapped(pc)) {
>                /* Update mapped_file data for mem_cgroup */
> -               preempt_disable();
>                __this_cpu_dec(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
>                __this_cpu_inc(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
> -               preempt_enable();
>        }
>        mem_cgroup_charge_statistics(from, PageCgroupCache(pc), -nr_pages);
>        if (uncharge)
>

^ permalink raw reply	[flat|nested] 56+ messages in thread

end of thread, other threads:[~2011-09-06 18:04 UTC | newest]

Thread overview: 56+ messages
2011-08-18  6:50 [PATCH] memcg: remove unneeded preempt_disable Greg Thelen
2011-08-18  6:52 ` KAMEZAWA Hiroyuki
2011-08-18  9:38 ` Johannes Weiner
2011-08-18 14:26   ` Valdis.Kletnieks
2011-08-18 14:41     ` Johannes Weiner
2011-08-18 18:27       ` Valdis.Kletnieks
2011-08-18 21:40 ` Andrew Morton
2011-08-19  0:00   ` Greg Thelen
2011-08-25 14:57   ` Peter Zijlstra
2011-08-25 15:11     ` Christoph Lameter
2011-08-25 16:20       ` James Bottomley
2011-08-25 16:31         ` Christoph Lameter
2011-08-25 16:34           ` James Bottomley
2011-08-25 17:07             ` Christoph Lameter
2011-08-25 18:34               ` James Bottomley
2011-08-25 18:46                 ` Christoph Lameter
2011-08-25 18:49                   ` Peter Zijlstra
2011-08-25 18:53                   ` Peter Zijlstra
2011-08-25 19:05                   ` Peter Zijlstra
2011-08-25 19:19                     ` Christoph Lameter
2011-08-25 22:56                       ` Peter Zijlstra
2011-08-25 19:29                   ` James Bottomley
2011-08-25 23:01                     ` Peter Zijlstra
2011-08-25 18:40           ` Peter Zijlstra
2011-09-06  9:58 ` Johannes Weiner
2011-09-06 10:04   ` KAMEZAWA Hiroyuki
2011-09-06 10:49     ` Johannes Weiner
2011-09-06 10:45       ` KAMEZAWA Hiroyuki
2011-09-06 18:04   ` Greg Thelen
