All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/5] count: Employ new code-snippet scheme (cont.)
@ 2018-10-08  1:40 Akira Yokosawa
  2018-10-08  1:42 ` [PATCH 1/5] count: Employ new scheme for snippet of count_end and count_tstat Akira Yokosawa
                   ` (6 more replies)
  0 siblings, 7 replies; 8+ messages in thread
From: Akira Yokosawa @ 2018-10-08  1:40 UTC (permalink / raw)
  To: Paul E. McKenney; +Cc: perfbook, Akira Yokosawa

Hi Paul,

This is the continuation of changes to snippets in the "count"
chapter.

To help your review of the changes regarding READ_ONCE()/WRITE_ONCE(),
I split the patches to the ones just applying the scheme and
the ones modifying READ_ONCE()/WRITE_ONCE().

Note on patch #3: It fixes the segfault observed in count_tstat.
However, I'm not sure if you want to take it or not. The fail can
be thought of as a Quick Quiz within the Answer to the Quick Quiz.
So keeping it as is would also be reasonable.

        Thanks, Akira
--
Akira Yokosawa (5):
  count: Employ new scheme for snippet of count_end and count_tstat
  count: Fix uses of READ/WRITE_ONCE()s in count_end and count_tstat
  count: Tweak counttorture.h to avoid segfault
  count: Employ new scheme for snippet of count_lim
  count: Fix uses of READ/WRITE_ONCE() in count_lim

 CodeSamples/count/count_end.c    |  53 ++---
 CodeSamples/count/count_lim.c    | 147 +++++++------
 CodeSamples/count/count_tstat.c  |  27 +--
 CodeSamples/count/counttorture.h |  21 +-
 count/count.tex                  | 437 +++++++++++++--------------------------
 5 files changed, 287 insertions(+), 398 deletions(-)

-- 
2.7.4


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/5] count: Employ new scheme for snippet of count_end and count_tstat
  2018-10-08  1:40 [PATCH 0/5] count: Employ new code-snippet scheme (cont.) Akira Yokosawa
@ 2018-10-08  1:42 ` Akira Yokosawa
  2018-10-08  1:44 ` [PATCH 2/5] count: Fix uses of READ/WRITE_ONCE()s in " Akira Yokosawa
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Akira Yokosawa @ 2018-10-08  1:42 UTC (permalink / raw)
  To: Paul E. McKenney; +Cc: perfbook, Akira Yokosawa

From eca7914e5161de426bb04a623e9586bae89d8a73 Mon Sep 17 00:00:00 2001
From: Akira Yokosawa <akiyks@gmail.com>
Date: Sat, 6 Oct 2018 18:49:11 +0900
Subject: [PATCH 1/5] count: Employ new scheme for snippet of count_end and count_tstat

Signed-off-by: Akira Yokosawa <akiyks@gmail.com>
---
 CodeSamples/count/count_end.c   |  50 ++++++++-------
 CodeSamples/count/count_tstat.c |  15 +++--
 count/count.tex                 | 139 ++++++++++------------------------------
 3 files changed, 68 insertions(+), 136 deletions(-)

diff --git a/CodeSamples/count/count_end.c b/CodeSamples/count/count_end.c
index 00335f2..94b9273 100644
--- a/CodeSamples/count/count_end.c
+++ b/CodeSamples/count/count_end.c
@@ -21,53 +21,55 @@
 
 #include "../api.h"
 
-unsigned long __thread counter = 0;
+//\begin{snippet}[labelbase=ln:count:count_end:whole,commandchars=\\\@\$]
+unsigned long __thread counter = 0;		//\lnlbl{var:b}
 unsigned long *counterp[NR_THREADS] = { NULL };
 unsigned long finalcount = 0;
-DEFINE_SPINLOCK(final_mutex);
+DEFINE_SPINLOCK(final_mutex);			//\lnlbl{var:e}
 
-__inline__ void inc_count(void)
+static __inline__ void inc_count(void)		//\lnlbl{inc:b}
 {
 	WRITE_ONCE(counter,
 		   READ_ONCE(counter) + 1);
-}
+}						//\lnlbl{inc:e}
 
-unsigned long read_count(void)
+static __inline__ unsigned long read_count(void)
 {
 	int t;
 	unsigned long sum;
 
-	spin_lock(&final_mutex);
-	sum = finalcount;
-	for_each_thread(t)
-		if (counterp[t] != NULL)
-			sum += *counterp[t];
-	spin_unlock(&final_mutex);
-	return sum;
-}
-
-__inline__ void count_init(void)
-{
+	spin_lock(&final_mutex);			//\lnlbl{read:acquire}
+	sum = finalcount;				//\lnlbl{read:sum:init}
+	for_each_thread(t)				//\lnlbl{read:loop:b}
+		if (counterp[t] != NULL)		//\lnlbl{read:check}
+			sum += *counterp[t];		//\lnlbl{read:loop:e}
+	spin_unlock(&final_mutex);			//\lnlbl{read:release}
+	return sum;					//\lnlbl{read:return}
 }
 
-void count_register_thread(unsigned long *p)
+__inline__ void count_init(void)		//\fcvexclude
+{						//\fcvexclude
+}						//\fcvexclude
+						//\fcvexclude
+void count_register_thread(unsigned long *p)	//\lnlbl{reg:b}
 {
 	int idx = smp_thread_id();
 
 	spin_lock(&final_mutex);
 	counterp[idx] = &counter;
 	spin_unlock(&final_mutex);
-}
+}						//\lnlbl{reg:e}
 
-void count_unregister_thread(int nthreadsexpected)
+void count_unregister_thread(int nthreadsexpected)	//\lnlbl{unreg:b}
 {
 	int idx = smp_thread_id();
 
-	spin_lock(&final_mutex);
-	finalcount += counter;
-	counterp[idx] = NULL;
-	spin_unlock(&final_mutex);
-}
+	spin_lock(&final_mutex);			//\lnlbl{unreg:acquire}
+	finalcount += counter;				//\lnlbl{unreg:add}
+	counterp[idx] = NULL;				//\lnlbl{unreg:NULL}
+	spin_unlock(&final_mutex);			//\lnlbl{unreg:release}
+}							//\lnlbl{unreg:e}
+//\end{snippet}
 
 __inline__ void count_cleanup(void)
 {
diff --git a/CodeSamples/count/count_tstat.c b/CodeSamples/count/count_tstat.c
index 59e4025..4318bc3 100644
--- a/CodeSamples/count/count_tstat.c
+++ b/CodeSamples/count/count_tstat.c
@@ -22,18 +22,20 @@
 
 #include "../api.h"
 
+//\begin{snippet}[labelbase=ln:count:count_tstat:whole,commandchars=\\\@\$]
 unsigned long __thread counter = 0;
 unsigned long *counterp[NR_THREADS] = { NULL };
 int finalthreadcount = 0;
 DEFINE_SPINLOCK(final_mutex);
 
-void inc_count(void)
+static __inline__ void inc_count(void)
 {
 	WRITE_ONCE(counter,
 		   READ_ONCE(counter) + 1);
 }
 
-unsigned long read_count(void)  /* known failure with counttorture! */
+static __inline__ unsigned long read_count(void)
+                  /* known failure with counttorture! */
 {
 	int t;
 	unsigned long sum = 0;
@@ -44,10 +46,10 @@ unsigned long read_count(void)  /* known failure with counttorture! */
 	return sum;
 }
 
-void count_init(void)
-{
-}
-
+void count_init(void)		//\fcvexclude
+{				//\fcvexclude
+}				//\fcvexclude
+				//\fcvexclude
 void count_register_thread(unsigned long *p)
 {
 	counterp[smp_thread_id()] = &counter;
@@ -61,6 +63,7 @@ void count_unregister_thread(int nthreadsexpected)
 	while (finalthreadcount < nthreadsexpected)
 		poll(NULL, 0, 1);
 }
+//\end{snippet}
 
 void count_cleanup(void)
 {
diff --git a/count/count.tex b/count/count.tex
index aea7b93..a4e9c7f 100644
--- a/count/count.tex
+++ b/count/count.tex
@@ -930,55 +930,7 @@ comes at the cost of the additional thread running \co{eventual()}.
 \label{sec:count:Per-Thread-Variable-Based Implementation}
 
 \begin{listing}[tb]
-{ \scriptsize
-\begin{verbbox}
-  1 long __thread counter = 0;
-  2 long *counterp[NR_THREADS] = { NULL };
-  3 long finalcount = 0;
-  4 DEFINE_SPINLOCK(final_mutex);
-  5 
-  6 void inc_count(void)
-  7 {
-  8   WRITE_ONCE(counter,
-  9              READ_ONCE(counter) + 1);counter++;
- 10 }
- 11 
- 12 long read_count(void)
- 13 {
- 14   int t;
- 15   long sum;
- 16 
- 17   spin_lock(&final_mutex);
- 18   sum = finalcount;
- 19   for_each_thread(t)
- 20     if (counterp[t] != NULL)
- 21       sum += *counterp[t];
- 22   spin_unlock(&final_mutex);
- 23   return sum;
- 24 }
- 25 
- 26 void count_register_thread(void)
- 27 {
- 28   int idx = smp_thread_id();
- 29 
- 30   spin_lock(&final_mutex);
- 31   counterp[idx] = &counter;
- 32   spin_unlock(&final_mutex);
- 33 }
- 34 
- 35 void count_unregister_thread(int nthreadsexpected)
- 36 {
- 37   int idx = smp_thread_id();
- 38 
- 39   spin_lock(&final_mutex);
- 40   finalcount += counter;
- 41   counterp[idx] = NULL;
- 42   spin_unlock(&final_mutex);
- 43 }
-\end{verbbox}
-}
-\centering
-\theverbbox
+\input{CodeSamples/count/count_end@whole.fcv}
 \caption{Per-Thread Statistical Counters}
 \label{lst:count:Per-Thread Statistical Counters}
 \end{listing}
@@ -992,11 +944,14 @@ a statistical counter that not only scales, but that also incurs little
 or no performance penalty to incrementers compared to simple non-atomic
 increment.
 
-Lines~1-4 define needed variables: \co{counter} is the per-thread counter
+\begin{lineref}[ln:count:count_end:whole]
+Lines~\lnref{var:b}-\lnref{var:e} define needed variables:
+\co{counter} is the per-thread counter
 variable, the \co{counterp[]} array allows threads to access each others'
 counters, \co{finalcount} accumulates the total as individual threads exit,
 and \co{final_mutex} coordinates between threads accumulating the total
 value of the counter and exiting threads.
+\end{lineref}
 
 \QuickQuiz{}
 	Why do we need an explicit array to find the other threads'
@@ -1041,24 +996,31 @@ value of the counter and exiting threads.
 	that it will someday appear.
 } \QuickQuizEnd
 
+\begin{lineref}[ln:count:count_end:whole:inc]
 The \co{inc_count()} function used by updaters is quite simple, as can
-be seen on lines~6-10.
+be seen on lines~\lnref{b}-\lnref{e}.
+\end{lineref}
 
+\begin{lineref}[ln:count:count_end:whole:read]
 The \co{read_count()} function used by readers is a bit more complex.
-Line~17 acquires a lock to exclude exiting threads, and line~22 releases
-it.
-Line~18 initializes the sum to the count accumulated by those threads that
-have already exited, and lines~19-21 sum the counts being accumulated
+Line~\lnref{acquire} acquires a lock to exclude exiting threads, and
+line~\lnref{release} releases it.
+Line~\lnref{sum:init} initializes the sum to the count accumulated by those threads that
+have already exited, and
+lines~\lnref{loop:b}-\lnref{loop:e} sum the counts being accumulated
 by threads currently running.
-Finally, line~23 returns the sum.
+Finally, line~\lnref{return} returns the sum.
+\end{lineref}
 
 \QuickQuiz{}
-	Doesn't the check for \co{NULL} on line~20 of
+	\begin{lineref}[ln:count:count_end:whole:read]
+	Doesn't the check for \co{NULL} on line~\lnref{check} of
 	Listing~\ref{lst:count:Per-Thread Statistical Counters}
 	add extra branch mispredictions?
 	Why not have a variable set permanently to zero, and point
 	unused counter-pointers to that variable rather than setting
 	them to \co{NULL}?
+	\end{lineref}
 \QuickQuizAnswer{
 	This is a reasonable strategy.
 	Checking for the performance difference is left as an exercise
@@ -1093,10 +1055,13 @@ Finally, line~23 returns the sum.
 	\co{inc_count()} fastpath.
 } \QuickQuizEnd
 
-Lines~26-33 show the \co{count_register_thread()} function, which
+\begin{lineref}[ln:count:count_end:whole:reg]
+Lines~\lnref{b}-\lnref{e} show the \co{count_register_thread()}
+function, which
 must be called by each thread before its first use of this counter.
 This function simply sets up this thread's element of the \co{counterp[]}
 array to point to its per-thread \co{counter} variable.
+\end{lineref}
 
 \QuickQuiz{}
 	Why on earth do we need to acquire the lock in
@@ -1114,18 +1079,23 @@ array to point to its per-thread \co{counter} variable.
 	a hundred or so CPUs, there is no need to get fancy.
 } \QuickQuizEnd
 
-Lines~35-43 show the \co{count_unregister_thread()} function, which
+\begin{lineref}[ln:count:count_end:whole:unreg]
+Lines~\lnref{b}-\lnref{e} show the \co{count_unregister_thread()}
+function, which
 must be called prior to exit by each thread that previously called
 \co{count_register_thread()}.
-Line~39 acquires the lock, and line~42 releases it, thus excluding any
+Line~\lnref{acquire} acquires the lock, and
+line~\lnref{release} releases it, thus excluding any
 calls to \co{read_count()} as well as other calls to
 \co{count_unregister_thread()}.
-Line~40 adds this thread's \co{counter} to the global \co{finalcount},
-and then line~41 \co{NULL}s out its \co{counterp[]} array entry.
+Line~\lnref{add} adds this thread's \co{counter} to the global
+\co{finalcount},
+and then line~\lnref{NULL} \co{NULL}s out its \co{counterp[]} array entry.
 A subsequent call to \co{read_count()} will see the exiting thread's
 count in the global \co{finalcount}, and will skip the exiting thread
 when sequencing through the \co{counterp[]} array, thus obtaining
 the correct total.
+\end{lineref}
 
 This approach gives updaters almost exactly the same performance as
 a non-atomic add, and also scales linearly.
@@ -1147,50 +1117,7 @@ variables vanish when that thread exits.
 	if the corresponding CPU never existed and never will exist.
 
 \begin{listing}[tb]
-{ \scriptsize
-\begin{verbbox}
-  1 long __thread counter = 0;
-  2 long *counterp[NR_THREADS] = { NULL };
-  3 int finalthreadcount = 0;
-  4 DEFINE_SPINLOCK(final_mutex);
-  5 
-  6 void inc_count(void)
-  7 {
-  8   counter++;
-  9 }
- 10 
- 11 long read_count(void)
- 12 {
- 13   int t;
- 14   long sum = 0;
- 15 
- 16   for_each_thread(t)
- 17     if (counterp[t] != NULL)
- 18       sum += *counterp[t];
- 19   return sum;
- 20 }
- 21 
- 22 void count_init(void)
- 23 {
- 24 }
- 25 
- 26 void count_register_thread(void)
- 27 {
- 28   counterp[smp_thread_id()] = &counter;
- 29 }
- 30 
- 31 void count_unregister_thread(int nthreadsexpected)
- 32 {
- 33   spin_lock(&final_mutex);
- 34   finalthreadcount++;
- 35   spin_unlock(&final_mutex);
- 36   while (finalthreadcount < nthreadsexpected)
- 37     poll(NULL, 0, 1);
- 38 }
-\end{verbbox}
-}
-\centering
-\theverbbox
+\input{CodeSamples/count/count_tstat@whole.fcv}
 \caption{Per-Thread Statistical Counters With Lockless Summation}
 \label{lst:count:Per-Thread Statistical Counters With Lockless Summation}
 \end{listing}
-- 
2.7.4



^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/5] count: Fix uses of READ/WRITE_ONCE()s in count_end and count_tstat
  2018-10-08  1:40 [PATCH 0/5] count: Employ new code-snippet scheme (cont.) Akira Yokosawa
  2018-10-08  1:42 ` [PATCH 1/5] count: Employ new scheme for snippet of count_end and count_tstat Akira Yokosawa
@ 2018-10-08  1:44 ` Akira Yokosawa
  2018-10-08  1:46 ` [PATCH 3/5] count: Tweak counttorture.h to avoid segfault Akira Yokosawa
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Akira Yokosawa @ 2018-10-08  1:44 UTC (permalink / raw)
  To: Paul E. McKenney; +Cc: perfbook, Akira Yokosawa

From 4dc689c1439cc6f68989cdc2b5c177ec06002910 Mon Sep 17 00:00:00 2001
From: Akira Yokosawa <akiyks@gmail.com>
Date: Mon, 8 Oct 2018 09:13:15 +0900
Subject: [PATCH 2/5] count: Fix uses of READ/WRITE_ONCE()s in count_end and count_tstat

Add/remove READ_ONCE()/WRITE_ONCE()s to comply with the
guideline to be expanded in toolsoftrade.

Note on addition of READ_ONCE() while holding a lock:
In read_count() of count_end.c, READ_ONCE() is added to access
of *counterp[t] while holding final_mutex. This is because
final_mutex protects only counterp[], but per-thread variables
pointed to by the array are updated by the owning threads'
inc_count().

Signed-off-by: Akira Yokosawa <akiyks@gmail.com>
---
 CodeSamples/count/count_end.c   |  5 ++---
 CodeSamples/count/count_tstat.c | 11 +++++------
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/CodeSamples/count/count_end.c b/CodeSamples/count/count_end.c
index 94b9273..61f9d81 100644
--- a/CodeSamples/count/count_end.c
+++ b/CodeSamples/count/count_end.c
@@ -29,8 +29,7 @@ DEFINE_SPINLOCK(final_mutex);			//\lnlbl{var:e}
 
 static __inline__ void inc_count(void)		//\lnlbl{inc:b}
 {
-	WRITE_ONCE(counter,
-		   READ_ONCE(counter) + 1);
+	WRITE_ONCE(counter, counter + 1);
 }						//\lnlbl{inc:e}
 
 static __inline__ unsigned long read_count(void)
@@ -42,7 +41,7 @@ static __inline__ unsigned long read_count(void)
 	sum = finalcount;				//\lnlbl{read:sum:init}
 	for_each_thread(t)				//\lnlbl{read:loop:b}
 		if (counterp[t] != NULL)		//\lnlbl{read:check}
-			sum += *counterp[t];		//\lnlbl{read:loop:e}
+			sum += READ_ONCE(*counterp[t]);	//\lnlbl{read:loop:e}
 	spin_unlock(&final_mutex);			//\lnlbl{read:release}
 	return sum;					//\lnlbl{read:return}
 }
diff --git a/CodeSamples/count/count_tstat.c b/CodeSamples/count/count_tstat.c
index 4318bc3..4b99415 100644
--- a/CodeSamples/count/count_tstat.c
+++ b/CodeSamples/count/count_tstat.c
@@ -30,8 +30,7 @@ DEFINE_SPINLOCK(final_mutex);
 
 static __inline__ void inc_count(void)
 {
-	WRITE_ONCE(counter,
-		   READ_ONCE(counter) + 1);
+	WRITE_ONCE(counter, counter + 1);
 }
 
 static __inline__ unsigned long read_count(void)
@@ -41,8 +40,8 @@ static __inline__ unsigned long read_count(void)
 	unsigned long sum = 0;
 
 	for_each_thread(t)
-		if (counterp[t] != NULL)
-			sum += *counterp[t];
+		if (READ_ONCE(counterp[t]) != NULL)
+			sum += READ_ONCE(*counterp[t]);
 	return sum;
 }
 
@@ -52,7 +51,7 @@ void count_init(void)		//\fcvexclude
 				//\fcvexclude
 void count_register_thread(unsigned long *p)
 {
-	counterp[smp_thread_id()] = &counter;
+	WRITE_ONCE(counterp[smp_thread_id()], &counter);
 }
 
 void count_unregister_thread(int nthreadsexpected)
@@ -60,7 +59,7 @@ void count_unregister_thread(int nthreadsexpected)
 	spin_lock(&final_mutex);
 	finalthreadcount++;
 	spin_unlock(&final_mutex);
-	while (finalthreadcount < nthreadsexpected)
+	while (READ_ONCE(finalthreadcount) < nthreadsexpected)
 		poll(NULL, 0, 1);
 }
 //\end{snippet}
-- 
2.7.4



^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 3/5] count: Tweak counttorture.h to avoid segfault
  2018-10-08  1:40 [PATCH 0/5] count: Employ new code-snippet scheme (cont.) Akira Yokosawa
  2018-10-08  1:42 ` [PATCH 1/5] count: Employ new scheme for snippet of count_end and count_tstat Akira Yokosawa
  2018-10-08  1:44 ` [PATCH 2/5] count: Fix uses of READ/WRITE_ONCE()s in " Akira Yokosawa
@ 2018-10-08  1:46 ` Akira Yokosawa
  2018-10-08  1:47 ` [PATCH 4/5] count: Employ new scheme for snippet of count_lim Akira Yokosawa
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Akira Yokosawa @ 2018-10-08  1:46 UTC (permalink / raw)
  To: Paul E. McKenney; +Cc: perfbook, Akira Yokosawa

From 19fb8b2915e5d918a6c1e98f265ab89bd373fea7 Mon Sep 17 00:00:00 2001
From: Akira Yokosawa <akiyks@gmail.com>
Date: Sat, 6 Oct 2018 23:47:44 +0900
Subject: [PATCH 3/5] count: Tweak counttorture.h to avoid segfault

As has been mentioned in comment to count_read(), count_tstat
sometimes fails. The cause is the use of count_read() in
counttorture.h after wait_all_threads() returns. Because __thread
variables are freed when owning threads exit, count_read() would
access nonexistent variables.

As a workaround, add #ifndef KEEP_GCC_THREAD_LOCAL in
counttorture.h and define _wait_all_thread(),
_count_unregister_thread(), and final_wait_all_thread() as macros
which change behavior when KEEP_GCC_THREAD_LOCAL is defined.
They permit counttorture's controlling thread to participate in
the thread group to be kept alive.

Signed-off-by: Akira Yokosawa <akiyks@gmail.com>
---
 CodeSamples/count/count_tstat.c  |  3 ++-
 CodeSamples/count/counttorture.h | 21 ++++++++++++++++++---
 count/count.tex                  |  4 ++--
 3 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/CodeSamples/count/count_tstat.c b/CodeSamples/count/count_tstat.c
index 4b99415..d30ee48 100644
--- a/CodeSamples/count/count_tstat.c
+++ b/CodeSamples/count/count_tstat.c
@@ -34,7 +34,7 @@ static __inline__ void inc_count(void)
 }
 
 static __inline__ unsigned long read_count(void)
-                  /* known failure with counttorture! */
+                  /* need to tweak counttorture! */
 {
 	int t;
 	unsigned long sum = 0;
@@ -69,4 +69,5 @@ void count_cleanup(void)
 }
 
 #define NEED_REGISTER_THREAD
+#define KEEP_GCC_THREAD_LOCAL
 #include "counttorture.h"
diff --git a/CodeSamples/count/counttorture.h b/CodeSamples/count/counttorture.h
index bdfc7d4..60d1d11 100644
--- a/CodeSamples/count/counttorture.h
+++ b/CodeSamples/count/counttorture.h
@@ -64,6 +64,20 @@ int goflag __attribute__((__aligned__(CACHE_LINE_SIZE))) = GOFLAG_INIT;
 #define count_unregister_thread(n)	do ; while (0)
 #endif /* #ifndef NEED_REGISTER_THREAD */
 
+#ifndef KEEP_GCC_THREAD_LOCAL
+#define _wait_all_threads() wait_all_threads()
+#define _count_unregister_thread(n) count_unregister_thread(n)
+#define final_wait_all_threads()
+#else  /* #ifndef KEEP_GCC_THREAD_LOCAL */
+#define _wait_all_threads() { \
+	while (READ_ONCE(finalthreadcount) < nthreadsexpected) \
+		poll(NULL, 0, 1);}
+#define _count_unregister_thread(n) count_unregister_thread(n + 1)
+#define final_wait_all_threads() { \
+	WRITE_ONCE(finalthreadcount, nthreadsexpected + 1); \
+	wait_all_threads();}
+#endif /* #ifndef KEEP_GCC_THREAD_LOCAL */
+
 unsigned long garbage = 0; /* disable compiler optimizations. */
 
 /*
@@ -90,7 +104,7 @@ void *count_read_perf_test(void *arg)
 		n_reads_local += COUNT_READ_RUN;
 	}
 	__get_thread_var(n_reads_pt) += n_reads_local;
-	count_unregister_thread(nthreadsexpected);
+	_count_unregister_thread(nthreadsexpected);
 	garbage += j;
 
 	return (NULL);
@@ -113,7 +127,7 @@ void *count_update_perf_test(void *arg)
 		n_updates_local += COUNT_UPDATE_RUN;
 	}
 	__get_thread_var(n_updates_pt) += n_updates_local;
-	count_unregister_thread(nthreadsexpected);
+	_count_unregister_thread(nthreadsexpected);
 	return NULL;
 }
 
@@ -139,7 +153,7 @@ void perftestrun(int nthreads, int nreaders, int nupdaters)
 	smp_mb();
 	goflag = GOFLAG_STOP;
 	smp_mb();
-	wait_all_threads();
+	_wait_all_threads();
 	count_cleanup();
 	for_each_thread(t) {
 		n_reads += per_thread(n_reads_pt, t);
@@ -159,6 +173,7 @@ void perftestrun(int nthreads, int nreaders, int nupdaters)
 	        (double)n_reads),
 	       ((duration * 1000*1000.*(double)nupdaters) /
 	        (double)n_updates));
+	final_wait_all_threads();
 	exit(EXIT_SUCCESS);
 }
 
diff --git a/count/count.tex b/count/count.tex
index a4e9c7f..b008a19 100644
--- a/count/count.tex
+++ b/count/count.tex
@@ -1127,9 +1127,9 @@ variables vanish when that thread exits.
 	Listing~\ref{lst:count:Per-Thread Statistical Counters With Lockless Summation}
 	(\path{count_tstat.c}).
 	Analysis of this code is left as an exercise to the reader,
-	however, please note that it does not fit well into the
+	however, please note that it requires tweaks in the
 	\path{counttorture.h} counter-evaluation scheme.
-	(Why not?)
+	(Hint: See \co{#ifndef KEEP_GCC_THREAD_LOCAL}.)
 	Chapter~\ref{chp:Deferred Processing} will introduce 
 	synchronization mechanisms that handle this situation in a much
 	more graceful manner.
-- 
2.7.4



^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 4/5] count: Employ new scheme for snippet of count_lim
  2018-10-08  1:40 [PATCH 0/5] count: Employ new code-snippet scheme (cont.) Akira Yokosawa
                   ` (2 preceding siblings ...)
  2018-10-08  1:46 ` [PATCH 3/5] count: Tweak counttorture.h to avoid segfault Akira Yokosawa
@ 2018-10-08  1:47 ` Akira Yokosawa
  2018-10-08  1:49 ` [PATCH 5/5] count: Fix uses of READ/WRITE_ONCE() in count_lim Akira Yokosawa
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Akira Yokosawa @ 2018-10-08  1:47 UTC (permalink / raw)
  To: Paul E. McKenney; +Cc: perfbook, Akira Yokosawa

From 39a3830b185c9f4f6d65ac9162d9981522ed5b94 Mon Sep 17 00:00:00 2001
From: Akira Yokosawa <akiyks@gmail.com>
Date: Mon, 8 Oct 2018 10:23:32 +0900
Subject: [PATCH 4/5] count: Employ new scheme for snippet of count_lim

NOTE: To bundle code for the "Utility Functions" snippet,
definition of globalize_count() and balance_count() is deferred
to behind that of read_count().

Signed-off-by: Akira Yokosawa <akiyks@gmail.com>
---
 CodeSamples/count/count_lim.c | 147 +++++++++++----------
 count/count.tex               | 294 ++++++++++++++++--------------------------
 2 files changed, 192 insertions(+), 249 deletions(-)

diff --git a/CodeSamples/count/count_lim.c b/CodeSamples/count/count_lim.c
index 4cc8595..69ced9a 100644
--- a/CodeSamples/count/count_lim.c
+++ b/CodeSamples/count/count_lim.c
@@ -20,105 +20,116 @@
 
 #include "../api.h"
 
+static __inline__ void globalize_count(void);
+static __inline__ void balance_count(void);
+
+//\begin{snippet}[labelbase=ln:count:count_lim:variable,commandchars=\\\@\$]
 unsigned long __thread counter = 0;
 unsigned long __thread countermax = 0;
-unsigned long globalcountmax = 10000;
-unsigned long globalcount = 0;
-unsigned long globalreserve = 0;
+unsigned long globalcountmax = 10000;		//\lnlbl{globalcountmax}
+unsigned long globalcount = 0;			//\lnlbl{globalcount}
+unsigned long globalreserve = 0;		//\lnlbl{globalreserve}
 unsigned long *counterp[NR_THREADS] = { NULL };
 DEFINE_SPINLOCK(gblcnt_mutex);
+//\end{snippet}
 
-static void globalize_count(void)
-{
-	globalcount += counter;
-	counter = 0;
-	globalreserve -= countermax;
-	countermax = 0;
-}
-
-static void balance_count(void)
-{
-	countermax = globalcountmax - globalcount - globalreserve;
-	countermax /= num_online_threads();
-	globalreserve += countermax;
-	counter = countermax / 2;
-	if (counter > globalcount)
-		counter = globalcount;
-	globalcount -= counter;
-}
-
-int add_count(unsigned long delta)
+//\begin{snippet}[labelbase=ln:count:count_lim:add_sub_read,commandchars=\\\@\$]
+static __inline__ int add_count(unsigned long delta)	//\lnlbl{add:b}
 {
-	if (countermax - counter >= delta) {
-		counter += delta;
-		return 1;
+	if (countermax - counter >= delta) {		//\lnlbl{add:checklocal}
+		counter += delta);			//\lnlbl{add:add}
+		return 1;				//\lnlbl{add:return:ls}
 	}
-	spin_lock(&gblcnt_mutex);
-	globalize_count();
-	if (globalcountmax - globalcount - globalreserve < delta) {
-		spin_unlock(&gblcnt_mutex);
-		return 0;
+	spin_lock(&gblcnt_mutex);			//\lnlbl{add:acquire}
+	globalize_count();				//\lnlbl{add:globalize}
+	if (globalcountmax -				//\lnlbl{add:checkglb:b}
+	    globalcount - globalreserve < delta) {	//\lnlbl{add:checkglb:e}
+		spin_unlock(&gblcnt_mutex);		//\lnlbl{add:release:f}
+		return 0;				//\lnlbl{add:return:gf}
 	}
-	globalcount += delta;
-	balance_count();
-	spin_unlock(&gblcnt_mutex);
-	return 1;
-}
+	globalcount += delta;				//\lnlbl{add:addglb}
+	balance_count();				//\lnlbl{add:balance}
+	spin_unlock(&gblcnt_mutex);			//\lnlbl{add:release:s}
+	return 1;					//\lnlbl{add:return:gs}
+}							//\lnlbl{add:e}
 
-int sub_count(unsigned long delta)
+static __inline__ int sub_count(unsigned long delta)	//\lnlbl{sub:b}
 {
-	if (counter >= delta) {
-		counter -= delta;
-		return 1;
+	if (counter >= delta) {				//\lnlbl{sub:checklocal}
+		counter -= delta;			//\lnlbl{sub:sub}
+		return 1;				//\lnlbl{sub:return:ls}
 	}
-	spin_lock(&gblcnt_mutex);
-	globalize_count();
-	if (globalcount < delta) {
-		spin_unlock(&gblcnt_mutex);
-		return 0;
+	spin_lock(&gblcnt_mutex);			//\lnlbl{sub:acquire}
+	globalize_count();				//\lnlbl{sub:globalize}
+	if (globalcount < delta) {			//\lnlbl{sub:checkglb}
+		spin_unlock(&gblcnt_mutex);		//\lnlbl{sub:release:f}
+		return 0;				//\lnlbl{sub:return:gf}
 	}
-	globalcount -= delta;
-	balance_count();
-	spin_unlock(&gblcnt_mutex);
-	return 1;
-}
+	globalcount -= delta;				//\lnlbl{sub:subglb}
+	balance_count();				//\lnlbl{sub:balance}
+	spin_unlock(&gblcnt_mutex);			//\lnlbl{sub:release:s}
+	return 1;					//\lnlbl{sub:return:gs}
+}							//\lnlbl{sub:e}
 
-unsigned long read_count(void)
+static __inline__ unsigned long read_count(void)	//\lnlbl{read:b}
 {
 	int t;
 	unsigned long sum;
 
-	spin_lock(&gblcnt_mutex);
-	sum = globalcount;
-	for_each_thread(t)
+	spin_lock(&gblcnt_mutex);			//\lnlbl{read:acquire}
+	sum = globalcount;				//\lnlbl{read:initsum}
+	for_each_thread(t)				//\lnlbl{read:loop:b}
 		if (counterp[t] != NULL)
-			sum += *counterp[t];
-	spin_unlock(&gblcnt_mutex);
-	return sum;
-}
+			sum += *counterp[t];		//\lnlbl{read:loop:e}
+	spin_unlock(&gblcnt_mutex);			//\lnlbl{read:release}
+	return sum;					//\lnlbl{read:return}
+}							//\lnlbl{read:e}
+//\end{snippet}
 
-void count_init(void)
+//\begin{snippet}[labelbase=ln:count:count_lim:utility,commandchars=\\\@\$]
+static __inline__ void globalize_count(void)	//\lnlbl{globalize:b}
 {
-}
+	globalcount += counter;			//\lnlbl{globalize:add}
+	counter = 0;				//\lnlbl{globalize:zero}
+	globalreserve -= countermax;		//\lnlbl{globalize:sub}
+	countermax = 0;			//\lnlbl{globalize:zeromax}
+}						//\lnlbl{globalize:e}
 
-void count_register_thread(void)
+static __inline__ void balance_count(void)	//\lnlbl{balance:b}
+{
+	countermax = globalcountmax -		//\lnlbl{balance:share:b}
+	             globalcount - globalreserve;
+	countermax /= num_online_threads();	//\lnlbl{balance:share:e}
+	globalreserve += countermax;		//\lnlbl{balance:adjreserve}
+	counter = countermax / 2;		//\lnlbl{balance:middle}
+	if (counter > globalcount)		//\lnlbl{balance:check}
+		counter = globalcount;		//\lnlbl{balance:adjcounter}
+	globalcount -= counter;			//\lnlbl{balance:adjglobal}
+}						//\lnlbl{balance:e}
+
+void count_init(void)			//\fcvexclude
+{					//\fcvexclude
+}					//\fcvexclude
+					//\fcvexclude
+void count_register_thread(void)	//\lnlbl{register:b}
 {
 	int idx = smp_thread_id();
 
 	spin_lock(&gblcnt_mutex);
 	counterp[idx] = &counter;
 	spin_unlock(&gblcnt_mutex);
-}
+}					//\lnlbl{register:e}
 
-void count_unregister_thread(int nthreadsexpected)
+void count_unregister_thread(int nthreadsexpected)	//\lnlbl{unregister:b}
 {
 	int idx = smp_thread_id();
 
-	spin_lock(&gblcnt_mutex);
-	globalize_count();
-	counterp[idx] = NULL;
-	spin_unlock(&gblcnt_mutex);
-}
+	spin_lock(&gblcnt_mutex);		//\lnlbl{unregister:acquire}
+	globalize_count();			//\lnlbl{unregister:globalize}
+	counterp[idx] = NULL;			//\lnlbl{unregister:clear}
+	spin_unlock(&gblcnt_mutex);		//\lnlbl{unregister:release}
+}						//\lnlbl{unregister:e}
+//\end{snippet}
 
 void count_cleanup(void)
 {
diff --git a/count/count.tex b/count/count.tex
index b008a19..db2107e 100644
--- a/count/count.tex
+++ b/count/count.tex
@@ -1312,19 +1312,7 @@ Section~\ref{sec:SMPdesign:Parallel Fastpath}.
 \label{sec:count:Simple Limit Counter Implementation}
 
 \begin{listing}[tbp]
-{ \scriptsize
-\begin{verbbox}
-  1 unsigned long __thread counter = 0;
-  2 unsigned long __thread countermax = 0;
-  3 unsigned long globalcountmax = 10000;
-  4 unsigned long globalcount = 0;
-  5 unsigned long globalreserve = 0;
-  6 unsigned long *counterp[NR_THREADS] = { NULL };
-  7 DEFINE_SPINLOCK(gblcnt_mutex);
-\end{verbbox}
-}
-\centering
-\theverbbox
+\input{CodeSamples/count/count_lim@variable.fcv}
 \caption{Simple Limit Counter Variables}
 \label{lst:count:Simple Limit Counter Variables}
 \end{listing}
@@ -1336,19 +1324,23 @@ Section~\ref{sec:SMPdesign:Parallel Fastpath}.
 \label{fig:count:Simple Limit Counter Variable Relationships}
 \end{figure}
 
+\begin{lineref}[ln:count:count_lim:variable]
 Listing~\ref{lst:count:Simple Limit Counter Variables}
 shows both the per-thread and global variables used by this
 implementation.
 The per-thread \co{counter} and \co{countermax} variables are the
 corresponding thread's local counter and the upper bound on that
 counter, respectively.
-The \co{globalcountmax} variable on line~3 contains the upper
+The \co{globalcountmax} variable on
+line~\lnref{globalcountmax} contains the upper
 bound for the aggregate counter, and the \co{globalcount} variable
-on line~4 is the global counter.
+on line~\lnref{globalcount} is the global counter.
 The sum of \co{globalcount} and each thread's \co{counter} gives
 the aggregate value of the overall counter.
-The \co{globalreserve} variable on line~5 is the sum of all of the
+The \co{globalreserve} variable on
+line~\lnref{globalreserve} is the sum of all of the
 per-thread \co{countermax} variables.
+\end{lineref}
 The relationship among these variables is shown by
 Figure~\ref{fig:count:Simple Limit Counter Variable Relationships}:
 \begin{enumerate}
@@ -1367,62 +1359,7 @@ is permitted to access or modify any of the global variables unless it
 has acquired \co{gblcnt_mutex}.
 
 \begin{listing}[tbp]
-{ \scriptsize
-\begin{verbbox}
-  1 int add_count(unsigned long delta)
-  2 {
-  3   if (countermax - counter >= delta) {
-  4     counter += delta;
-  5     return 1;
-  6   }
-  7   spin_lock(&gblcnt_mutex);
-  8   globalize_count();
-  9   if (globalcountmax -
- 10       globalcount - globalreserve < delta) {
- 11     spin_unlock(&gblcnt_mutex);
- 12     return 0;
- 13   }
- 14   globalcount += delta;
- 15   balance_count();
- 16   spin_unlock(&gblcnt_mutex);
- 17   return 1;
- 18 }
- 19 
- 20 int sub_count(unsigned long delta)
- 21 {
- 22   if (counter >= delta) {
- 23     counter -= delta;
- 24     return 1;
- 25   }
- 26   spin_lock(&gblcnt_mutex);
- 27   globalize_count();
- 28   if (globalcount < delta) {
- 29     spin_unlock(&gblcnt_mutex);
- 30     return 0;
- 31   }
- 32   globalcount -= delta;
- 33   balance_count();
- 34   spin_unlock(&gblcnt_mutex);
- 35   return 1;
- 36 }
- 37 
- 38 unsigned long read_count(void)
- 39 {
- 40   int t;
- 41   unsigned long sum;
- 42 
- 43   spin_lock(&gblcnt_mutex);
- 44   sum = globalcount;
- 45   for_each_thread(t)
- 46     if (counterp[t] != NULL)
- 47       sum += *counterp[t];
- 48   spin_unlock(&gblcnt_mutex);
- 49   return sum;
- 50 }
-\end{verbbox}
-}
-\centering
-\theverbbox
+\input{CodeSamples/count/count_lim@add_sub_read.fcv}
 \caption{Simple Limit Counter Add, Subtract, and Read}
 \label{lst:count:Simple Limit Counter Add, Subtract, and Read}
 \end{listing}
@@ -1445,29 +1382,31 @@ functions (\path{count_lim.c}).
 	\co{inc_count()} and \co{dec_count()}.
 } \QuickQuizEnd
 
-Lines~1-18 show \co{add_count()}, which adds the specified value \co{delta}
+\begin{lineref}[ln:count:count_lim:add_sub_read:add]
+Lines~\lnref{b}-\lnref{e} show \co{add_count()},
+which adds the specified value \co{delta}
 to the counter.
-Line~3 checks to see if there is room for \co{delta} on this thread's
-\co{counter}, and, if so, line~4 adds it and line~6 returns success.
+Line~\lnref{checklocal} checks to see if there is room for
+\co{delta} on this thread's
+\co{counter}, and, if so,
+line~\lnref{add} adds it and line~\lnref{return:ls} returns success.
 This is the \co{add_counter()} fastpath, and it does no atomic operations,
 references only per-thread variables, and should not incur any cache misses.
+\end{lineref}
 
 \QuickQuiz{}
-	What is with the strange form of the condition on line~3 of
+	What is with the strange form of the condition on
+	line~\ref{ln:count:count_lim:add_sub_read:add:checklocal} of
 	Listing~\ref{lst:count:Simple Limit Counter Add, Subtract, and Read}?
 	Why not the following more intuitive form of the fastpath?
 
-	\vspace{5pt}
-	\begin{minipage}[t]{\columnwidth}
-	\small
-	\begin{verbatim}
-  3 if (counter + delta <= countermax) {
-  4   counter += delta;
-  5   return 1;
-  6 }
-	\end{verbatim}
-	\end{minipage}
-	\vspace{5pt}
+\begin{VerbatimN}[firstnumber=3]
+if (counter + delta <= countermax) {
+	counter += delta;
+	return 1;
+}
+\end{VerbatimN}
+\vspace{-9pt}
 \QuickQuizAnswer{
 	Two words.
 	``Integer overflow.''
@@ -1485,32 +1424,38 @@ references only per-thread variables, and should not incur any cache misses.
 	than parallel algorithms!
 } \QuickQuizEnd
 
-If the test on line~3 fails, we must access global variables, and thus
-must acquire \co{gblcnt_mutex} on line~7, which we release on line~11
-in the failure case or on line~16 in the success case.
-Line~8 invokes \co{globalize_count()}, shown in
+\begin{lineref}[ln:count:count_lim:add_sub_read:add]
+If the test on
+line~\lnref{checklocal} fails, we must access global variables, and thus
+must acquire \co{gblcnt_mutex} on
+line~\lnref{acquire}, which we release on line~\lnref{release:f}
+in the failure case or on line~\lnref{release:s} in the success case.
+Line~\lnref{globalize} invokes \co{globalize_count()}, shown in
 Listing~\ref{lst:count:Simple Limit Counter Utility Functions},
 which clears the thread-local variables, adjusting the global variables
 as needed, thus simplifying global processing.
 (But don't take \emph{my} word for it, try coding it yourself!)
-Lines~9 and~10 check to see if addition of \co{delta} can be accommodated,
+Lines~\lnref{checkglb:b} and~\lnref{checkglb:e} check to see
+if addition of \co{delta} can be accommodated,
 with the meaning of the expression preceding the less-than sign shown in
 Figure~\ref{fig:count:Simple Limit Counter Variable Relationships}
 as the difference in height of the two red (leftmost) bars.
 If the addition of \co{delta} cannot be accommodated, then
-line~11 (as noted earlier) releases \co{gblcnt_mutex} and line~12
+line~\lnref{release:f} (as noted earlier) releases \co{gblcnt_mutex} and
+line~\lnref{return:gf}
 returns indicating failure.
 
 Otherwise, we take the slowpath.
-Line~14 adds \co{delta} to \co{globalcount}, and then
-line~15 invokes \co{balance_count()} (shown in
+Line~\lnref{addglb} adds \co{delta} to \co{globalcount}, and then
+line~\lnref{balance} invokes \co{balance_count()} (shown in
 Listing~\ref{lst:count:Simple Limit Counter Utility Functions})
 in order to update both the global and the per-thread variables.
 This call to \co{balance_count()}
 will usually set this thread's \co{countermax} to re-enable the fastpath.
-Line~16 then releases
+Line~\lnref{release:s} then releases
 \co{gblcnt_mutex} (again, as noted earlier), and, finally,
-line~17 returns indicating success.
+line~\lnref{return:gs} returns indicating success.
+\end{lineref}
 
 \QuickQuiz{}
 	Why does \co{globalize_count()} zero the per-thread variables,
@@ -1525,26 +1470,30 @@ line~17 returns indicating success.
 	overflow!
 } \QuickQuizEnd
 
-Lines~20-36 show \co{sub_count()}, which subtracts the specified
+\begin{lineref}[ln:count:count_lim:add_sub_read:sub]
+Lines~\lnref{b}-\lnref{e} show \co{sub_count()},
+which subtracts the specified
 \co{delta} from the counter.
-Line~22 checks to see if the per-thread counter can accommodate
-this subtraction, and, if so, line~23 does the subtraction and
-line~24 returns success.
+Line~\lnref{checklocal} checks to see if the per-thread counter can accommodate
+this subtraction, and, if so, line~\lnref{sub} does the subtraction and
+line~\lnref{return:ls} returns success.
 These lines form \co{sub_count()}'s fastpath, and, as with
 \co{add_count()}, this fastpath executes no costly operations.
 
 If the fastpath cannot accommodate subtraction of \co{delta},
-execution proceeds to the slowpath on lines~26-35.
-Because the slowpath must access global state, line~26
-acquires \co{gblcnt_mutex}, which is released either by line~29
-(in case of failure) or by line~34 (in case of success).
-Line~27 invokes \co{globalize_count()}, shown in
+execution proceeds to the slowpath on
+lines~\lnlbl{acquire}-\lnlbl{return:gs}.
+Because the slowpath must access global state, line~\lnref{acquire}
+acquires \co{gblcnt_mutex}, which is released either by line~\lnref{release:f}
+(in case of failure) or by line~\lnref{release:s} (in case of success).
+Line~\lnref{globalize} invokes \co{globalize_count()}, shown in
 Listing~\ref{lst:count:Simple Limit Counter Utility Functions},
 which again clears the thread-local variables, adjusting the global variables
 as needed.
-Line~28 checks to see if the counter can accommodate subtracting
-\co{delta}, and, if not, line~29 releases \co{gblcnt_mutex}
-(as noted earlier) and line~30 returns failure.
+Line~\lnref{checkglb} checks to see if the counter can accommodate subtracting
+\co{delta}, and, if not, line~\lnref{release:f} releases \co{gblcnt_mutex}
+(as noted earlier) and line~\lnref{return:gf} returns failure.
+\end{lineref}
 
 \QuickQuiz{}
 	Given that \co{globalreserve} counted against us in \co{add_count()},
@@ -1577,14 +1526,17 @@ Line~28 checks to see if the counter can accommodate subtracting
 	will likely be preferable.
 } \QuickQuizEnd
 
-If, on the other hand, line~28 finds that the counter \emph{can}
+\begin{lineref}[ln:count:count_lim:add_sub_read:sub]
+If, on the other hand, line~\lnref{checkglb} finds that the counter \emph{can}
 accommodate subtracting \co{delta}, we complete the slowpath.
-Line~32 does the subtraction and then
-line~33 invokes \co{balance_count()} (shown in
+Line~\lnref{subglb} does the subtraction and then
+line~\lnref{balance} invokes \co{balance_count()} (shown in
 Listing~\ref{lst:count:Simple Limit Counter Utility Functions})
 in order to update both global and per-thread variables
 (hopefully re-enabling the fastpath).
-Then line~34 releases \co{gblcnt_mutex}, and line~35 returns success.
+Then line~\lnref{release:s} releases \co{gblcnt_mutex}, and
+line~\lnref{return:gs} returns success.
+\end{lineref}
 
 \QuickQuiz{}
 	Why have both \co{add_count()} and \co{sub_count()} in
@@ -1599,61 +1551,23 @@ Then line~34 releases \co{gblcnt_mutex}, and line~35 returns success.
 	of structures in use!
 } \QuickQuizEnd
 
-Lines~38-50 show \co{read_count()}, which returns the aggregate value
+\begin{lineref}[ln:count:count_lim:add_sub_read:read]
+Lines~\lnref{b}-\lnref{e} show \co{read_count()},
+which returns the aggregate value
 of the counter.
-It acquires \co{gblcnt_mutex} on line~43 and releases it on line~48,
+It acquires \co{gblcnt_mutex} on line~\lnref{acquire}
+and releases it on line~\lnref{release},
 excluding global operations from \co{add_count()} and \co{sub_count()},
 and, as we will see, also excluding thread creation and exit.
-Line~44 initializes local variable \co{sum} to the value of
-\co{globalcount}, and then the loop spanning lines~45-47 sums the
+Line~\lnref{initsum} initializes local variable \co{sum} to the value of
+\co{globalcount}, and then the loop spanning
+lines~\lnref{loop:b}-\lnref{loop:e} sums the
 per-thread \co{counter} variables.
-Line~49 then returns the sum.
+Line~\lnref{return} then returns the sum.
+\end{lineref}
 
 \begin{listing}[tbp]
-{ \scriptsize
-\begin{verbbox}
-  1 static void globalize_count(void)
-  2 {
-  3   globalcount += counter;
-  4   counter = 0;
-  5   globalreserve -= countermax;
-  6   countermax = 0;
-  7 }
-  8 
-  9 static void balance_count(void)
- 10 {
- 11   countermax = globalcountmax -
- 12                globalcount - globalreserve;
- 13   countermax /= num_online_threads();
- 14   globalreserve += countermax;
- 15   counter = countermax / 2;
- 16   if (counter > globalcount)
- 17     counter = globalcount;
- 18   globalcount -= counter;
- 19 }
- 20 
- 21 void count_register_thread(void)
- 22 {
- 23   int idx = smp_thread_id();
- 24 
- 25   spin_lock(&gblcnt_mutex);
- 26   counterp[idx] = &counter;
- 27   spin_unlock(&gblcnt_mutex);
- 28 }
- 29 
- 30 void count_unregister_thread(int nthreadsexpected)
- 31 {
- 32   int idx = smp_thread_id();
- 33 
- 34   spin_lock(&gblcnt_mutex);
- 35   globalize_count();
- 36   counterp[idx] = NULL;
- 37   spin_unlock(&gblcnt_mutex);
- 38 }
-\end{verbbox}
-}
-\centering
-\theverbbox
+\input{CodeSamples/count/count_lim@utility.fcv}
 \caption{Simple Limit Counter Utility Functions}
 \label{lst:count:Simple Limit Counter Utility Functions}
 \end{listing}
@@ -1663,20 +1577,25 @@ shows a number of utility functions used by the \co{add_count()},
 \co{sub_count()}, and \co{read_count()} primitives shown in
 Listing~\ref{lst:count:Simple Limit Counter Add, Subtract, and Read}.
 
-Lines~1-7 show \co{globalize_count()}, which zeros the current thread's
+\begin{lineref}[ln:count:count_lim:utility:globalize]
+Lines~\lnref{b}-\lnref{e} show \co{globalize_count()},
+which zeros the current thread's
 per-thread counters, adjusting the global variables appropriately.
 It is important to note that this function does not change the aggregate
 value of the counter, but instead changes how the counter's current value
 is represented.
-Line~3 adds the thread's \co{counter} variable to \co{globalcount},
-and line~4 zeroes \co{counter}.
-Similarly, line~5 subtracts the per-thread \co{countermax} from
-\co{globalreserve}, and line~6 zeroes \co{countermax}.
+Line~\lnref{add} adds the thread's \co{counter} variable to \co{globalcount},
+and line~\lnref{zero} zeroes \co{counter}.
+Similarly, line~\lnref{sub} subtracts the per-thread \co{countermax} from
+\co{globalreserve}, and line~\lnref{zeromax} zeroes \co{countermax}.
 It is helpful to refer to
 Figure~\ref{fig:count:Simple Limit Counter Variable Relationships}
 when reading both this function and \co{balance_count()}, which is next.
+\end{lineref}
 
-Lines~9-19 show \co{balance_count()}, which is roughly speaking
+\begin{lineref}[ln:count:count_lim:utility:balance]
+Lines~\lnref{b}-\lnref{e} show \co{balance_count()},
+which is roughly speaking
 the inverse of \co{globalize_count()}.
 This function's job is to set the current thread's
 \co{countermax} variable to the largest value that avoids the risk
@@ -1690,26 +1609,31 @@ By doing this, \co{balance_count()} maximizes use of
 As with \co{globalize_count()}, \co{balance_count()} is not permitted
 to change the aggregate value of the counter.
 
-Lines~11-13 compute this thread's share of that portion of
+Lines~\lnref{share:b}-\lnref{share:e} compute this thread's share of
+that portion of
 \co{globalcountmax} that is not already covered by either
 \co{globalcount} or \co{globalreserve}, and assign the
 computed quantity to this thread's \co{countermax}.
-Line~14 makes the corresponding adjustment to \co{globalreserve}.
-Line~15 sets this thread's \co{counter} to the middle of the range
+Line~\lnref{adjreserve} makes the corresponding adjustment to \co{globalreserve}.
+Line~\lnref{middle} sets this thread's \co{counter} to the middle of the range
 from zero to \co{countermax}.
-Line~16 checks to see whether \co{globalcount} can in fact accommodate
-this value of \co{counter}, and, if not, line~17 decreases \co{counter}
+Line~\lnref{check} checks to see whether \co{globalcount} can in fact accommodate
+this value of \co{counter}, and, if not,
+line~\lnref{adjcounter} decreases \co{counter}
 accordingly.
-Finally, in either case, line~18 makes the corresponding adjustment to
+Finally, in either case,
+line~\lnref{adjglobal} makes the corresponding adjustment to
 \co{globalcount}.
+\end{lineref}
 
 \QuickQuiz{}
-	Why set \co{counter} to \co{countermax / 2} in line~15 of
+	Why set \co{counter} to \co{countermax / 2} in
+	line~\ref{ln:count:count_lim:utility:balance:middle} of
 	Listing~\ref{lst:count:Simple Limit Counter Utility Functions}?
 	Wouldn't it be simpler to just take \co{countermax} counts?
 \QuickQuizAnswer{
 	First, it really is reserving \co{countermax} counts
-	(see line~14), however,
+	(see line~\ref{ln:count:count_lim:utility:balance:adjreserve}), however,
 	it adjusts so that only half of these are actually in use
 	by the thread at the moment.
 	This allows the thread to carry out at least \co{countermax / 2}
@@ -1789,19 +1713,27 @@ thread~0 can once again increment the counter locally.
 	what it does.
 } \QuickQuizEnd
 
-Lines~21-28 show \co{count_register_thread()}, which sets up state for
+\begin{lineref}[ln:count:count_lim:utility:register]
+Lines~\lnref{b}-\lnref{e} show \co{count_register_thread()},
+which sets up state for
 newly created threads.
 This function simply installs
 a pointer to the newly created thread's \co{counter} variable into
 the corresponding entry of the \co{counterp[]} array under the protection
 of \co{gblcnt_mutex}.
+\end{lineref}
 
-Finally, lines~30-38 show \co{count_unregister_thread()}, which tears down
+\begin{lineref}[ln:count:count_lim:utility:unregister]
+Finally, lines~\lnref{b}-\lnref{e} show \co{count_unregister_thread()},
+which tears down
 state for a soon-to-be-exiting thread.
-Line~34 acquires \co{gblcnt_mutex} and line~37 releases it.
-Line~35 invokes \co{globalize_count()} to clear out this thread's
-counter state, and line~36 clears this thread's entry in the
+Line~\lnref{acquire} acquires \co{gblcnt_mutex} and
+line~\lnref{release} releases it.
+Line~\lnref{globalize} invokes \co{globalize_count()}
+to clear out this thread's
+counter state, and line~\lnref{clear} clears this thread's entry in the
 \co{counterp[]} array.
+\end{lineref}
 
 \subsection{Simple Limit Counter Discussion}
 \label{sec:count:Simple Limit Counter Discussion}
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 5/5] count: Fix uses of READ/WRITE_ONCE() in count_lim
  2018-10-08  1:40 [PATCH 0/5] count: Employ new code-snippet scheme (cont.) Akira Yokosawa
                   ` (3 preceding siblings ...)
  2018-10-08  1:47 ` [PATCH 4/5] count: Employ new scheme for snippet of count_lim Akira Yokosawa
@ 2018-10-08  1:49 ` Akira Yokosawa
  2018-10-08 14:02 ` [PATCH 6/5] count: Fix typo (\lnlbl{} -> \lnref{}) Akira Yokosawa
  2018-10-09  3:54 ` [PATCH 0/5] count: Employ new code-snippet scheme (cont.) Paul E. McKenney
  6 siblings, 0 replies; 8+ messages in thread
From: Akira Yokosawa @ 2018-10-08  1:49 UTC (permalink / raw)
  To: Paul E. McKenney; +Cc: perfbook, Akira Yokosawa

From 83f496142e594e69f89b154bebcfd8d278ac7831 Mon Sep 17 00:00:00 2001
From: Akira Yokosawa <akiyks@gmail.com>
Date: Mon, 8 Oct 2018 10:26:24 +0900
Subject: [PATCH 5/5] count: Fix uses of READ/WRITE_ONCE() in count_lim

Add a few READ_ONCE()/WRITE_ONCE()s to prevent load/store tearing
of per-thread variables "counter" and "countermax".

NOTE: Updates of those per-thread variables in utility functions
don't need WRITE_ONCE()s because they are protected by glbcnt_mutex
and can't be seen by read_count().

Signed-off-by: Akira Yokosawa <akiyks@gmail.com>
---
 CodeSamples/count/count_lim.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/CodeSamples/count/count_lim.c b/CodeSamples/count/count_lim.c
index 69ced9a..c207beb 100644
--- a/CodeSamples/count/count_lim.c
+++ b/CodeSamples/count/count_lim.c
@@ -37,7 +37,7 @@ DEFINE_SPINLOCK(gblcnt_mutex);
 static __inline__ int add_count(unsigned long delta)	//\lnlbl{add:b}
 {
 	if (countermax - counter >= delta) {		//\lnlbl{add:checklocal}
-		counter += delta);			//\lnlbl{add:add}
+		WRITE_ONCE(counter, counter + delta);	//\lnlbl{add:add}
 		return 1;				//\lnlbl{add:return:ls}
 	}
 	spin_lock(&gblcnt_mutex);			//\lnlbl{add:acquire}
@@ -56,7 +56,7 @@ static __inline__ int add_count(unsigned long delta)	//\lnlbl{add:b}
 static __inline__ int sub_count(unsigned long delta)	//\lnlbl{sub:b}
 {
 	if (counter >= delta) {				//\lnlbl{sub:checklocal}
-		counter -= delta;			//\lnlbl{sub:sub}
+		WRITE_ONCE(counter, counter - delta);	//\lnlbl{sub:sub}
 		return 1;				//\lnlbl{sub:return:ls}
 	}
 	spin_lock(&gblcnt_mutex);			//\lnlbl{sub:acquire}
@@ -80,7 +80,7 @@ static __inline__ unsigned long read_count(void)	//\lnlbl{read:b}
 	sum = globalcount;				//\lnlbl{read:initsum}
 	for_each_thread(t)				//\lnlbl{read:loop:b}
 		if (counterp[t] != NULL)
-			sum += *counterp[t];		//\lnlbl{read:loop:e}
+			sum += READ_ONCE(*counterp[t]);	//\lnlbl{read:loop:e}
 	spin_unlock(&gblcnt_mutex);			//\lnlbl{read:release}
 	return sum;					//\lnlbl{read:return}
 }							//\lnlbl{read:e}
-- 
2.7.4



^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 6/5] count: Fix typo (\lnlbl{} -> \lnref{})
  2018-10-08  1:40 [PATCH 0/5] count: Employ new code-snippet scheme (cont.) Akira Yokosawa
                   ` (4 preceding siblings ...)
  2018-10-08  1:49 ` [PATCH 5/5] count: Fix uses of READ/WRITE_ONCE() in count_lim Akira Yokosawa
@ 2018-10-08 14:02 ` Akira Yokosawa
  2018-10-09  3:54 ` [PATCH 0/5] count: Employ new code-snippet scheme (cont.) Paul E. McKenney
  6 siblings, 0 replies; 8+ messages in thread
From: Akira Yokosawa @ 2018-10-08 14:02 UTC (permalink / raw)
  To: Paul E. McKenney; +Cc: perfbook, Akira Yokosawa

From 6cd4d087e034532c228589aa82b287233e45ccf8 Mon Sep 17 00:00:00 2001
From: Akira Yokosawa <akiyks@gmail.com>
Date: Mon, 8 Oct 2018 21:44:10 +0900
Subject: [PATCH 6/5] count: Fix typo (\lnlbl{} -> \lnref{})

Signed-off-by: Akira Yokosawa <akiyks@gmail.com>
---
Paul,

This fixes a couple of typo in "[PATCH 4/5] count: Employ new scheme for
snippet of count_lim".

        Thanks, Akira
--
 count/count.tex | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/count/count.tex b/count/count.tex
index db2107e..f8c68a4 100644
--- a/count/count.tex
+++ b/count/count.tex
@@ -1482,7 +1482,7 @@ These lines form \co{sub_count()}'s fastpath, and, as with
 
 If the fastpath cannot accommodate subtraction of \co{delta},
 execution proceeds to the slowpath on
-lines~\lnlbl{acquire}-\lnlbl{return:gs}.
+lines~\lnref{acquire}-\lnref{return:gs}.
 Because the slowpath must access global state, line~\lnref{acquire}
 acquires \co{gblcnt_mutex}, which is released either by line~\lnref{release:f}
 (in case of failure) or by line~\lnref{release:s} (in case of success).
-- 
2.7.4



^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 0/5] count: Employ new code-snippet scheme (cont.)
  2018-10-08  1:40 [PATCH 0/5] count: Employ new code-snippet scheme (cont.) Akira Yokosawa
                   ` (5 preceding siblings ...)
  2018-10-08 14:02 ` [PATCH 6/5] count: Fix typo (\lnlbl{} -> \lnref{}) Akira Yokosawa
@ 2018-10-09  3:54 ` Paul E. McKenney
  6 siblings, 0 replies; 8+ messages in thread
From: Paul E. McKenney @ 2018-10-09  3:54 UTC (permalink / raw)
  To: Akira Yokosawa; +Cc: perfbook

On Mon, Oct 08, 2018 at 10:40:51AM +0900, Akira Yokosawa wrote:
> Hi Paul,
> 
> This is the continuation of changes to snippets in the "count"
> chapter.
> 
> To help your review of the changes regarding READ_ONCE()/WRITE_ONCE(),
> I split the patches to the ones just applying the scheme and
> the ones modifying READ_ONCE()/WRITE_ONCE().
> 
> Note on patch #3: It fixes the segfault observed in count_tstat.
> However, I'm not sure if you want to take it or not. The fail can
> be thought of as a Quick Quiz within the Answer to the Quick Quiz.
> So keeping it as is would also be reasonable.
> 
>         Thanks, Akira

They look good at first glance, so I have queued and pushed them.
Thank you very much!

							Thanx, Paul

> --
> Akira Yokosawa (5):
>   count: Employ new scheme for snippet of count_end and count_tstat
>   count: Fix uses of READ/WRITE_ONCE()s in count_end and count_tstat
>   count: Tweak counttorture.h to avoid segfault
>   count: Employ new scheme for snippet of count_lim
>   count: Fix uses of READ/WRITE_ONCE() in count_lim
> 
>  CodeSamples/count/count_end.c    |  53 ++---
>  CodeSamples/count/count_lim.c    | 147 +++++++------
>  CodeSamples/count/count_tstat.c  |  27 +--
>  CodeSamples/count/counttorture.h |  21 +-
>  count/count.tex                  | 437 +++++++++++++--------------------------
>  5 files changed, 287 insertions(+), 398 deletions(-)
> 
> -- 
> 2.7.4
> 


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2018-10-09 11:09 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-10-08  1:40 [PATCH 0/5] count: Employ new code-snippet scheme (cont.) Akira Yokosawa
2018-10-08  1:42 ` [PATCH 1/5] count: Employ new scheme for snippet of count_end and count_tstat Akira Yokosawa
2018-10-08  1:44 ` [PATCH 2/5] count: Fix uses of READ/WRITE_ONCE()s in " Akira Yokosawa
2018-10-08  1:46 ` [PATCH 3/5] count: Tweak counttorture.h to avoid segfault Akira Yokosawa
2018-10-08  1:47 ` [PATCH 4/5] count: Employ new scheme for snippet of count_lim Akira Yokosawa
2018-10-08  1:49 ` [PATCH 5/5] count: Fix uses of READ/WRITE_ONCE() in count_lim Akira Yokosawa
2018-10-08 14:02 ` [PATCH 6/5] count: Fix typo (\lnlbl{} -> \lnref{}) Akira Yokosawa
2018-10-09  3:54 ` [PATCH 0/5] count: Employ new code-snippet scheme (cont.) Paul E. McKenney

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.