From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1755427AbdDLRle (ORCPT ); Wed, 12 Apr 2017 13:41:34 -0400 Received: from mx0a-001b2d01.pphosted.com ([148.163.156.1]:51985 "EHLO mx0a-001b2d01.pphosted.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755257AbdDLRkm (ORCPT ); Wed, 12 Apr 2017 13:40:42 -0400 From: "Paul E. McKenney" To: linux-kernel@vger.kernel.org Cc: mingo@kernel.org, jiangshanlai@gmail.com, dipankar@in.ibm.com, akpm@linux-foundation.org, mathieu.desnoyers@efficios.com, josh@joshtriplett.org, tglx@linutronix.de, peterz@infradead.org, rostedt@goodmis.org, dhowells@redhat.com, edumazet@google.com, fweisbec@gmail.com, oleg@redhat.com, bobby.prani@gmail.com, "Paul E. McKenney" Subject: [PATCH tip/core/rcu 26/40] srcu: Use rcu_segcblist to track SRCU callbacks Date: Wed, 12 Apr 2017 10:40:11 -0700 X-Mailer: git-send-email 2.5.2 In-Reply-To: <20170412174003.GA23207@linux.vnet.ibm.com> References: <20170412174003.GA23207@linux.vnet.ibm.com> X-TM-AS-GCONF: 00 x-cbid: 17041217-2213-0000-0000-0000018E3922 X-IBM-SpamModules-Scores: X-IBM-SpamModules-Versions: BY=3.00006924; HX=3.00000240; KW=3.00000007; PH=3.00000004; SC=3.00000208; SDB=6.00846603; UDB=6.00417608; IPR=6.00625041; BA=6.00005286; NDR=6.00000001; ZLA=6.00000005; ZF=6.00000009; ZB=6.00000000; ZP=6.00000000; ZH=6.00000000; ZU=6.00000002; MB=3.00015023; XFM=3.00000013; UTC=2017-04-12 17:40:34 X-IBM-AV-DETECTION: SAVI=unused REMOTE=unused XFE=unused x-cbparentid: 17041217-2214-0000-0000-0000556F0075 Message-Id: <1492018825-25634-26-git-send-email-paulmck@linux.vnet.ibm.com> X-Proofpoint-Virus-Version: vendor=fsecure engine=2.50.10432:,, definitions=2017-04-12_13:,, signatures=0 X-Proofpoint-Spam-Details: rule=outbound_notspam policy=outbound score=0 spamscore=0 suspectscore=1 malwarescore=0 phishscore=0 adultscore=0 bulkscore=0 classifier=spam adjust=0 reason=mlx scancount=1 engine=8.0.1-1702020001 definitions=main-1704120145 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org This commit switches SRCU from custom-built callback queues to the new rcu_segcblist structure. This change associates grace-period sequence numbers with groups of callbacks, which will be needed for efficient processing of per-CPU callbacks. Signed-off-by: Paul E. McKenney --- include/linux/rcu_segcblist.h | 678 ++++++++++++++++++++++++++++++++++++++++++ include/linux/srcu.h | 24 +- kernel/rcu/rcu.h | 6 + kernel/rcu/rcu_segcblist.h | 671 ----------------------------------------- kernel/rcu/srcu.c | 159 ++-------- kernel/rcu/tree.h | 2 +- 6 files changed, 721 insertions(+), 819 deletions(-) create mode 100644 include/linux/rcu_segcblist.h delete mode 100644 kernel/rcu/rcu_segcblist.h diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h new file mode 100644 index 000000000000..74b1e7243955 --- /dev/null +++ b/include/linux/rcu_segcblist.h @@ -0,0 +1,678 @@ +/* + * RCU segmented callback lists + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * Copyright IBM Corporation, 2017 + * + * Authors: Paul E. McKenney + */ + +#ifndef __KERNEL_RCU_SEGCBLIST_H +#define __KERNEL_RCU_SEGCBLIST_H + +/* Simple unsegmented callback lists. */ +struct rcu_cblist { + struct rcu_head *head; + struct rcu_head **tail; + long len; + long len_lazy; +}; + +#define RCU_CBLIST_INITIALIZER(n) { .head = NULL, .tail = &n.head } + +/* Initialize simple callback list. */ +static inline void rcu_cblist_init(struct rcu_cblist *rclp) +{ + rclp->head = NULL; + rclp->tail = &rclp->head; + rclp->len = 0; + rclp->len_lazy = 0; +} + +/* Is simple callback list empty? */ +static inline bool rcu_cblist_empty(struct rcu_cblist *rclp) +{ + return !rclp->head; +} + +/* Return number of callbacks in simple callback list. */ +static inline long rcu_cblist_n_cbs(struct rcu_cblist *rclp) +{ + return rclp->len; +} + +/* Return number of lazy callbacks in simple callback list. */ +static inline long rcu_cblist_n_lazy_cbs(struct rcu_cblist *rclp) +{ + return rclp->len_lazy; +} + +/* + * Debug function to actually count the number of callbacks. + * If the number exceeds the limit specified, return -1. + */ +static inline long rcu_cblist_count_cbs(struct rcu_cblist *rclp, long lim) +{ + int cnt = 0; + struct rcu_head **rhpp = &rclp->head; + + for (;;) { + if (!*rhpp) + return cnt; + if (++cnt > lim) + return -1; + rhpp = &(*rhpp)->next; + } +} + +/* + * Dequeue the oldest rcu_head structure from the specified callback + * list. This function assumes that the callback is non-lazy, but + * the caller can later invoke rcu_cblist_dequeued_lazy() if it + * finds otherwise (and if it cares about laziness). This allows + * different users to have different ways of determining laziness. + */ +static inline struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp) +{ + struct rcu_head *rhp; + + rhp = rclp->head; + if (!rhp) + return NULL; + rclp->len--; + rclp->head = rhp->next; + if (!rclp->head) + rclp->tail = &rclp->head; + return rhp; +} + +/* + * Account for the fact that a previously dequeued callback turned out + * to be marked as lazy. + */ +static inline void rcu_cblist_dequeued_lazy(struct rcu_cblist *rclp) +{ + rclp->len_lazy--; +} + +/* + * Interim function to return rcu_cblist head pointer. Longer term, the + * rcu_cblist will be used more pervasively, removing the need for this + * function. + */ +static inline struct rcu_head *rcu_cblist_head(struct rcu_cblist *rclp) +{ + return rclp->head; +} + +/* + * Interim function to return rcu_cblist head pointer. Longer term, the + * rcu_cblist will be used more pervasively, removing the need for this + * function. + */ +static inline struct rcu_head **rcu_cblist_tail(struct rcu_cblist *rclp) +{ + WARN_ON_ONCE(rcu_cblist_empty(rclp)); + return rclp->tail; +} + +/* Complicated segmented callback lists. ;-) */ + +/* + * Index values for segments in rcu_segcblist structure. + * + * The segments are as follows: + * + * [head, *tails[RCU_DONE_TAIL]): + * Callbacks whose grace period has elapsed, and thus can be invoked. + * [*tails[RCU_DONE_TAIL], *tails[RCU_WAIT_TAIL]): + * Callbacks waiting for the current GP from the current CPU's viewpoint. + * [*tails[RCU_WAIT_TAIL], *tails[RCU_NEXT_READY_TAIL]): + * Callbacks that arrived before the next GP started, again from + * the current CPU's viewpoint. These can be handled by the next GP. + * [*tails[RCU_NEXT_READY_TAIL], *tails[RCU_NEXT_TAIL]): + * Callbacks that might have arrived after the next GP started. + * There is some uncertainty as to when a given GP starts and + * ends, but a CPU knows the exact times if it is the one starting + * or ending the GP. Other CPUs know that the previous GP ends + * before the next one starts. + * + * Note that RCU_WAIT_TAIL cannot be empty unless RCU_NEXT_READY_TAIL is also + * empty. + * + * The ->gp_seq[] array contains the grace-period number at which the + * corresponding segment of callbacks will be ready to invoke. A given + * element of this array is meaningful only when the corresponding segment + * is non-empty, and it is never valid for RCU_DONE_TAIL (whose callbacks + * are already ready to invoke) or for RCU_NEXT_TAIL (whose callbacks have + * not yet been assigned a grace-period number). + */ +#define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */ +#define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */ +#define RCU_NEXT_READY_TAIL 2 /* Also RCU_NEXT head. */ +#define RCU_NEXT_TAIL 3 +#define RCU_CBLIST_NSEGS 4 + +struct rcu_segcblist { + struct rcu_head *head; + struct rcu_head **tails[RCU_CBLIST_NSEGS]; + unsigned long gp_seq[RCU_CBLIST_NSEGS]; + long len; + long len_lazy; +}; + +#define RCU_SEGCBLIST_INITIALIZER(n) \ +{ \ + .head = NULL, \ + .tails[RCU_DONE_TAIL] = &n.head, \ + .tails[RCU_WAIT_TAIL] = &n.head, \ + .tails[RCU_NEXT_READY_TAIL] = &n.head, \ + .tails[RCU_NEXT_TAIL] = &n.head, \ +} + +/* + * Initialize an rcu_segcblist structure. + */ +static inline void rcu_segcblist_init(struct rcu_segcblist *rsclp) +{ + int i; + + BUILD_BUG_ON(RCU_NEXT_TAIL + 1 != ARRAY_SIZE(rsclp->gp_seq)); + BUILD_BUG_ON(ARRAY_SIZE(rsclp->tails) != ARRAY_SIZE(rsclp->gp_seq)); + rsclp->head = NULL; + for (i = 0; i < RCU_CBLIST_NSEGS; i++) + rsclp->tails[i] = &rsclp->head; + rsclp->len = 0; + rsclp->len_lazy = 0; +} + +/* + * Is the specified rcu_segcblist structure empty? + * + * But careful! The fact that the ->head field is NULL does not + * necessarily imply that there are no callbacks associated with + * this structure. When callbacks are being invoked, they are + * removed as a group. If callback invocation must be preempted, + * the remaining callbacks will be added back to the list. Either + * way, the counts are updated later. + * + * So it is often the case that rcu_segcblist_n_cbs() should be used + * instead. + */ +static inline bool rcu_segcblist_empty(struct rcu_segcblist *rsclp) +{ + return !rsclp->head; +} + +/* Return number of callbacks in segmented callback list. */ +static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp) +{ + return READ_ONCE(rsclp->len); +} + +/* Return number of lazy callbacks in segmented callback list. */ +static inline long rcu_segcblist_n_lazy_cbs(struct rcu_segcblist *rsclp) +{ + return rsclp->len_lazy; +} + +/* Return number of lazy callbacks in segmented callback list. */ +static inline long rcu_segcblist_n_nonlazy_cbs(struct rcu_segcblist *rsclp) +{ + return rsclp->len - rsclp->len_lazy; +} + +/* + * Is the specified rcu_segcblist enabled, for example, not corresponding + * to an offline or callback-offloaded CPU? + */ +static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp) +{ + return !!rsclp->tails[RCU_NEXT_TAIL]; +} + +/* + * Disable the specified rcu_segcblist structure, so that callbacks can + * no longer be posted to it. This structure must be empty. + */ +static inline void rcu_segcblist_disable(struct rcu_segcblist *rsclp) +{ + WARN_ON_ONCE(!rcu_segcblist_empty(rsclp)); + WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp)); + WARN_ON_ONCE(rcu_segcblist_n_lazy_cbs(rsclp)); + rsclp->tails[RCU_NEXT_TAIL] = NULL; +} + +/* + * Is the specified segment of the specified rcu_segcblist structure + * empty of callbacks? + */ +static inline bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg) +{ + if (seg == RCU_DONE_TAIL) + return &rsclp->head == rsclp->tails[RCU_DONE_TAIL]; + return rsclp->tails[seg - 1] == rsclp->tails[seg]; +} + +/* + * Are all segments following the specified segment of the specified + * rcu_segcblist structure empty of callbacks? (The specified + * segment might well contain callbacks.) + */ +static inline bool rcu_segcblist_restempty(struct rcu_segcblist *rsclp, int seg) +{ + return !*rsclp->tails[seg]; +} + +/* + * Does the specified rcu_segcblist structure contain callbacks that + * are ready to be invoked? + */ +static inline bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp) +{ + return rcu_segcblist_is_enabled(rsclp) && + &rsclp->head != rsclp->tails[RCU_DONE_TAIL]; +} + +/* + * Does the specified rcu_segcblist structure contain callbacks that + * are still pending, that is, not yet ready to be invoked? + */ +static inline bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp) +{ + return rcu_segcblist_is_enabled(rsclp) && + !rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL); +} + +/* + * Dequeue and return the first ready-to-invoke callback. If there + * are no ready-to-invoke callbacks, return NULL. Disables interrupts + * to avoid interference. Does not protect from interference from other + * CPUs or tasks. + */ +static inline struct rcu_head * +rcu_segcblist_dequeue(struct rcu_segcblist *rsclp) +{ + unsigned long flags; + int i; + struct rcu_head *rhp; + + local_irq_save(flags); + if (!rcu_segcblist_ready_cbs(rsclp)) { + local_irq_restore(flags); + return NULL; + } + rhp = rsclp->head; + BUG_ON(!rhp); + rsclp->head = rhp->next; + for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) { + if (rsclp->tails[i] != &rhp->next) + break; + rsclp->tails[i] = &rsclp->head; + } + smp_mb(); /* Dequeue before decrement for rcu_barrier(). */ + WRITE_ONCE(rsclp->len, rsclp->len - 1); + local_irq_restore(flags); + return rhp; +} + +/* + * Account for the fact that a previously dequeued callback turned out + * to be marked as lazy. + */ +static inline void rcu_segcblist_dequeued_lazy(struct rcu_segcblist *rsclp) +{ + unsigned long flags; + + local_irq_save(flags); + rsclp->len_lazy--; + local_irq_restore(flags); +} + +/* + * Return a pointer to the first callback in the specified rcu_segcblist + * structure. This is useful for diagnostics. + */ +static inline struct rcu_head * +rcu_segcblist_first_cb(struct rcu_segcblist *rsclp) +{ + if (rcu_segcblist_is_enabled(rsclp)) + return rsclp->head; + return NULL; +} + +/* + * Return a pointer to the first pending callback in the specified + * rcu_segcblist structure. This is useful just after posting a given + * callback -- if that callback is the first pending callback, then + * you cannot rely on someone else having already started up the required + * grace period. + */ +static inline struct rcu_head * +rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp) +{ + if (rcu_segcblist_is_enabled(rsclp)) + return *rsclp->tails[RCU_DONE_TAIL]; + return NULL; +} + +/* + * Does the specified rcu_segcblist structure contain callbacks that + * have not yet been processed beyond having been posted, that is, + * does it contain callbacks in its last segment? + */ +static inline bool rcu_segcblist_new_cbs(struct rcu_segcblist *rsclp) +{ + return rcu_segcblist_is_enabled(rsclp) && + !rcu_segcblist_restempty(rsclp, RCU_NEXT_READY_TAIL); +} + +/* + * Enqueue the specified callback onto the specified rcu_segcblist + * structure, updating accounting as needed. Note that the ->len + * field may be accessed locklessly, hence the WRITE_ONCE(). + * The ->len field is used by rcu_barrier() and friends to determine + * if it must post a callback on this structure, and it is OK + * for rcu_barrier() to sometimes post callbacks needlessly, but + * absolutely not OK for it to ever miss posting a callback. + */ +static inline void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp, + struct rcu_head *rhp, bool lazy) +{ + WRITE_ONCE(rsclp->len, rsclp->len + 1); /* ->len sampled locklessly. */ + if (lazy) + rsclp->len_lazy++; + smp_mb(); /* Ensure counts are updated before callback is enqueued. */ + rhp->next = NULL; + *rsclp->tails[RCU_NEXT_TAIL] = rhp; + rsclp->tails[RCU_NEXT_TAIL] = &rhp->next; +} + +/* + * Extract only the counts from the specified rcu_segcblist structure, + * and place them in the specified rcu_cblist structure. This function + * supports both callback orphaning and invocation, hence the separation + * of counts and callbacks. (Callbacks ready for invocation must be + * orphaned and adopted separately from pending callbacks, but counts + * apply to all callbacks. Locking must be used to make sure that + * both orphaned-callbacks lists are consistent.) + */ +static inline void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp, + struct rcu_cblist *rclp) +{ + rclp->len_lazy += rsclp->len_lazy; + rclp->len += rsclp->len; + rsclp->len_lazy = 0; + WRITE_ONCE(rsclp->len, 0); /* ->len sampled locklessly. */ +} + +/* + * Extract only those callbacks ready to be invoked from the specified + * rcu_segcblist structure and place them in the specified rcu_cblist + * structure. + */ +static inline void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp, + struct rcu_cblist *rclp) +{ + int i; + + if (!rcu_segcblist_ready_cbs(rsclp)) + return; /* Nothing to do. */ + *rclp->tail = rsclp->head; + rsclp->head = *rsclp->tails[RCU_DONE_TAIL]; + *rsclp->tails[RCU_DONE_TAIL] = NULL; + rclp->tail = rsclp->tails[RCU_DONE_TAIL]; + for (i = RCU_CBLIST_NSEGS - 1; i >= RCU_DONE_TAIL; i--) + if (rsclp->tails[i] == rsclp->tails[RCU_DONE_TAIL]) + rsclp->tails[i] = &rsclp->head; +} + +/* + * Extract only those callbacks still pending (not yet ready to be + * invoked) from the specified rcu_segcblist structure and place them in + * the specified rcu_cblist structure. Note that this loses information + * about any callbacks that might have been partway done waiting for + * their grace period. Too bad! They will have to start over. + */ +static inline void +rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp, + struct rcu_cblist *rclp) +{ + int i; + + if (!rcu_segcblist_pend_cbs(rsclp)) + return; /* Nothing to do. */ + *rclp->tail = *rsclp->tails[RCU_DONE_TAIL]; + rclp->tail = rsclp->tails[RCU_NEXT_TAIL]; + *rsclp->tails[RCU_DONE_TAIL] = NULL; + for (i = RCU_DONE_TAIL + 1; i < RCU_CBLIST_NSEGS; i++) + rsclp->tails[i] = rsclp->tails[RCU_DONE_TAIL]; +} + +/* + * Move the entire contents of the specified rcu_segcblist structure, + * counts, callbacks, and all, to the specified rcu_cblist structure. + * @@@ Why do we need this??? Moving early-boot CBs to NOCB lists? + * @@@ Memory barrier needed? (Not if only used at boot time...) + */ +static inline void rcu_segcblist_extract_all(struct rcu_segcblist *rsclp, + struct rcu_cblist *rclp) +{ + rcu_segcblist_extract_done_cbs(rsclp, rclp); + rcu_segcblist_extract_pend_cbs(rsclp, rclp); + rcu_segcblist_extract_count(rsclp, rclp); +} + +/* + * Insert counts from the specified rcu_cblist structure in the + * specified rcu_segcblist structure. + */ +static inline void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp, + struct rcu_cblist *rclp) +{ + rsclp->len_lazy += rclp->len_lazy; + /* ->len sampled locklessly. */ + WRITE_ONCE(rsclp->len, rsclp->len + rclp->len); + rclp->len_lazy = 0; + rclp->len = 0; +} + +/* + * Move callbacks from the specified rcu_cblist to the beginning of the + * done-callbacks segment of the specified rcu_segcblist. + */ +static inline void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp, + struct rcu_cblist *rclp) +{ + int i; + + if (!rclp->head) + return; /* No callbacks to move. */ + *rclp->tail = rsclp->head; + rsclp->head = rclp->head; + for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) + if (&rsclp->head == rsclp->tails[i]) + rsclp->tails[i] = rclp->tail; + else + break; + rclp->head = NULL; + rclp->tail = &rclp->head; +} + +/* + * Move callbacks from the specified rcu_cblist to the end of the + * new-callbacks segment of the specified rcu_segcblist. + */ +static inline void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp, + struct rcu_cblist *rclp) +{ + if (!rclp->head) + return; /* Nothing to do. */ + *rsclp->tails[RCU_NEXT_TAIL] = rclp->head; + rsclp->tails[RCU_NEXT_TAIL] = rclp->tail; + rclp->head = NULL; + rclp->tail = &rclp->head; +} + +/* + * Advance the callbacks in the specified rcu_segcblist structure based + * on the current value passed in for the grace-period counter. + */ +static inline void rcu_segcblist_advance(struct rcu_segcblist *rsclp, + unsigned long seq) +{ + int i, j; + + WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp)); + WARN_ON_ONCE(rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL)); + + /* + * Find all callbacks whose ->gp_seq numbers indicate that they + * are ready to invoke, and put them into the RCU_DONE_TAIL segment. + */ + for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) { + if (ULONG_CMP_LT(seq, rsclp->gp_seq[i])) + break; + rsclp->tails[RCU_DONE_TAIL] = rsclp->tails[i]; + } + + /* If no callbacks moved, nothing more need be done. */ + if (i == RCU_WAIT_TAIL) + return; + + /* Clean up tail pointers that might have been misordered above. */ + for (j = RCU_WAIT_TAIL; j < i; j++) + rsclp->tails[j] = rsclp->tails[RCU_DONE_TAIL]; + + /* + * Callbacks moved, so clean up the misordered ->tails[] pointers + * that now point into the middle of the list of ready-to-invoke + * callbacks. The overall effect is to copy down the later pointers + * into the gap that was created by the now-ready segments. + */ + for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) { + if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL]) + break; /* No more callbacks. */ + rsclp->tails[j] = rsclp->tails[i]; + rsclp->gp_seq[j] = rsclp->gp_seq[i]; + } +} + +/* + * "Accelerate" callbacks based on more-accurate grace-period information. + * The reason for this is that RCU does not synchronize the beginnings and + * ends of grace periods, and that callbacks are posted locally. This in + * turn means that the callbacks must be labelled conservatively early + * on, as getting exact information would degrade both performance and + * scalability. When more accurate grace-period information becomes + * available, previously posted callbacks can be "accelerated", marking + * them to complete at the end of the earlier grace period. + * + * This function operates on an rcu_segcblist structure, and also the + * grace-period sequence number at which new callbacks would become + * ready to invoke. + */ +static inline bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, + unsigned long seq) +{ + int i; + + WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp)); + WARN_ON_ONCE(rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL)); + + /* + * Find the segment preceding the oldest segment of callbacks + * whose ->gp_seq[] completion is at or after that passed in via + * "seq", skipping any empty segments. This oldest segment, along + * with any later segments, can be merged in with any newly arrived + * callbacks in the RCU_NEXT_TAIL segment, and assigned "seq" + * as their ->gp_seq[] grace-period completion sequence number. + */ + for (i = RCU_NEXT_READY_TAIL; i > RCU_DONE_TAIL; i--) + if (rsclp->tails[i] != rsclp->tails[i - 1] && + ULONG_CMP_LT(rsclp->gp_seq[i], seq)) + break; + + /* + * If all the segments contain callbacks that correspond to + * earlier grace-period sequence numbers than "seq", leave. + * Assuming that the rcu_segcblist structure has enough + * segments in its arrays, this can only happen if some of + * the non-done segments contain callbacks that really are + * ready to invoke. This situation will get straightened + * out by the next call to rcu_segcblist_advance(). + * + * Also advance to the oldest segment of callbacks whose + * ->gp_seq[] completion is at or after that passed in via "seq", + * skipping any empty segments. + */ + if (++i >= RCU_NEXT_TAIL) + return false; + + /* + * Merge all later callbacks, including newly arrived callbacks, + * into the segment located by the for-loop above. Assign "seq" + * as the ->gp_seq[] value in order to correctly handle the case + * where there were no pending callbacks in the rcu_segcblist + * structure other than in the RCU_NEXT_TAIL segment. + */ + for (; i < RCU_NEXT_TAIL; i++) { + rsclp->tails[i] = rsclp->tails[RCU_NEXT_TAIL]; + rsclp->gp_seq[i] = seq; + } + return true; +} + +/* + * Scan the specified rcu_segcblist structure for callbacks that need + * a grace period later than the one specified by "seq". We don't look + * at the RCU_DONE_TAIL or RCU_NEXT_TAIL segments because they don't + * have a grace-period sequence number. + */ +static inline bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp, + unsigned long seq) +{ + int i; + + for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) + if (rsclp->tails[i - 1] != rsclp->tails[i] && + ULONG_CMP_LT(seq, rsclp->gp_seq[i])) + return true; + return false; +} + +/* + * Interim function to return rcu_segcblist head pointer. Longer term, the + * rcu_segcblist will be used more pervasively, removing the need for this + * function. + */ +static inline struct rcu_head *rcu_segcblist_head(struct rcu_segcblist *rsclp) +{ + return rsclp->head; +} + +/* + * Interim function to return rcu_segcblist head pointer. Longer term, the + * rcu_segcblist will be used more pervasively, removing the need for this + * function. + */ +static inline struct rcu_head **rcu_segcblist_tail(struct rcu_segcblist *rsclp) +{ + WARN_ON_ONCE(rcu_segcblist_empty(rsclp)); + return rsclp->tails[RCU_NEXT_TAIL]; +} + +#endif /* __KERNEL_RCU_SEGCBLIST_H */ diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 047ac8c28a4e..ad154a7bc114 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -22,7 +22,7 @@ * Lai Jiangshan * * For detailed explanation of Read-Copy Update mechanism see - - * Documentation/RCU/ *.txt + * Documentation/RCU/ *.txt * */ @@ -32,31 +32,20 @@ #include #include #include +#include struct srcu_array { unsigned long lock_count[2]; unsigned long unlock_count[2]; }; -struct rcu_batch { - struct rcu_head *head, **tail; -}; - -#define RCU_BATCH_INIT(name) { NULL, &(name.head) } - struct srcu_struct { unsigned long completed; unsigned long srcu_gp_seq; struct srcu_array __percpu *per_cpu_ref; - spinlock_t queue_lock; /* protect ->batch_queue, ->running */ + spinlock_t queue_lock; /* protect ->srcu_cblist, ->srcu_state */ int srcu_state; - /* callbacks just queued */ - struct rcu_batch batch_queue; - /* callbacks try to do the first check_zero */ - struct rcu_batch batch_check0; - /* callbacks done with the first check_zero and the flip */ - struct rcu_batch batch_check1; - struct rcu_batch batch_done; + struct rcu_segcblist srcu_cblist; struct delayed_work work; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; @@ -97,10 +86,7 @@ void process_srcu(struct work_struct *work); .per_cpu_ref = &name##_srcu_array, \ .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \ .srcu_state = SRCU_STATE_IDLE, \ - .batch_queue = RCU_BATCH_INIT(name.batch_queue), \ - .batch_check0 = RCU_BATCH_INIT(name.batch_check0), \ - .batch_check1 = RCU_BATCH_INIT(name.batch_check1), \ - .batch_done = RCU_BATCH_INIT(name.batch_done), \ + .srcu_cblist = RCU_SEGCBLIST_INITIALIZER(name.srcu_cblist),\ .work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\ __SRCU_DEP_MAP_INIT(name) \ } diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 0bc1313c49e2..a943b42a9cf7 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -87,6 +87,12 @@ static inline unsigned long rcu_seq_snap(unsigned long *sp) return s; } +/* Return the current value the update side's sequence number, no ordering. */ +static inline unsigned long rcu_seq_current(unsigned long *sp) +{ + return READ_ONCE(*sp); +} + /* * Given a snapshot from rcu_seq_snap(), determine whether or not a * full update-side operation has occurred. diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h deleted file mode 100644 index c32d13b368ac..000000000000 --- a/kernel/rcu/rcu_segcblist.h +++ /dev/null @@ -1,671 +0,0 @@ -/* - * RCU segmented callback lists - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * Copyright IBM Corporation, 2017 - * - * Authors: Paul E. McKenney - */ - -#ifndef __KERNEL_RCU_SEGCBLIST_H -#define __KERNEL_RCU_SEGCBLIST_H - -/* Simple unsegmented callback lists. */ -struct rcu_cblist { - struct rcu_head *head; - struct rcu_head **tail; - long len; - long len_lazy; -}; - -#define RCU_CBLIST_INITIALIZER(n) { .head = NULL, .tail = &n.head } - -/* Initialize simple callback list. */ -static inline void rcu_cblist_init(struct rcu_cblist *rclp) -{ - rclp->head = NULL; - rclp->tail = &rclp->head; - rclp->len = 0; - rclp->len_lazy = 0; -} - -/* Is simple callback list empty? */ -static inline bool rcu_cblist_empty(struct rcu_cblist *rclp) -{ - return !rclp->head; -} - -/* Return number of callbacks in simple callback list. */ -static inline long rcu_cblist_n_cbs(struct rcu_cblist *rclp) -{ - return rclp->len; -} - -/* Return number of lazy callbacks in simple callback list. */ -static inline long rcu_cblist_n_lazy_cbs(struct rcu_cblist *rclp) -{ - return rclp->len_lazy; -} - -/* - * Debug function to actually count the number of callbacks. - * If the number exceeds the limit specified, return -1. - */ -static inline long rcu_cblist_count_cbs(struct rcu_cblist *rclp, long lim) -{ - int cnt = 0; - struct rcu_head **rhpp = &rclp->head; - - for (;;) { - if (!*rhpp) - return cnt; - if (++cnt > lim) - return -1; - rhpp = &(*rhpp)->next; - } -} - -/* - * Dequeue the oldest rcu_head structure from the specified callback - * list. This function assumes that the callback is non-lazy, but - * the caller can later invoke rcu_cblist_dequeued_lazy() if it - * finds otherwise (and if it cares about laziness). This allows - * different users to have different ways of determining laziness. - */ -static inline struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp) -{ - struct rcu_head *rhp; - - rhp = rclp->head; - if (!rhp) - return NULL; - prefetch(rhp); - rclp->len--; - rclp->head = rhp->next; - if (!rclp->head) - rclp->tail = &rclp->head; - return rhp; -} - -/* - * Account for the fact that a previously dequeued callback turned out - * to be marked as lazy. - */ -static inline void rcu_cblist_dequeued_lazy(struct rcu_cblist *rclp) -{ - rclp->len_lazy--; -} - -/* - * Interim function to return rcu_cblist head pointer. Longer term, the - * rcu_cblist will be used more pervasively, removing the need for this - * function. - */ -static inline struct rcu_head *rcu_cblist_head(struct rcu_cblist *rclp) -{ - return rclp->head; -} - -/* - * Interim function to return rcu_cblist head pointer. Longer term, the - * rcu_cblist will be used more pervasively, removing the need for this - * function. - */ -static inline struct rcu_head **rcu_cblist_tail(struct rcu_cblist *rclp) -{ - WARN_ON_ONCE(rcu_cblist_empty(rclp)); - return rclp->tail; -} - -/* Complicated segmented callback lists. ;-) */ - -/* - * Index values for segments in rcu_segcblist structure. - * - * The segments are as follows: - * - * [head, *tails[RCU_DONE_TAIL]): - * Callbacks whose grace period has elapsed, and thus can be invoked. - * [*tails[RCU_DONE_TAIL], *tails[RCU_WAIT_TAIL]): - * Callbacks waiting for the current GP from the current CPU's viewpoint. - * [*tails[RCU_WAIT_TAIL], *tails[RCU_NEXT_READY_TAIL]): - * Callbacks that arrived before the next GP started, again from - * the current CPU's viewpoint. These can be handled by the next GP. - * [*tails[RCU_NEXT_READY_TAIL], *tails[RCU_NEXT_TAIL]): - * Callbacks that might have arrived after the next GP started. - * There is some uncertainty as to when a given GP starts and - * ends, but a CPU knows the exact times if it is the one starting - * or ending the GP. Other CPUs know that the previous GP ends - * before the next one starts. - * - * Note that RCU_WAIT_TAIL cannot be empty unless RCU_NEXT_READY_TAIL is also - * empty. - * - * The ->gp_seq[] array contains the grace-period number at which the - * corresponding segment of callbacks will be ready to invoke. A given - * element of this array is meaningful only when the corresponding segment - * is non-empty, and it is never valid for RCU_DONE_TAIL (whose callbacks - * are already ready to invoke) or for RCU_NEXT_TAIL (whose callbacks have - * not yet been assigned a grace-period number). - */ -#define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */ -#define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */ -#define RCU_NEXT_READY_TAIL 2 /* Also RCU_NEXT head. */ -#define RCU_NEXT_TAIL 3 -#define RCU_CBLIST_NSEGS 4 - -struct rcu_segcblist { - struct rcu_head *head; - struct rcu_head **tails[RCU_CBLIST_NSEGS]; - unsigned long gp_seq[RCU_CBLIST_NSEGS]; - long len; - long len_lazy; -}; - -/* - * Initialize an rcu_segcblist structure. - */ -static inline void rcu_segcblist_init(struct rcu_segcblist *rsclp) -{ - int i; - - BUILD_BUG_ON(RCU_NEXT_TAIL + 1 != ARRAY_SIZE(rsclp->gp_seq)); - BUILD_BUG_ON(ARRAY_SIZE(rsclp->tails) != ARRAY_SIZE(rsclp->gp_seq)); - rsclp->head = NULL; - for (i = 0; i < RCU_CBLIST_NSEGS; i++) - rsclp->tails[i] = &rsclp->head; - rsclp->len = 0; - rsclp->len_lazy = 0; -} - -/* - * Is the specified rcu_segcblist structure empty? - * - * But careful! The fact that the ->head field is NULL does not - * necessarily imply that there are no callbacks associated with - * this structure. When callbacks are being invoked, they are - * removed as a group. If callback invocation must be preempted, - * the remaining callbacks will be added back to the list. Either - * way, the counts are updated later. - * - * So it is often the case that rcu_segcblist_n_cbs() should be used - * instead. - */ -static inline bool rcu_segcblist_empty(struct rcu_segcblist *rsclp) -{ - return !rsclp->head; -} - -/* Return number of callbacks in segmented callback list. */ -static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp) -{ - return READ_ONCE(rsclp->len); -} - -/* Return number of lazy callbacks in segmented callback list. */ -static inline long rcu_segcblist_n_lazy_cbs(struct rcu_segcblist *rsclp) -{ - return rsclp->len_lazy; -} - -/* Return number of lazy callbacks in segmented callback list. */ -static inline long rcu_segcblist_n_nonlazy_cbs(struct rcu_segcblist *rsclp) -{ - return rsclp->len - rsclp->len_lazy; -} - -/* - * Is the specified rcu_segcblist enabled, for example, not corresponding - * to an offline or callback-offloaded CPU? - */ -static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp) -{ - return !!rsclp->tails[RCU_NEXT_TAIL]; -} - -/* - * Disable the specified rcu_segcblist structure, so that callbacks can - * no longer be posted to it. This structure must be empty. - */ -static inline void rcu_segcblist_disable(struct rcu_segcblist *rsclp) -{ - WARN_ON_ONCE(!rcu_segcblist_empty(rsclp)); - WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp)); - WARN_ON_ONCE(rcu_segcblist_n_lazy_cbs(rsclp)); - rsclp->tails[RCU_NEXT_TAIL] = NULL; -} - -/* - * Is the specified segment of the specified rcu_segcblist structure - * empty of callbacks? - */ -static inline bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg) -{ - if (seg == RCU_DONE_TAIL) - return &rsclp->head == rsclp->tails[RCU_DONE_TAIL]; - return rsclp->tails[seg - 1] == rsclp->tails[seg]; -} - -/* - * Are all segments following the specified segment of the specified - * rcu_segcblist structure empty of callbacks? (The specified - * segment might well contain callbacks.) - */ -static inline bool rcu_segcblist_restempty(struct rcu_segcblist *rsclp, int seg) -{ - return !*rsclp->tails[seg]; -} - -/* - * Does the specified rcu_segcblist structure contain callbacks that - * are ready to be invoked? - */ -static inline bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp) -{ - return rcu_segcblist_is_enabled(rsclp) && - &rsclp->head != rsclp->tails[RCU_DONE_TAIL]; -} - -/* - * Does the specified rcu_segcblist structure contain callbacks that - * are still pending, that is, not yet ready to be invoked? - */ -static inline bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp) -{ - return rcu_segcblist_is_enabled(rsclp) && - !rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL); -} - -/* - * Dequeue and return the first ready-to-invoke callback. If there - * are no ready-to-invoke callbacks, return NULL. Disables interrupts - * to avoid interference. Does not protect from interference from other - * CPUs or tasks. - */ -static inline struct rcu_head * -rcu_segcblist_dequeue(struct rcu_segcblist *rsclp) -{ - unsigned long flags; - int i; - struct rcu_head *rhp; - - local_irq_save(flags); - if (!rcu_segcblist_ready_cbs(rsclp)) { - local_irq_restore(flags); - return NULL; - } - rhp = rsclp->head; - BUG_ON(!rhp); - rsclp->head = rhp->next; - for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) { - if (rsclp->tails[i] != &rhp->next) - break; - rsclp->tails[i] = &rsclp->head; - } - smp_mb(); /* Dequeue before decrement for rcu_barrier(). */ - WRITE_ONCE(rsclp->len, rsclp->len - 1); - local_irq_restore(flags); - return rhp; -} - -/* - * Account for the fact that a previously dequeued callback turned out - * to be marked as lazy. - */ -static inline void rcu_segcblist_dequeued_lazy(struct rcu_segcblist *rsclp) -{ - unsigned long flags; - - local_irq_save(flags); - rsclp->len_lazy--; - local_irq_restore(flags); -} - -/* - * Return a pointer to the first callback in the specified rcu_segcblist - * structure. This is useful for diagnostics. - */ -static inline struct rcu_head * -rcu_segcblist_first_cb(struct rcu_segcblist *rsclp) -{ - if (rcu_segcblist_is_enabled(rsclp)) - return rsclp->head; - return NULL; -} - -/* - * Return a pointer to the first pending callback in the specified - * rcu_segcblist structure. This is useful just after posting a given - * callback -- if that callback is the first pending callback, then - * you cannot rely on someone else having already started up the required - * grace period. - */ -static inline struct rcu_head * -rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp) -{ - if (rcu_segcblist_is_enabled(rsclp)) - return *rsclp->tails[RCU_DONE_TAIL]; - return NULL; -} - -/* - * Does the specified rcu_segcblist structure contain callbacks that - * have not yet been processed beyond having been posted, that is, - * does it contain callbacks in its last segment? - */ -static inline bool rcu_segcblist_new_cbs(struct rcu_segcblist *rsclp) -{ - return rcu_segcblist_is_enabled(rsclp) && - !rcu_segcblist_restempty(rsclp, RCU_NEXT_READY_TAIL); -} - -/* - * Enqueue the specified callback onto the specified rcu_segcblist - * structure, updating accounting as needed. Note that the ->len - * field may be accessed locklessly, hence the WRITE_ONCE(). - * The ->len field is used by rcu_barrier() and friends to determine - * if it must post a callback on this structure, and it is OK - * for rcu_barrier() to sometimes post callbacks needlessly, but - * absolutely not OK for it to ever miss posting a callback. - */ -static inline void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp, - struct rcu_head *rhp, bool lazy) -{ - WRITE_ONCE(rsclp->len, rsclp->len + 1); /* ->len sampled locklessly. */ - if (lazy) - rsclp->len_lazy++; - smp_mb(); /* Ensure counts are updated before callback is enqueued. */ - rhp->next = NULL; - *rsclp->tails[RCU_NEXT_TAIL] = rhp; - rsclp->tails[RCU_NEXT_TAIL] = &rhp->next; -} - -/* - * Extract only the counts from the specified rcu_segcblist structure, - * and place them in the specified rcu_cblist structure. This function - * supports both callback orphaning and invocation, hence the separation - * of counts and callbacks. (Callbacks ready for invocation must be - * orphaned and adopted separately from pending callbacks, but counts - * apply to all callbacks. Locking must be used to make sure that - * both orphaned-callbacks lists are consistent.) - */ -static inline void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp, - struct rcu_cblist *rclp) -{ - rclp->len_lazy += rsclp->len_lazy; - rclp->len += rsclp->len; - rsclp->len_lazy = 0; - WRITE_ONCE(rsclp->len, 0); /* ->len sampled locklessly. */ -} - -/* - * Extract only those callbacks ready to be invoked from the specified - * rcu_segcblist structure and place them in the specified rcu_cblist - * structure. - */ -static inline void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp, - struct rcu_cblist *rclp) -{ - int i; - - if (!rcu_segcblist_ready_cbs(rsclp)) - return; /* Nothing to do. */ - *rclp->tail = rsclp->head; - rsclp->head = *rsclp->tails[RCU_DONE_TAIL]; - *rsclp->tails[RCU_DONE_TAIL] = NULL; - rclp->tail = rsclp->tails[RCU_DONE_TAIL]; - for (i = RCU_CBLIST_NSEGS - 1; i >= RCU_DONE_TAIL; i--) - if (rsclp->tails[i] == rsclp->tails[RCU_DONE_TAIL]) - rsclp->tails[i] = &rsclp->head; -} - -/* - * Extract only those callbacks still pending (not yet ready to be - * invoked) from the specified rcu_segcblist structure and place them in - * the specified rcu_cblist structure. Note that this loses information - * about any callbacks that might have been partway done waiting for - * their grace period. Too bad! They will have to start over. - */ -static inline void -rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp, - struct rcu_cblist *rclp) -{ - int i; - - if (!rcu_segcblist_pend_cbs(rsclp)) - return; /* Nothing to do. */ - *rclp->tail = *rsclp->tails[RCU_DONE_TAIL]; - rclp->tail = rsclp->tails[RCU_NEXT_TAIL]; - *rsclp->tails[RCU_DONE_TAIL] = NULL; - for (i = RCU_DONE_TAIL + 1; i < RCU_CBLIST_NSEGS; i++) - rsclp->tails[i] = rsclp->tails[RCU_DONE_TAIL]; -} - -/* - * Move the entire contents of the specified rcu_segcblist structure, - * counts, callbacks, and all, to the specified rcu_cblist structure. - * @@@ Why do we need this??? Moving early-boot CBs to NOCB lists? - * @@@ Memory barrier needed? (Not if only used at boot time...) - */ -static inline void rcu_segcblist_extract_all(struct rcu_segcblist *rsclp, - struct rcu_cblist *rclp) -{ - rcu_segcblist_extract_done_cbs(rsclp, rclp); - rcu_segcblist_extract_pend_cbs(rsclp, rclp); - rcu_segcblist_extract_count(rsclp, rclp); -} - -/* - * Insert counts from the specified rcu_cblist structure in the - * specified rcu_segcblist structure. - */ -static inline void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp, - struct rcu_cblist *rclp) -{ - - rsclp->len_lazy += rclp->len_lazy; - /* ->len sampled locklessly. */ - WRITE_ONCE(rsclp->len, rsclp->len + rclp->len); - rclp->len_lazy = 0; - rclp->len = 0; -} - -/* - * Move callbacks from the specified rcu_cblist to the beginning of the - * done-callbacks segment of the specified rcu_segcblist. - */ -static inline void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp, - struct rcu_cblist *rclp) -{ - int i; - - if (!rclp->head) - return; /* No callbacks to move. */ - *rclp->tail = rsclp->head; - rsclp->head = rclp->head; - for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) - if (&rsclp->head == rsclp->tails[i]) - rsclp->tails[i] = rclp->tail; - else - break; - rclp->head = NULL; - rclp->tail = &rclp->head; -} - -/* - * Move callbacks from the specified rcu_cblist to the end of the - * new-callbacks segment of the specified rcu_segcblist. - */ -static inline void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp, - struct rcu_cblist *rclp) -{ - if (!rclp->head) - return; /* Nothing to do. */ - *rsclp->tails[RCU_NEXT_TAIL] = rclp->head; - rsclp->tails[RCU_NEXT_TAIL] = rclp->tail; - rclp->head = NULL; - rclp->tail = &rclp->head; -} - -/* - * Advance the callbacks in the specified rcu_segcblist structure based - * on the current value passed in for the grace-period counter. - */ -static inline void rcu_segcblist_advance(struct rcu_segcblist *rsclp, - unsigned long seq) -{ - int i, j; - - WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp)); - WARN_ON_ONCE(rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL)); - - /* - * Find all callbacks whose ->gp_seq numbers indicate that they - * are ready to invoke, and put them into the RCU_DONE_TAIL segment. - */ - for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) { - if (ULONG_CMP_LT(seq, rsclp->gp_seq[i])) - break; - rsclp->tails[RCU_DONE_TAIL] = rsclp->tails[i]; - } - - /* If no callbacks moved, nothing more need be done. */ - if (i == RCU_WAIT_TAIL) - return; - - /* Clean up tail pointers that might have been misordered above. */ - for (j = RCU_WAIT_TAIL; j < i; j++) - rsclp->tails[j] = rsclp->tails[RCU_DONE_TAIL]; - - /* - * Callbacks moved, so clean up the misordered ->tails[] pointers - * that now point into the middle of the list of ready-to-invoke - * callbacks. The overall effect is to copy down the later pointers - * into the gap that was created by the now-ready segments. - */ - for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) { - if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL]) - break; /* No more callbacks. */ - rsclp->tails[j] = rsclp->tails[i]; - rsclp->gp_seq[j] = rsclp->gp_seq[i]; - } -} - -/* - * "Accelerate" callbacks based on more-accurate grace-period information. - * The reason for this is that RCU does not synchronize the beginnings and - * ends of grace periods, and that callbacks are posted locally. This in - * turn means that the callbacks must be labelled conservatively early - * on, as getting exact information would degrade both performance and - * scalability. When more accurate grace-period information becomes - * available, previously posted callbacks can be "accelerated", marking - * them to complete at the end of the earlier grace period. - * - * This function operates on an rcu_segcblist structure, and also the - * grace-period sequence number at which new callbacks would become - * ready to invoke. - */ -static inline bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, - unsigned long seq) -{ - int i; - - WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp)); - WARN_ON_ONCE(rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL)); - - /* - * Find the segment preceding the oldest segment of callbacks - * whose ->gp_seq[] completion is at or after that passed in via - * "seq", skipping any empty segments. This oldest segment, along - * with any later segments, can be merged in with any newly arrived - * callbacks in the RCU_NEXT_TAIL segment, and assigned "seq" - * as their ->gp_seq[] grace-period completion sequence number. - */ - for (i = RCU_NEXT_READY_TAIL; i > RCU_DONE_TAIL; i--) - if (rsclp->tails[i] != rsclp->tails[i - 1] && - ULONG_CMP_LT(rsclp->gp_seq[i], seq)) - break; - - /* - * If all the segments contain callbacks that correspond to - * earlier grace-period sequence numbers than "seq", leave. - * Assuming that the rcu_segcblist structure has enough - * segments in its arrays, this can only happen if some of - * the non-done segments contain callbacks that really are - * ready to invoke. This situation will get straightened - * out by the next call to rcu_segcblist_advance(). - * - * Also advance to the oldest segment of callbacks whose - * ->gp_seq[] completion is at or after that passed in via "seq", - * skipping any empty segments. - */ - if (++i >= RCU_NEXT_TAIL) - return false; - - /* - * Merge all later callbacks, including newly arrived callbacks, - * into the segment located by the for-loop above. Assign "seq" - * as the ->gp_seq[] value in order to correctly handle the case - * where there were no pending callbacks in the rcu_segcblist - * structure other than in the RCU_NEXT_TAIL segment. - */ - for (; i < RCU_NEXT_TAIL; i++) { - rsclp->tails[i] = rsclp->tails[RCU_NEXT_TAIL]; - rsclp->gp_seq[i] = seq; - } - return true; -} - -/* - * Scan the specified rcu_segcblist structure for callbacks that need - * a grace period later than the one specified by "seq". We don't look - * at the RCU_DONE_TAIL or RCU_NEXT_TAIL segments because they don't - * have a grace-period sequence number. - */ -static inline bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp, - unsigned long seq) -{ - int i; - - for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) - if (rsclp->tails[i - 1] != rsclp->tails[i] && - ULONG_CMP_LT(seq, rsclp->gp_seq[i])) - return true; - return false; -} - -/* - * Interim function to return rcu_segcblist head pointer. Longer term, the - * rcu_segcblist will be used more pervasively, removing the need for this - * function. - */ -static inline struct rcu_head *rcu_segcblist_head(struct rcu_segcblist *rsclp) -{ - return rsclp->head; -} - -/* - * Interim function to return rcu_segcblist head pointer. Longer term, the - * rcu_segcblist will be used more pervasively, removing the need for this - * function. - */ -static inline struct rcu_head **rcu_segcblist_tail(struct rcu_segcblist *rsclp) -{ - WARN_ON_ONCE(rcu_segcblist_empty(rsclp)); - return rsclp->tails[RCU_NEXT_TAIL]; -} - -#endif /* __KERNEL_RCU_SEGCBLIST_H */ diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c index ed7c3d082b9f..132cd57c07f1 100644 --- a/kernel/rcu/srcu.c +++ b/kernel/rcu/srcu.c @@ -22,7 +22,7 @@ * Lai Jiangshan * * For detailed explanation of Read-Copy Update mechanism see - - * Documentation/RCU/ *.txt + * Documentation/RCU/ *.txt * */ @@ -38,85 +38,13 @@ #include "rcu.h" -/* - * Initialize an rcu_batch structure to empty. - */ -static inline void rcu_batch_init(struct rcu_batch *b) -{ - b->head = NULL; - b->tail = &b->head; -} - -/* - * Enqueue a callback onto the tail of the specified rcu_batch structure. - */ -static inline void rcu_batch_queue(struct rcu_batch *b, struct rcu_head *head) -{ - *b->tail = head; - b->tail = &head->next; -} - -/* - * Is the specified rcu_batch structure empty? - */ -static inline bool rcu_batch_empty(struct rcu_batch *b) -{ - return b->tail == &b->head; -} - -/* - * Are all batches empty for the specified srcu_struct? - */ -static inline bool rcu_all_batches_empty(struct srcu_struct *sp) -{ - return rcu_batch_empty(&sp->batch_done) && - rcu_batch_empty(&sp->batch_check1) && - rcu_batch_empty(&sp->batch_check0) && - rcu_batch_empty(&sp->batch_queue); -} - -/* - * Remove the callback at the head of the specified rcu_batch structure - * and return a pointer to it, or return NULL if the structure is empty. - */ -static inline struct rcu_head *rcu_batch_dequeue(struct rcu_batch *b) -{ - struct rcu_head *head; - - if (rcu_batch_empty(b)) - return NULL; - - head = b->head; - b->head = head->next; - if (b->tail == &head->next) - rcu_batch_init(b); - - return head; -} - -/* - * Move all callbacks from the rcu_batch structure specified by "from" to - * the structure specified by "to". - */ -static inline void rcu_batch_move(struct rcu_batch *to, struct rcu_batch *from) -{ - if (!rcu_batch_empty(from)) { - *to->tail = from->head; - to->tail = from->tail; - rcu_batch_init(from); - } -} - static int init_srcu_struct_fields(struct srcu_struct *sp) { sp->completed = 0; sp->srcu_gp_seq = 0; spin_lock_init(&sp->queue_lock); sp->srcu_state = SRCU_STATE_IDLE; - rcu_batch_init(&sp->batch_queue); - rcu_batch_init(&sp->batch_check0); - rcu_batch_init(&sp->batch_check1); - rcu_batch_init(&sp->batch_done); + rcu_segcblist_init(&sp->srcu_cblist); INIT_DELAYED_WORK(&sp->work, process_srcu); sp->per_cpu_ref = alloc_percpu(struct srcu_array); return sp->per_cpu_ref ? 0 : -ENOMEM; @@ -262,7 +190,7 @@ void cleanup_srcu_struct(struct srcu_struct *sp) { if (WARN_ON(srcu_readers_active(sp))) return; /* Leakage unless caller handles error. */ - if (WARN_ON(!rcu_all_batches_empty(sp))) + if (WARN_ON(!rcu_segcblist_empty(&sp->srcu_cblist))) return; /* Leakage unless caller handles error. */ flush_delayed_work(&sp->work); if (WARN_ON(READ_ONCE(sp->srcu_state) != SRCU_STATE_IDLE)) @@ -318,6 +246,8 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock); */ static void srcu_gp_start(struct srcu_struct *sp) { + rcu_segcblist_accelerate(&sp->srcu_cblist, + rcu_seq_snap(&sp->srcu_gp_seq)); WRITE_ONCE(sp->srcu_state, SRCU_STATE_SCAN1); rcu_seq_start(&sp->srcu_gp_seq); } @@ -365,6 +295,11 @@ static void srcu_gp_end(struct srcu_struct *sp) { rcu_seq_end(&sp->srcu_gp_seq); WRITE_ONCE(sp->srcu_state, SRCU_STATE_DONE); + + spin_lock_irq(&sp->queue_lock); + rcu_segcblist_advance(&sp->srcu_cblist, + rcu_seq_current(&sp->srcu_gp_seq)); + spin_unlock_irq(&sp->queue_lock); } /* @@ -403,7 +338,7 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *head, head->func = func; spin_lock_irqsave(&sp->queue_lock, flags); smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */ - rcu_batch_queue(&sp->batch_queue, head); + rcu_segcblist_enqueue(&sp->srcu_cblist, head, false); if (READ_ONCE(sp->srcu_state) == SRCU_STATE_IDLE) { srcu_gp_start(sp); queue_delayed_work(system_power_efficient_wq, &sp->work, 0); @@ -439,13 +374,13 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount) smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */ if (READ_ONCE(sp->srcu_state) == SRCU_STATE_IDLE) { /* steal the processing owner */ + rcu_segcblist_enqueue(&sp->srcu_cblist, head, false); srcu_gp_start(sp); - rcu_batch_queue(&sp->batch_check0, head); spin_unlock_irq(&sp->queue_lock); /* give the processing owner to work_struct */ srcu_reschedule(sp, 0); } else { - rcu_batch_queue(&sp->batch_queue, head); + rcu_segcblist_enqueue(&sp->srcu_cblist, head, false); spin_unlock_irq(&sp->queue_lock); } @@ -543,19 +478,6 @@ EXPORT_SYMBOL_GPL(srcu_batches_completed); #define SRCU_INTERVAL 1 /* - * Move any new SRCU callbacks to the first stage of the SRCU grace - * period pipeline. - */ -static void srcu_collect_new(struct srcu_struct *sp) -{ - if (!rcu_batch_empty(&sp->batch_queue)) { - spin_lock_irq(&sp->queue_lock); - rcu_batch_move(&sp->batch_check0, &sp->batch_queue); - spin_unlock_irq(&sp->queue_lock); - } -} - -/* * Core SRCU state machine. Advance callbacks from ->batch_check0 to * ->batch_check1 and then to ->batch_done as readers drain. */ @@ -580,26 +502,7 @@ static void srcu_advance_batches(struct srcu_struct *sp, int trycount) idx = 1 ^ (sp->completed & 1); if (!try_check_zero(sp, idx, trycount)) return; /* readers present, retry after SRCU_INTERVAL */ - - /* - * The callbacks in ->batch_check1 have already done - * with their first zero check and flip back when they were - * enqueued on ->batch_check0 in a previous invocation of - * srcu_advance_batches(). (Presumably try_check_zero() - * returned false during that invocation, leaving the - * callbacks stranded on ->batch_check1.) They are therefore - * ready to invoke, so move them to ->batch_done. - */ - rcu_batch_move(&sp->batch_done, &sp->batch_check1); srcu_flip(sp); - - /* - * The callbacks in ->batch_check0 just finished their - * first check zero and flip, so move them to ->batch_check1 - * for future checking on the other idx. - */ - rcu_batch_move(&sp->batch_check1, &sp->batch_check0); - WRITE_ONCE(sp->srcu_state, SRCU_STATE_SCAN2); } @@ -613,14 +516,6 @@ static void srcu_advance_batches(struct srcu_struct *sp, int trycount) trycount = trycount < 2 ? 2 : trycount; if (!try_check_zero(sp, idx, trycount)) return; /* readers present, retry after SRCU_INTERVAL */ - - /* - * The callbacks in ->batch_check1 have now waited for - * all pre-existing readers using both idx values. They are - * therefore ready to invoke, so move them to ->batch_done. - */ - rcu_batch_move(&sp->batch_done, &sp->batch_check1); - srcu_gp_end(sp); } } @@ -633,17 +528,26 @@ static void srcu_advance_batches(struct srcu_struct *sp, int trycount) */ static void srcu_invoke_callbacks(struct srcu_struct *sp) { - int i; - struct rcu_head *head; + struct rcu_cblist ready_cbs; + struct rcu_head *rhp; - for (i = 0; i < SRCU_CALLBACK_BATCH; i++) { - head = rcu_batch_dequeue(&sp->batch_done); - if (!head) - break; + spin_lock_irq(&sp->queue_lock); + if (!rcu_segcblist_ready_cbs(&sp->srcu_cblist)) { + spin_unlock_irq(&sp->queue_lock); + return; + } + rcu_cblist_init(&ready_cbs); + rcu_segcblist_extract_done_cbs(&sp->srcu_cblist, &ready_cbs); + spin_unlock_irq(&sp->queue_lock); + rhp = rcu_cblist_dequeue(&ready_cbs); + for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) { local_bh_disable(); - head->func(head); + rhp->func(rhp); local_bh_enable(); } + spin_lock_irq(&sp->queue_lock); + rcu_segcblist_insert_count(&sp->srcu_cblist, &ready_cbs); + spin_unlock_irq(&sp->queue_lock); } /* @@ -654,9 +558,9 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay) { bool pending = true; - if (rcu_all_batches_empty(sp)) { + if (rcu_segcblist_empty(&sp->srcu_cblist)) { spin_lock_irq(&sp->queue_lock); - if (rcu_all_batches_empty(sp) && + if (rcu_segcblist_empty(&sp->srcu_cblist) && READ_ONCE(sp->srcu_state) == SRCU_STATE_DONE) { WRITE_ONCE(sp->srcu_state, SRCU_STATE_IDLE); pending = false; @@ -677,7 +581,6 @@ void process_srcu(struct work_struct *work) sp = container_of(work, struct srcu_struct, work.work); - srcu_collect_new(sp); srcu_advance_batches(sp, 1); srcu_invoke_callbacks(sp); srcu_reschedule(sp, SRCU_INTERVAL); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 93889ff21dbb..4f62651588ea 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -30,7 +30,7 @@ #include #include #include -#include "rcu_segcblist.h" +#include /* * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and -- 2.5.2