From 7e28c5af4ef6b539334aa5de40feca0c041c94df Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 11 Jul 2018 08:09:28 -0700 Subject: [PATCH] rcu: Eliminate ->rcu_qs_ctr from the rcu_dynticks structure The ->rcu_qs_ctr counter was intended to allow providing a lightweight report of a quiescent state to all RCU flavors. But now that there is only one flavor of RCU in any one running kernel, there is no point in having this feature. This commit therefore removes the ->rcu_qs_ctr field from the rcu_dynticks structure and the ->rcu_qs_ctr_snap field from the rcu_data structure. This results in the "rqc" option to the rcu_fqs trace event no longer being used, so this commit also removes the "rqc" description from the header comment. While in the neighborhood, this commit also causes the forward-progress request .rcu_need_heavy_qs to be set one jiffies_till_sched_qs interval later in the grace period than the first setting of .rcu_urgent_qs. Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 5 ++-- kernel/rcu/tree.c | 52 ++++++++++---------------------------- kernel/rcu/tree.h | 3 --- kernel/rcu/tree_plugin.h | 5 +--- 4 files changed, 17 insertions(+), 48 deletions(-) diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 175e0bce22bd..f0c4d10e614b 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -393,9 +393,8 @@ TRACE_EVENT(rcu_quiescent_state_report, * Tracepoint for quiescent states detected by force_quiescent_state(). * These trace events include the type of RCU, the grace-period number * that was blocked by the CPU, the CPU itself, and the type of quiescent - * state, which can be "dti" for dyntick-idle mode, "kick" when kicking - * a CPU that has been in dyntick-idle mode for too long, or "rqc" if the - * CPU got a quiescent state via its rcu_qs_ctr. + * state, which can be "dti" for dyntick-idle mode or "kick" when kicking + * a CPU that has been in dyntick-idle mode for too long. 
*/ TRACE_EVENT(rcu_fqs, diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 77d2cbf7c831..bc42c600027c 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1018,25 +1018,6 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) return 1; } - /* - * Has this CPU encountered a cond_resched() since the beginning - * of the grace period? For this to be the case, the CPU has to - * have noticed the current grace period. This might not be the - * case for nohz_full CPUs looping in the kernel. - */ - jtsq = jiffies_till_sched_qs; - ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu); - if (time_after(jiffies, rcu_state.gp_start + jtsq) && - READ_ONCE(rdp->rcu_qs_ctr_snap) != per_cpu(rcu_dynticks.rcu_qs_ctr, rdp->cpu) && - rcu_seq_current(&rdp->gp_seq) == rnp->gp_seq && !rdp->gpwrap) { - trace_rcu_fqs(rcu_state.name, rdp->gp_seq, rdp->cpu, TPS("rqc")); - rcu_gpnum_ovf(rnp, rdp); - return 1; - } else if (time_after(jiffies, rcu_state.gp_start + jtsq)) { - /* Load rcu_qs_ctr before store to rcu_urgent_qs. */ - smp_store_release(ruqp, true); - } - /* If waiting too long on an offline CPU, complain. */ if (!(rdp->grpmask & rcu_rnp_online_cpus(rnp)) && time_after(jiffies, rcu_state.gp_start + HZ)) { @@ -1060,29 +1041,27 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) /* * A CPU running for an extended time within the kernel can - * delay RCU grace periods. When the CPU is in NO_HZ_FULL mode, - * even context-switching back and forth between a pair of - * in-kernel CPU-bound tasks cannot advance grace periods. - * So if the grace period is old enough, make the CPU pay attention. - * Note that the unsynchronized assignments to the per-CPU - * rcu_need_heavy_qs variable are safe. Yes, setting of - * bits can be lost, but they will be set again on the next - * force-quiescent-state pass. So lost bit sets do not result - * in incorrect behavior, merely in a grace period lasting - * a few jiffies longer than it might otherwise. 
Because - * there are at most four threads involved, and because the - * updates are only once every few jiffies, the probability of - * lossage (and thus of slight grace-period extension) is - * quite low. + * delay RCU grace periods: (1) At age jiffies_till_sched_qs, + * set .rcu_urgent_qs, (2) At age 2*jiffies_till_sched_qs, set + * both .rcu_need_heavy_qs and .rcu_urgent_qs. Note that the + * unsynchronized assignments to the per-CPU rcu_need_heavy_qs + * variable are safe because the assignments are repeated if this + * CPU failed to pass through a quiescent state. This code + * also checks .jiffies_resched in case jiffies_till_sched_qs + * is set way high. */ + jtsq = jiffies_till_sched_qs; + ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu); rnhqp = &per_cpu(rcu_dynticks.rcu_need_heavy_qs, rdp->cpu); if (!READ_ONCE(*rnhqp) && - (time_after(jiffies, rcu_state.gp_start + jtsq) || + (time_after(jiffies, rcu_state.gp_start + jtsq * 2) || time_after(jiffies, rcu_state.jiffies_resched))) { WRITE_ONCE(*rnhqp, true); /* Store rcu_need_heavy_qs before rcu_urgent_qs. */ smp_store_release(ruqp, true); rcu_state.jiffies_resched += jtsq; /* Re-enable beating. */ + } else if (time_after(jiffies, rcu_state.gp_start + jtsq)) { + WRITE_ONCE(*ruqp, true); } /* @@ -1091,7 +1070,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) * see if the CPU is getting hammered with interrupts, but only * once per grace period, just to keep the IPIs down to a dull roar. 
*/ - if (jiffies - rcu_state.gp_start > rcu_jiffies_till_stall_check() / 2) { + if (time_after(jiffies, rcu_state.jiffies_resched)) { resched_cpu(rdp->cpu); if (IS_ENABLED(CONFIG_IRQ_WORK) && !rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq && @@ -1669,7 +1648,6 @@ static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp) trace_rcu_grace_period(rcu_state.name, rnp->gp_seq, TPS("cpustart")); need_gp = !!(rnp->qsmask & rdp->grpmask); rdp->cpu_no_qs.b.norm = need_gp; - rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_dynticks.rcu_qs_ctr); rdp->core_needs_qs = need_gp; zero_cpu_stall_ticks(rdp); } @@ -2230,7 +2208,6 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp) * within the current grace period. */ rdp->cpu_no_qs.b.norm = true; /* need qs for new gp. */ - rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_dynticks.rcu_qs_ctr); raw_spin_unlock_irqrestore_rcu_node(rnp, flags); return; } @@ -3213,7 +3190,6 @@ int rcutree_prepare_cpu(unsigned int cpu) rdp->gp_seq = rnp->gp_seq; rdp->gp_seq_needed = rnp->gp_seq; rdp->cpu_no_qs.b.norm = true; - rdp->rcu_qs_ctr_snap = per_cpu(rcu_dynticks.rcu_qs_ctr, cpu); rdp->core_needs_qs = false; rdp->rcu_iw_pending = false; rdp->rcu_iw_gp_seq = rnp->gp_seq - 1; diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 8cf93ac277ec..4866fa44ab0b 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -42,7 +42,6 @@ struct rcu_dynticks { long dynticks_nmi_nesting; /* Track irq/NMI nesting level. */ atomic_t dynticks; /* Even value for idle, else odd. */ bool rcu_need_heavy_qs; /* GP old, need heavy quiescent state. */ - unsigned long rcu_qs_ctr; /* Light universal quiescent state ctr. */ bool rcu_urgent_qs; /* GP old need light quiescent state. */ #ifdef CONFIG_RCU_FAST_NO_HZ bool all_lazy; /* Are all CPU's CBs lazy? */ @@ -188,8 +187,6 @@ struct rcu_data { /* 1) quiescent-state and grace-period handling : */ unsigned long gp_seq; /* Track rsp->rcu_gp_seq counter. 
*/ unsigned long gp_seq_needed; /* Track rsp->rcu_gp_seq_needed ctr. */ - unsigned long rcu_qs_ctr_snap;/* Snapshot of rcu_qs_ctr to check */ - /* for rcu_all_qs() invocations. */ union rcu_noqs cpu_no_qs; /* No QSes yet for this CPU. */ bool core_needs_qs; /* Core waits for quiesc state. */ bool beenonline; /* CPU online at least once. */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index beaaca7a11f4..726d57708849 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -978,9 +978,7 @@ void rcu_all_qs(void) rcu_momentary_dyntick_idle(); local_irq_restore(flags); } - if (unlikely(raw_cpu_read(rcu_data.cpu_no_qs.b.exp))) - rcu_qs(); - this_cpu_inc(rcu_dynticks.rcu_qs_ctr); + rcu_qs(); barrier(); /* Avoid RCU read-side critical sections leaking up. */ preempt_enable(); } @@ -1000,7 +998,6 @@ void rcu_note_context_switch(bool preempt) this_cpu_write(rcu_dynticks.rcu_urgent_qs, false); if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs))) rcu_momentary_dyntick_idle(); - this_cpu_inc(rcu_dynticks.rcu_qs_ctr); if (!preempt) rcu_tasks_qs(current); out: