Merge branch 'rcu/urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu

Pull RCU fixes from Paul McKenney:
 "I must confess that this past merge window was not RCU's best showing.
  This series contains three more fixes for RCU regressions:

   1.   A fix to __DECLARE_TRACE_RCU() that causes it to act as an
        interrupt from idle rather than as a task switch from idle.
        This change is needed due to the recent use of _rcuidle()
        tracepoints that can be invoked from interrupt handlers as well
        as from idle.  Without this fix, invoking _rcuidle() tracepoints
        from interrupt handlers results in splats and (more seriously)
        confusion on RCU's part as to whether a given CPU is idle or not.
        This confusion can in turn result in too-short grace periods and
        therefore random memory corruption.

   2.   A fix to a subtle deadlock that could result due to RCU doing
        a wakeup while holding one of its rcu_node structure's locks.
        Although the probability of occurrence is low, it really
        does happen.  The fix, courtesy of Steven Rostedt, uses
        irq_work_queue() to avoid the deadlock.

   3.   A fix to a silent deadlock (invisible to lockdep) due to the
        interaction of timeouts posted by RCU debug code enabled by
        CONFIG_PROVE_RCU_DELAY=y, grace-period initialization, and CPU
        hotplug operations.  This will not occur in production kernels,
        but really does occur in randconfig testing.  Diagnosis courtesy
        of Steven Rostedt"

* 'rcu/urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu:
  rcu: Fix deadlock with CPU hotplug, RCU GP init, and timer migration
  rcu: Don't call wakeup() with rcu_node structure ->lock held
  trace: Allow idle-safe tracepoints to be called from irq
This commit is contained in:
Linus Torvalds 2013-06-13 12:36:42 -07:00
commit cb7e9704d5
4 changed files with 22 additions and 6 deletions

View File

@ -145,8 +145,8 @@ static inline void tracepoint_synchronize_unregister(void)
TP_PROTO(data_proto), \ TP_PROTO(data_proto), \
TP_ARGS(data_args), \ TP_ARGS(data_args), \
TP_CONDITION(cond), \ TP_CONDITION(cond), \
rcu_idle_exit(), \ rcu_irq_enter(), \
rcu_idle_enter()); \ rcu_irq_exit()); \
} }
#else #else
#define __DECLARE_TRACE_RCU(name, proto, args, cond, data_proto, data_args) #define __DECLARE_TRACE_RCU(name, proto, args, cond, data_proto, data_args)

View File

@ -431,6 +431,7 @@ choice
config TREE_RCU config TREE_RCU
bool "Tree-based hierarchical RCU" bool "Tree-based hierarchical RCU"
depends on !PREEMPT && SMP depends on !PREEMPT && SMP
select IRQ_WORK
help help
This option selects the RCU implementation that is This option selects the RCU implementation that is
designed for very large SMP system with hundreds or designed for very large SMP system with hundreds or

View File

@ -1451,9 +1451,9 @@ static int rcu_gp_init(struct rcu_state *rsp)
rnp->grphi, rnp->qsmask); rnp->grphi, rnp->qsmask);
raw_spin_unlock_irq(&rnp->lock); raw_spin_unlock_irq(&rnp->lock);
#ifdef CONFIG_PROVE_RCU_DELAY #ifdef CONFIG_PROVE_RCU_DELAY
if ((prandom_u32() % (rcu_num_nodes * 8)) == 0 && if ((prandom_u32() % (rcu_num_nodes + 1)) == 0 &&
system_state == SYSTEM_RUNNING) system_state == SYSTEM_RUNNING)
schedule_timeout_uninterruptible(2); udelay(200);
#endif /* #ifdef CONFIG_PROVE_RCU_DELAY */ #endif /* #ifdef CONFIG_PROVE_RCU_DELAY */
cond_resched(); cond_resched();
} }
@ -1613,6 +1613,14 @@ static int __noreturn rcu_gp_kthread(void *arg)
} }
} }
static void rsp_wakeup(struct irq_work *work)
{
struct rcu_state *rsp = container_of(work, struct rcu_state, wakeup_work);
/* Wake up rcu_gp_kthread() to start the grace period. */
wake_up(&rsp->gp_wq);
}
/* /*
* Start a new RCU grace period if warranted, re-initializing the hierarchy * Start a new RCU grace period if warranted, re-initializing the hierarchy
* in preparation for detecting the next grace period. The caller must hold * in preparation for detecting the next grace period. The caller must hold
@ -1637,8 +1645,12 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
} }
rsp->gp_flags = RCU_GP_FLAG_INIT; rsp->gp_flags = RCU_GP_FLAG_INIT;
/* Wake up rcu_gp_kthread() to start the grace period. */ /*
wake_up(&rsp->gp_wq); * We can't do wakeups while holding the rnp->lock, as that
* could cause possible deadlocks with the rq->lock. Deter
* the wakeup to interrupt context.
*/
irq_work_queue(&rsp->wakeup_work);
} }
/* /*
@ -3235,6 +3247,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
rsp->rda = rda; rsp->rda = rda;
init_waitqueue_head(&rsp->gp_wq); init_waitqueue_head(&rsp->gp_wq);
init_irq_work(&rsp->wakeup_work, rsp_wakeup);
rnp = rsp->level[rcu_num_lvls - 1]; rnp = rsp->level[rcu_num_lvls - 1];
for_each_possible_cpu(i) { for_each_possible_cpu(i) {
while (i > rnp->grphi) while (i > rnp->grphi)

View File

@ -27,6 +27,7 @@
#include <linux/threads.h> #include <linux/threads.h>
#include <linux/cpumask.h> #include <linux/cpumask.h>
#include <linux/seqlock.h> #include <linux/seqlock.h>
#include <linux/irq_work.h>
/* /*
* Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
@ -442,6 +443,7 @@ struct rcu_state {
char *name; /* Name of structure. */ char *name; /* Name of structure. */
char abbr; /* Abbreviated name. */ char abbr; /* Abbreviated name. */
struct list_head flavors; /* List of RCU flavors. */ struct list_head flavors; /* List of RCU flavors. */
struct irq_work wakeup_work; /* Postponed wakeups */
}; };
/* Values for rcu_state structure's gp_flags field. */ /* Values for rcu_state structure's gp_flags field. */