Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RAS updates from Ingo Molnar:
 "MCE handling updates, but also some generic drivers/edac/ changes to
  better organize the Kconfig space"

* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/ras: Move AMD MCE injector to arch/x86/ras/
  x86/mce: Add a wrapper around mce_log() for injection
  x86/mce: Rename rcu_dereference_check_mce() to mce_log_get_idx_check()
  RAS: Add a menuconfig option with descriptive text
  x86/mce: Reenable CMCI banks when switching back to interrupt mode
  x86/mce: Clear Local MCE opt-in before kexec
  x86/mce: Remove unused function declarations
  x86/mce: Kill drain_mcelog_buffer()
  x86/mce: Avoid potential deadlock due to printk() in MCE context
  x86/mce: Remove the MCE ring for Action Optional errors
  x86/mce: Don't use percpu workqueues
  x86/mce: Provide a lockless memory pool to save error records
  x86/mce: Reuse one of the u16 padding fields in 'struct mce'
Committed by Linus Torvalds, 2015-08-31 20:20:30 -07:00
commit 3959df1dfb
18 changed files with 329 additions and 164 deletions

arch/x86/Kconfig

@@ -955,6 +955,7 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
 config X86_MCE
 	bool "Machine Check / overheating reporting"
+	select GENERIC_ALLOCATOR
 	default y
 	---help---
 	  Machine Check support allows the processor to notify the

arch/x86/Makefile

@@ -212,6 +212,8 @@ drivers-$(CONFIG_PM) += arch/x86/power/
 drivers-$(CONFIG_FB) += arch/x86/video/

+drivers-$(CONFIG_RAS) += arch/x86/ras/
+
 ####
 # boot loader support. Several targets are kept for legacy purposes

arch/x86/include/asm/mce.h

@@ -151,10 +151,12 @@ extern int mce_p5_enabled;
 #ifdef CONFIG_X86_MCE
 int mcheck_init(void);
 void mcheck_cpu_init(struct cpuinfo_x86 *c);
+void mcheck_cpu_clear(struct cpuinfo_x86 *c);
 void mcheck_vendor_init_severity(void);
 #else
 static inline int mcheck_init(void) { return 0; }
 static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {}
+static inline void mcheck_cpu_clear(struct cpuinfo_x86 *c) {}
 static inline void mcheck_vendor_init_severity(void) {}
 #endif

@@ -181,20 +183,18 @@ DECLARE_PER_CPU(struct device *, mce_device);
 #ifdef CONFIG_X86_MCE_INTEL
 void mce_intel_feature_init(struct cpuinfo_x86 *c);
+void mce_intel_feature_clear(struct cpuinfo_x86 *c);
 void cmci_clear(void);
 void cmci_reenable(void);
 void cmci_rediscover(void);
 void cmci_recheck(void);
-void lmce_clear(void);
-void lmce_enable(void);
 #else
 static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { }
+static inline void mce_intel_feature_clear(struct cpuinfo_x86 *c) { }
 static inline void cmci_clear(void) {}
 static inline void cmci_reenable(void) {}
 static inline void cmci_rediscover(void) {}
 static inline void cmci_recheck(void) {}
-static inline void lmce_clear(void) {}
-static inline void lmce_enable(void) {}
 #endif

 #ifdef CONFIG_X86_MCE_AMD

arch/x86/include/uapi/asm/mce.h

@@ -15,7 +15,8 @@ struct mce {
 	__u64 time;		/* wall time_t when error was detected */
 	__u8  cpuvendor;	/* cpu vendor as encoded in system.h */
 	__u8  inject_flags;	/* software inject flags */
-	__u16 pad;
+	__u8  severity;
+	__u8  usable_addr;
 	__u32 cpuid;		/* CPUID 1 EAX */
 	__u8  cs;		/* code segment */
 	__u8  bank;		/* machine check bank */

arch/x86/kernel/cpu/mcheck/Makefile

@@ -1,4 +1,4 @@
-obj-y				= mce.o mce-severity.o
+obj-y				= mce.o mce-severity.o mce-genpool.o

 obj-$(CONFIG_X86_ANCIENT_MCE)	+= winchip.o p5.o
 obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel.o

arch/x86/kernel/cpu/mcheck/mce-apei.c

@@ -57,7 +57,6 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
 	m.addr = mem_err->physical_addr;
 	mce_log(&m);
-	mce_notify_irq();
 }
 EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);

arch/x86/kernel/cpu/mcheck/mce-genpool.c (new file)

@@ -0,0 +1,99 @@
/*
 * MCE event pool management in MCE context
 *
 * Copyright (C) 2015 Intel Corp.
 * Author: Chen, Gong <gong.chen@linux.intel.com>
 *
 * This file is licensed under GPLv2.
 */
#include <linux/smp.h>
#include <linux/mm.h>
#include <linux/genalloc.h>
#include <linux/llist.h>
#include "mce-internal.h"

/*
 * printk() is not safe in MCE context. This is a lock-less memory allocator
 * used to save error information organized in a lock-less list.
 *
 * This memory pool is only to be used to save MCE records in MCE context.
 * MCE events are rare, so a fixed size memory pool should be enough. Use
 * 2 pages to save MCE events for now (~80 MCE records at most).
 */
#define MCE_POOLSZ	(2 * PAGE_SIZE)

static struct gen_pool *mce_evt_pool;
static LLIST_HEAD(mce_event_llist);
static char gen_pool_buf[MCE_POOLSZ];

void mce_gen_pool_process(void)
{
	struct llist_node *head;
	struct mce_evt_llist *node;
	struct mce *mce;

	head = llist_del_all(&mce_event_llist);
	if (!head)
		return;

	head = llist_reverse_order(head);
	llist_for_each_entry(node, head, llnode) {
		mce = &node->mce;
		atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
		gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node));
	}
}

bool mce_gen_pool_empty(void)
{
	return llist_empty(&mce_event_llist);
}

int mce_gen_pool_add(struct mce *mce)
{
	struct mce_evt_llist *node;

	if (!mce_evt_pool)
		return -EINVAL;

	node = (void *)gen_pool_alloc(mce_evt_pool, sizeof(*node));
	if (!node) {
		pr_warn_ratelimited("MCE records pool full!\n");
		return -ENOMEM;
	}

	memcpy(&node->mce, mce, sizeof(*mce));
	llist_add(&node->llnode, &mce_event_llist);

	return 0;
}

static int mce_gen_pool_create(void)
{
	struct gen_pool *tmpp;
	int ret = -ENOMEM;

	tmpp = gen_pool_create(ilog2(sizeof(struct mce_evt_llist)), -1);
	if (!tmpp)
		goto out;

	ret = gen_pool_add(tmpp, (unsigned long)gen_pool_buf, MCE_POOLSZ, -1);
	if (ret) {
		gen_pool_destroy(tmpp);
		goto out;
	}

	mce_evt_pool = tmpp;

out:
	return ret;
}

int mce_gen_pool_init(void)
{
	/* Just init mce_gen_pool once. */
	if (mce_evt_pool)
		return 0;

	return mce_gen_pool_create();
}
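The pattern above is: carve fixed-size records out of a static two-page buffer with gen_pool_alloc(), then publish them on a lock-free llist, so the MCE path never takes a lock and the consumer detaches the whole list in one atomic exchange. A minimal user-space analogue of that push/drain idea, assuming only C11 atomics (all names are made up, and the real pool's free/reuse step is omitted for brevity):

#include <stdatomic.h>
#include <stdio.h>
#include <string.h>

struct evt {
	struct evt *next;
	char msg[48];
};

#define POOLSZ 16
static struct evt pool[POOLSZ];
static _Atomic int pool_idx;		/* bump allocator over the fixed pool */
static struct evt *_Atomic evt_stack;	/* lock-free LIFO, like the kernel llist */

static int evt_add(const char *msg)	/* safe to call from the "producer" side */
{
	int i = atomic_fetch_add(&pool_idx, 1);
	struct evt *e, *head;

	if (i >= POOLSZ)
		return -1;		/* pool full: drop the record, as the kernel does */

	e = &pool[i];
	strncpy(e->msg, msg, sizeof(e->msg) - 1);

	/* llist_add(): push onto the stack with a CAS loop */
	head = atomic_load(&evt_stack);
	do {
		e->next = head;
	} while (!atomic_compare_exchange_weak(&evt_stack, &head, e));
	return 0;
}

static void evt_process(void)		/* the "process context" consumer */
{
	/* llist_del_all(): detach the entire list in one atomic exchange */
	struct evt *e = atomic_exchange(&evt_stack, NULL);

	/* consumed newest-first; the kernel calls llist_reverse_order() here */
	for (; e; e = e->next)
		printf("event: %s\n", e->msg);
}

int main(void)
{
	evt_add("bank 4: corrected DRAM error");
	evt_add("bank 2: corrected cache error");
	evt_process();
	return 0;
}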

arch/x86/kernel/cpu/mcheck/mce-internal.h

@@ -13,6 +13,8 @@ enum severity_level {
 	MCE_PANIC_SEVERITY,
 };

+extern struct atomic_notifier_head x86_mce_decoder_chain;
+
 #define ATTR_LEN		16
 #define INITIAL_CHECK_INTERVAL	5 * 60 /* 5 minutes */

@@ -24,6 +26,16 @@ struct mce_bank {
 	char			attrname[ATTR_LEN];	/* attribute name */
 };

+struct mce_evt_llist {
+	struct llist_node llnode;
+	struct mce mce;
+};
+
+void mce_gen_pool_process(void);
+bool mce_gen_pool_empty(void);
+int mce_gen_pool_add(struct mce *mce);
+int mce_gen_pool_init(void);
+
 extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
 struct dentry *mce_get_debugfs_dir(void);

@@ -67,3 +79,5 @@ static inline int apei_clear_mce(u64 record_id)
 	return -EINVAL;
 }
 #endif
+
+void mce_inject_log(struct mce *m);
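The mce_evt_llist layout above, with the list node embedded at the head of the record, is what lets llist_for_each_entry() recover the full record from a bare llist_node pointer. A stand-alone sketch of the pointer arithmetic involved, assuming nothing kernel-specific (names are hypothetical):

#include <stddef.h>
#include <stdio.h>

struct llnode { struct llnode *next; };

struct evt {
	struct llnode node;	/* embedded list linkage */
	int payload;
};

/* container_of(): step back from a member to its enclosing struct */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

int main(void)
{
	struct evt e = { .payload = 42 };
	struct llnode *n = &e.node;	/* what a list walk hands us */
	struct evt *back = container_of(n, struct evt, node);

	printf("payload = %d\n", back->payload);	/* prints 42 */
	return 0;
}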

arch/x86/kernel/cpu/mcheck/mce.c

@@ -52,11 +52,11 @@
 static DEFINE_MUTEX(mce_chrdev_read_mutex);

-#define rcu_dereference_check_mce(p) \
+#define mce_log_get_idx_check(p) \
 ({ \
 	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
 			 !lockdep_is_held(&mce_chrdev_read_mutex), \
-			 "suspicious rcu_dereference_check_mce() usage"); \
+			 "suspicious mce_log_get_idx_check() usage"); \
 	smp_load_acquire(&(p)); \
 })

@@ -110,15 +110,17 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
  */
 mce_banks_t mce_banks_ce_disabled;

-static DEFINE_PER_CPU(struct work_struct, mce_work);
+static struct work_struct mce_work;
+static struct irq_work mce_irq_work;

 static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
+static int mce_usable_address(struct mce *m);

 /*
  * CPU/chipset specific EDAC code can register a notifier call here to print
  * MCE errors in a human-readable form.
  */
-static ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
+ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);

 /* Do initial initialization of a struct mce */
 void mce_setup(struct mce *m)
@@ -157,12 +159,13 @@ void mce_log(struct mce *mce)
 	/* Emit the trace record: */
 	trace_mce_record(mce);

-	atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
+	if (!mce_gen_pool_add(mce))
+		irq_work_queue(&mce_irq_work);

 	mce->finished = 0;
 	wmb();
 	for (;;) {
-		entry = rcu_dereference_check_mce(mcelog.next);
+		entry = mce_log_get_idx_check(mcelog.next);
 		for (;;) {
 			/*

@@ -196,48 +199,23 @@ void mce_log(struct mce *mce)
 	set_bit(0, &mce_need_notify);
 }

-static void drain_mcelog_buffer(void)
+void mce_inject_log(struct mce *m)
 {
-	unsigned int next, i, prev = 0;
-
-	next = ACCESS_ONCE(mcelog.next);
-
-	do {
-		struct mce *m;
-
-		/* drain what was logged during boot */
-		for (i = prev; i < next; i++) {
-			unsigned long start = jiffies;
-			unsigned retries = 1;
-
-			m = &mcelog.entry[i];
-
-			while (!m->finished) {
-				if (time_after_eq(jiffies, start + 2*retries))
-					retries++;
-
-				cpu_relax();
-
-				if (!m->finished && retries >= 4) {
-					pr_err("skipping error being logged currently!\n");
-					break;
-				}
-			}
-			smp_rmb();
-			atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
-		}
-
-		memset(mcelog.entry + prev, 0, (next - prev) * sizeof(*m));
-		prev = next;
-		next = cmpxchg(&mcelog.next, prev, 0);
-	} while (next != prev);
+	mutex_lock(&mce_chrdev_read_mutex);
+	mce_log(m);
+	mutex_unlock(&mce_chrdev_read_mutex);
 }
+EXPORT_SYMBOL_GPL(mce_inject_log);
+
+static struct notifier_block mce_srao_nb;

 void mce_register_decode_chain(struct notifier_block *nb)
 {
+	/* Ensure SRAO notifier has the highest priority in the decode chain. */
+	if (nb != &mce_srao_nb && nb->priority == INT_MAX)
+		nb->priority -= 1;
+
 	atomic_notifier_chain_register(&x86_mce_decoder_chain, nb);
-	drain_mcelog_buffer();
 }
 EXPORT_SYMBOL_GPL(mce_register_decode_chain);
@@ -461,61 +439,6 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
 	}
 }

-/*
- * Simple lockless ring to communicate PFNs from the exception handler with the
- * process context work function. This is vastly simplified because there's
- * only a single reader and a single writer.
- */
-#define MCE_RING_SIZE 16	/* we use one entry less */
-
-struct mce_ring {
-	unsigned short start;
-	unsigned short end;
-	unsigned long ring[MCE_RING_SIZE];
-};
-
-static DEFINE_PER_CPU(struct mce_ring, mce_ring);
-
-/* Runs with CPU affinity in workqueue */
-static int mce_ring_empty(void)
-{
-	struct mce_ring *r = this_cpu_ptr(&mce_ring);
-
-	return r->start == r->end;
-}
-
-static int mce_ring_get(unsigned long *pfn)
-{
-	struct mce_ring *r;
-	int ret = 0;
-
-	*pfn = 0;
-	get_cpu();
-	r = this_cpu_ptr(&mce_ring);
-	if (r->start == r->end)
-		goto out;
-	*pfn = r->ring[r->start];
-	r->start = (r->start + 1) % MCE_RING_SIZE;
-	ret = 1;
-out:
-	put_cpu();
-	return ret;
-}
-
-/* Always runs in MCE context with preempt off */
-static int mce_ring_add(unsigned long pfn)
-{
-	struct mce_ring *r = this_cpu_ptr(&mce_ring);
-	unsigned next;
-
-	next = (r->end + 1) % MCE_RING_SIZE;
-	if (next == r->start)
-		return -1;
-	r->ring[r->end] = pfn;
-	wmb();
-	r->end = next;
-	return 0;
-}
-
 int mce_available(struct cpuinfo_x86 *c)
 {
 	if (mca_cfg.disabled)
@@ -525,12 +448,10 @@ int mce_available(struct cpuinfo_x86 *c)
 static void mce_schedule_work(void)
 {
-	if (!mce_ring_empty())
-		schedule_work(this_cpu_ptr(&mce_work));
+	if (!mce_gen_pool_empty() && keventd_up())
+		schedule_work(&mce_work);
 }

-static DEFINE_PER_CPU(struct irq_work, mce_irq_work);
-
 static void mce_irq_work_cb(struct irq_work *entry)
 {
 	mce_notify_irq();

@@ -551,9 +472,30 @@ static void mce_report_event(struct pt_regs *regs)
 		return;
 	}

-	irq_work_queue(this_cpu_ptr(&mce_irq_work));
+	irq_work_queue(&mce_irq_work);
 }

+static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
+				void *data)
+{
+	struct mce *mce = (struct mce *)data;
+	unsigned long pfn;
+
+	if (!mce)
+		return NOTIFY_DONE;
+
+	if (mce->usable_addr && (mce->severity == MCE_AO_SEVERITY)) {
+		pfn = mce->addr >> PAGE_SHIFT;
+		memory_failure(pfn, MCE_VECTOR, 0);
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block mce_srao_nb = {
+	.notifier_call	= srao_decode_notifier,
+	.priority	= INT_MAX,
+};
+
 /*
  * Read ADDR and MISC registers.
  */
@@ -672,8 +614,11 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		 */
 		if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) {
 			if (m.status & MCI_STATUS_ADDRV) {
-				mce_ring_add(m.addr >> PAGE_SHIFT);
-				mce_schedule_work();
+				m.severity = severity;
+				m.usable_addr = mce_usable_address(&m);
+
+				if (!mce_gen_pool_add(&m))
+					mce_schedule_work();
 			}
 		}

@@ -1143,15 +1088,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		mce_read_aux(&m, i);

-		/*
-		 * Action optional error. Queue address for later processing.
-		 * When the ring overflows we just ignore the AO error.
-		 * RED-PEN add some logging mechanism when
-		 * usable_address or mce_add_ring fails.
-		 * RED-PEN don't ignore overflow for mca_cfg.tolerant == 0
-		 */
-		if (severity == MCE_AO_SEVERITY && mce_usable_address(&m))
-			mce_ring_add(m.addr >> PAGE_SHIFT);
+		/* assuming valid severity level != 0 */
+		m.severity = severity;
+		m.usable_addr = mce_usable_address(&m);

 		mce_log(&m);
@@ -1247,14 +1186,11 @@ int memory_failure(unsigned long pfn, int vector, int flags)
 /*
  * Action optional processing happens here (picking up
  * from the list of faulting pages that do_machine_check()
- * placed into the "ring").
+ * placed into the genpool).
  */
 static void mce_process_work(struct work_struct *dummy)
 {
-	unsigned long pfn;
-
-	while (mce_ring_get(&pfn))
-		memory_failure(pfn, MCE_VECTOR, 0);
+	mce_gen_pool_process();
 }

 #ifdef CONFIG_X86_MCE_INTEL

@@ -1678,6 +1614,17 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 	}
 }

+static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c)
+{
+	switch (c->x86_vendor) {
+	case X86_VENDOR_INTEL:
+		mce_intel_feature_clear(c);
+		break;
+
+	default:
+		break;
+	}
+}
+
 static void mce_start_timer(unsigned int cpu, struct timer_list *t)
 {
 	unsigned long iv = check_interval * HZ;
@@ -1731,13 +1678,36 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
 		return;
 	}

+	if (mce_gen_pool_init()) {
+		mca_cfg.disabled = true;
+		pr_emerg("Couldn't allocate MCE records pool!\n");
+		return;
+	}
+
 	machine_check_vector = do_machine_check;

 	__mcheck_cpu_init_generic();
 	__mcheck_cpu_init_vendor(c);
 	__mcheck_cpu_init_timer();
-	INIT_WORK(this_cpu_ptr(&mce_work), mce_process_work);
-	init_irq_work(this_cpu_ptr(&mce_irq_work), &mce_irq_work_cb);
+}
+
+/*
+ * Called for each booted CPU to clear some machine checks opt-ins
+ */
+void mcheck_cpu_clear(struct cpuinfo_x86 *c)
+{
+	if (mca_cfg.disabled)
+		return;
+
+	if (!mce_available(c))
+		return;
+
+	/*
+	 * Possibly to clear general settings generic to x86
+	 * __mcheck_cpu_clear_generic(c);
+	 */
+	__mcheck_cpu_clear_vendor(c);
 }

 /*
@@ -1850,7 +1820,7 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
 		goto out;
 	}

-	next = rcu_dereference_check_mce(mcelog.next);
+	next = mce_log_get_idx_check(mcelog.next);

 	/* Only supports full reads right now */
 	err = -EINVAL;

@@ -2056,8 +2026,12 @@ __setup("mce", mcheck_enable);
 int __init mcheck_init(void)
 {
 	mcheck_intel_therm_init();
+	mce_register_decode_chain(&mce_srao_nb);
 	mcheck_vendor_init_severity();

+	INIT_WORK(&mce_work, mce_process_work);
+	init_irq_work(&mce_irq_work, mce_irq_work_cb);
+
 	return 0;
 }
@@ -2591,5 +2565,20 @@ static int __init mcheck_debugfs_init(void)
 	return 0;
 }
-late_initcall(mcheck_debugfs_init);
+#else
+static int __init mcheck_debugfs_init(void) { return -EINVAL; }
 #endif
+
+static int __init mcheck_late_init(void)
+{
+	mcheck_debugfs_init();
+
+	/*
+	 * Flush out everything that has been logged during early boot, now that
+	 * everything has been initialized (workqueues, decoders, ...).
+	 */
+	mce_schedule_work();
+
+	return 0;
+}
+late_initcall(mcheck_late_init);
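Consumers of the decode chain are unaffected by the move to the genpool: they keep using the same registration API, and the priority fixup in mce_register_decode_chain() above only guarantees the SRAO notifier stays first. A sketch of a hypothetical out-of-tree consumer module; mce_register_decode_chain() and mce_unregister_decode_chain() are the real exported entry points, everything else here is made up:

#include <linux/module.h>
#include <linux/notifier.h>
#include <asm/mce.h>

static int demo_mce_notify(struct notifier_block *nb, unsigned long val,
			   void *data)
{
	struct mce *m = data;

	if (!m)
		return NOTIFY_DONE;

	pr_info("demo: MCE on CPU%u bank %d status 0x%llx\n",
		m->extcpu, m->bank, (unsigned long long)m->status);

	return NOTIFY_OK;	/* let lower-priority notifiers run too */
}

/*
 * Leaving priority at 0 keeps the SRAO notifier (INT_MAX) ahead of us,
 * per the check added in mce_register_decode_chain() above.
 */
static struct notifier_block demo_mce_nb = {
	.notifier_call = demo_mce_notify,
};

static int __init demo_init(void)
{
	mce_register_decode_chain(&demo_mce_nb);
	return 0;
}

static void __exit demo_exit(void)
{
	mce_unregister_decode_chain(&demo_mce_nb);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");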

arch/x86/kernel/cpu/mcheck/mce_intel.c

@@ -146,6 +146,27 @@ void mce_intel_hcpu_update(unsigned long cpu)
 	per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
 }

+static void cmci_toggle_interrupt_mode(bool on)
+{
+	unsigned long flags, *owned;
+	int bank;
+	u64 val;
+
+	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
+	owned = this_cpu_ptr(mce_banks_owned);
+	for_each_set_bit(bank, owned, MAX_NR_BANKS) {
+		rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
+
+		if (on)
+			val |= MCI_CTL2_CMCI_EN;
+		else
+			val &= ~MCI_CTL2_CMCI_EN;
+
+		wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
+	}
+	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
+}
+
 unsigned long cmci_intel_adjust_timer(unsigned long interval)
 {
 	if ((this_cpu_read(cmci_backoff_cnt) > 0) &&

@@ -175,7 +196,7 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval)
 		 */
 		if (!atomic_read(&cmci_storm_on_cpus)) {
 			__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
-			cmci_reenable();
+			cmci_toggle_interrupt_mode(true);
 			cmci_recheck();
 		}

 		return CMCI_POLL_INTERVAL;

@@ -186,22 +207,6 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval)
 	}
 }

-static void cmci_storm_disable_banks(void)
-{
-	unsigned long flags, *owned;
-	int bank;
-	u64 val;
-
-	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
-	owned = this_cpu_ptr(mce_banks_owned);
-	for_each_set_bit(bank, owned, MAX_NR_BANKS) {
-		rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
-		val &= ~MCI_CTL2_CMCI_EN;
-		wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
-	}
-	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
-}
-
 static bool cmci_storm_detect(void)
 {
 	unsigned int cnt = __this_cpu_read(cmci_storm_cnt);

@@ -223,7 +228,7 @@ static bool cmci_storm_detect(void)
 	if (cnt <= CMCI_STORM_THRESHOLD)
 		return false;

-	cmci_storm_disable_banks();
+	cmci_toggle_interrupt_mode(false);
 	__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
 	r = atomic_add_return(1, &cmci_storm_on_cpus);
 	mce_timer_kick(CMCI_STORM_INTERVAL);

@@ -246,7 +251,6 @@ static void intel_threshold_interrupt(void)
 		return;

 	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
-	mce_notify_irq();
 }

 /*

@@ -435,7 +439,7 @@ static void intel_init_cmci(void)
 	cmci_recheck();
 }

-void intel_init_lmce(void)
+static void intel_init_lmce(void)
 {
 	u64 val;

@@ -448,9 +452,26 @@ void intel_init_lmce(void)
 	wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
 }

+static void intel_clear_lmce(void)
+{
+	u64 val;
+
+	if (!lmce_supported())
+		return;
+
+	rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
+	val &= ~MCG_EXT_CTL_LMCE_EN;
+	wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
+}
+
 void mce_intel_feature_init(struct cpuinfo_x86 *c)
 {
	intel_init_thermal(c);
	intel_init_cmci();
	intel_init_lmce();
 }
+
+void mce_intel_feature_clear(struct cpuinfo_x86 *c)
+{
+	intel_clear_lmce();
+}
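The CMCI toggling above only flips MCI_CTL2_CMCI_EN (bit 30) in each owned bank's MCi_CTL2 MSR (0x280 + bank). A user-space sketch, not from the patch, that inspects those same bits through the msr driver (assumes /dev/cpu/0/msr exists, e.g. after modprobe msr, and the process may read it):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define MSR_IA32_MC0_CTL2	0x280	/* MCi_CTL2 = 0x280 + bank */
#define MCI_CTL2_CMCI_EN	(1ULL << 30)

int main(void)
{
	uint64_t val;
	int bank, fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/cpu/0/msr");
		return 1;
	}

	for (bank = 0; bank < 4; bank++) {
		/* the msr driver reads 8 bytes at file offset == MSR number */
		if (pread(fd, &val, sizeof(val), MSR_IA32_MC0_CTL2 + bank) != sizeof(val))
			continue;	/* bank has no CTL2 on this CPU */
		printf("bank %d: CMCI %s (CTL2=0x%llx)\n", bank,
		       (val & MCI_CTL2_CMCI_EN) ? "enabled" : "disabled",
		       (unsigned long long)val);
	}

	close(fd);
	return 0;
}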

arch/x86/kernel/process.c

@@ -29,6 +29,7 @@
 #include <asm/debugreg.h>
 #include <asm/nmi.h>
 #include <asm/tlbflush.h>
+#include <asm/mce.h>

 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,

@@ -319,6 +320,7 @@ void stop_this_cpu(void *dummy)
 	 */
 	set_cpu_online(smp_processor_id(), false);
 	disable_local_APIC();
+	mcheck_cpu_clear(this_cpu_ptr(&cpu_info));

 	for (;;)
 		halt();

arch/x86/kernel/smp.c

@@ -30,6 +30,7 @@
 #include <asm/proto.h>
 #include <asm/apic.h>
 #include <asm/nmi.h>
+#include <asm/mce.h>
 #include <asm/trace/irq_vectors.h>

 /*
  * Some notes on x86 processor bugs affecting SMP operation:

@@ -243,6 +244,7 @@ static void native_stop_other_cpus(int wait)
 finish:
 	local_irq_save(flags);
 	disable_local_APIC();
+	mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
 	local_irq_restore(flags);
 }

arch/x86/ras/Kconfig (new file)

@@ -0,0 +1,11 @@
config AMD_MCE_INJ
	tristate "Simple MCE injection interface for AMD processors"
	depends on RAS && EDAC_DECODE_MCE && DEBUG_FS
	default n
	help
	  This is a simple debugfs interface to inject MCEs and test different
	  aspects of the MCE handling code.

	  WARNING: Do not even assume this interface is staying stable!

arch/x86/ras/Makefile (new file)

@@ -0,0 +1,2 @@
obj-$(CONFIG_AMD_MCE_INJ)	+= mce_amd_inj.o

drivers/edac/mce_amd_inj.c → arch/x86/ras/mce_amd_inj.c

@@ -6,7 +6,7 @@
  * This file may be distributed under the terms of the GNU General Public
  * License version 2.
  *
- * Copyright (c) 2010-14:  Borislav Petkov <bp@alien8.de>
+ * Copyright (c) 2010-15:  Borislav Petkov <bp@alien8.de>
  *			   Advanced Micro Devices Inc.
  */

@@ -19,7 +19,7 @@
 #include <linux/uaccess.h>
 #include <asm/mce.h>

-#include "mce_amd.h"
+#include "../kernel/cpu/mcheck/mce-internal.h"

 /*
  * Collect all the MCi_XXX settings

@@ -195,7 +195,7 @@ static void do_inject(void)
 		i_mce.status |= MCI_STATUS_MISCV;

 	if (inj_type == SW_INJ) {
-		amd_decode_mce(NULL, 0, &i_mce);
+		mce_inject_log(&i_mce);
 		return;
 	}

drivers/edac/Kconfig

@@ -61,16 +61,6 @@ config EDAC_DECODE_MCE
 	  which occur really early upon boot, before the module infrastructure
 	  has been initialized.

-config EDAC_MCE_INJ
-	tristate "Simple MCE injection interface"
-	depends on EDAC_DECODE_MCE && DEBUG_FS
-	default n
-	help
-	  This is a simple debugfs interface to inject MCEs and test different
-	  aspects of the MCE handling code.
-
-	  WARNING: Do not even assume this interface is staying stable!
-
 config EDAC_MM_EDAC
 	tristate "Main Memory EDAC (Error Detection And Correction) reporting"
 	select RAS

drivers/edac/Makefile

@@ -17,7 +17,6 @@ edac_core-y	+= edac_pci.o edac_pci_sysfs.o
 endif

 obj-$(CONFIG_EDAC_GHES)		+= ghes_edac.o
-obj-$(CONFIG_EDAC_MCE_INJ)	+= mce_amd_inj.o

 edac_mce_amd-y			:= mce_amd.o
 obj-$(CONFIG_EDAC_DECODE_MCE)	+= edac_mce_amd.o

drivers/ras/Kconfig

@@ -1,2 +1,35 @@
-config RAS
-	bool
+menuconfig RAS
+	bool "Reliability, Availability and Serviceability (RAS) features"
+	help
+	  Reliability, availability and serviceability (RAS) is a computer
+	  hardware engineering term. Computers designed with higher levels
+	  of RAS have a multitude of features that protect data integrity
+	  and help them stay available for long periods of time without
+	  failure.
+
+	  Reliability can be defined as the probability that the system will
+	  produce correct outputs up to some given time. Reliability is
+	  enhanced by features that help to avoid, detect and repair hardware
+	  faults.
+
+	  Availability is the probability a system is operational at a given
+	  time, i.e. the amount of time a device is actually operating as the
+	  percentage of total time it should be operating.
+
+	  Serviceability or maintainability is the simplicity and speed with
+	  which a system can be repaired or maintained; if the time to repair
+	  a failed system increases, then availability will decrease.
+
+	  Note that Reliability and Availability are distinct concepts:
+	  Reliability is a measure of the ability of a system to function
+	  correctly, including avoiding data corruption, whereas Availability
+	  measures how often it is available for use, even though it may not
+	  be functioning correctly. For example, a server may run forever and
+	  so have ideal availability, but may be unreliable, with frequent
+	  data corruption.
+
+if RAS
+
+source arch/x86/ras/Kconfig
+
+endif
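To make the availability definition in the help text concrete (the formula and numbers are illustrative, not from the patch): with a mean time between failures MTBF and a mean time to repair MTTR, availability is A = MTBF / (MTBF + MTTR). A machine that fails on average every 1000 hours and takes 1 hour to repair has A = 1000/1001, roughly 99.9%; the "server that runs forever" in the last paragraph has A = 1 yet may still be unreliable.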