Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RAS updates from Ingo Molnar:
 "The main changes in this cycle were:

   - add the 'Corrected Errors Collector' kernel feature, which collects
     and monitors correctable-error statistics and will preemptively
     (soft-)offline physical pages that have a suspiciously high error
     count

   - handle MCE errors during kexec() more gracefully

   - factor out and deprecate the /dev/mcelog driver

   - ... plus misc fixes and cleanups"

* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mce: Check MCi_STATUS[MISCV] for usable addr on Intel only
  ACPI/APEI: Use setup_deferrable_timer()
  x86/mce: Update notifier priority check
  x86/mce: Enable PPIN for Knights Landing/Mill
  x86/mce: Do not register notifiers with invalid prio
  x86/mce: Factor out and deprecate the /dev/mcelog driver
  RAS: Add a Corrected Errors Collector
  x86/mce: Rename mce_log to mce_log_buffer
  x86/mce: Rename mce_log()'s argument
  x86/mce: Init some CPU features early
  x86/mce: Handle broadcasted MCE gracefully with kexec
commit 3dee9fb2a4
@@ -3177,6 +3177,12 @@
 	ramdisk_size=	[RAM] Sizes of RAM disks in kilobytes
 			See Documentation/blockdev/ramdisk.txt.
 
+	ras=option[,option,...]	[KNL] RAS-specific options
+
+		cec_disable	[X86]
+				Disable the Correctable Errors Collector,
+				see CONFIG_RAS_CEC help text.
+
 	rcu_nocbs=	[KNL]
 			The argument is a cpu list, as described above.
@@ -1042,6 +1042,14 @@ config X86_MCE
 	  The action the kernel takes depends on the severity of the problem,
 	  ranging from warning messages to halting the machine.
 
+config X86_MCELOG_LEGACY
+	bool "Support for deprecated /dev/mcelog character device"
+	depends on X86_MCE
+	---help---
+	  Enable support for /dev/mcelog which is needed by the old mcelog
+	  userspace logging daemon. Consider switching to the new generation
+	  rasdaemon solution.
+
 config X86_MCE_INTEL
 	def_bool y
 	prompt "Intel MCE features"
@@ -1071,7 +1079,7 @@ config X86_MCE_THRESHOLD
 	def_bool y
 
 config X86_MCE_INJECT
-	depends on X86_MCE && X86_LOCAL_APIC
+	depends on X86_MCE && X86_LOCAL_APIC && X86_MCELOG_LEGACY
 	tristate "Machine check injector support"
 	---help---
 	  Provide support for injecting machine checks for testing purposes.
@@ -128,7 +128,7 @@
  * debugging tools. Each entry is only valid when its finished flag
  * is set.
  */
-struct mce_log {
+struct mce_log_buffer {
 	char signature[12]; /* "MACHINECHECK" */
 	unsigned len;	    /* = MCE_LOG_LEN */
 	unsigned next;
@@ -191,10 +191,12 @@ extern struct mca_config mca_cfg;
 extern struct mca_msr_regs msr_ops;
 
 enum mce_notifier_prios {
-	MCE_PRIO_SRAO		= INT_MAX,
-	MCE_PRIO_EXTLOG		= INT_MAX - 1,
-	MCE_PRIO_NFIT		= INT_MAX - 2,
-	MCE_PRIO_EDAC		= INT_MAX - 3,
+	MCE_PRIO_FIRST		= INT_MAX,
+	MCE_PRIO_SRAO		= INT_MAX - 1,
+	MCE_PRIO_EXTLOG		= INT_MAX - 2,
+	MCE_PRIO_NFIT		= INT_MAX - 3,
+	MCE_PRIO_EDAC		= INT_MAX - 4,
+	MCE_PRIO_MCELOG		= 1,
 	MCE_PRIO_LOWEST		= 0,
 };
 
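The reshuffled priorities above are what consumers pass when hooking into the x86 MCE decode chain. A minimal sketch of a hypothetical consumer, using only interfaces that appear elsewhere in this diff (the callback body and names are illustrative, not part of the commit):

	static int example_mce_notify(struct notifier_block *nb, unsigned long val,
				      void *data)
	{
		struct mce *m = (struct mce *)data;

		if (!m)
			return NOTIFY_DONE;

		/* decode/report m->status, m->addr, ... here */
		return NOTIFY_DONE;
	}

	static struct notifier_block example_mce_nb = {
		.notifier_call	= example_mce_notify,
		.priority	= MCE_PRIO_EDAC,
	};

	/* registration: mce_register_decode_chain(&example_mce_nb); */

Note that after this merge, mce_register_decode_chain() rejects priorities strictly between MCE_PRIO_MCELOG and MCE_PRIO_EDAC, so a consumer must pick one of the enum values above.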
@@ -15,6 +15,7 @@ struct machine_ops {
 };
 
 extern struct machine_ops machine_ops;
+extern int crashing_cpu;
 
 void native_machine_crash_shutdown(struct pt_regs *regs);
 void native_machine_shutdown(void);
@@ -9,3 +9,5 @@ obj-$(CONFIG_X86_MCE_INJECT)	+= mce-inject.o
 obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o
 
 obj-$(CONFIG_ACPI_APEI)		+= mce-apei.o
+
+obj-$(CONFIG_X86_MCELOG_LEGACY)	+= dev-mcelog.o
arch/x86/kernel/cpu/mcheck/dev-mcelog.c (new file, 397 lines)
@@ -0,0 +1,397 @@
/*
 * /dev/mcelog driver
 *
 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
 * Rest from unknown author(s).
 * 2004 Andi Kleen. Rewrote most of it.
 * Copyright 2008 Intel Corporation
 * Author: Andi Kleen
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/miscdevice.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/poll.h>

#include "mce-internal.h"

static DEFINE_MUTEX(mce_chrdev_read_mutex);

static char mce_helper[128];
static char *mce_helper_argv[2] = { mce_helper, NULL };

#define mce_log_get_idx_check(p) \
({ \
	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
			 !lockdep_is_held(&mce_chrdev_read_mutex), \
			 "suspicious mce_log_get_idx_check() usage"); \
	smp_load_acquire(&(p)); \
})

/*
 * Lockless MCE logging infrastructure.
 * This avoids deadlocks on printk locks without having to break locks. Also
 * separate MCEs from kernel messages to avoid bogus bug reports.
 */

static struct mce_log_buffer mcelog = {
	.signature	= MCE_LOG_SIGNATURE,
	.len		= MCE_LOG_LEN,
	.recordlen	= sizeof(struct mce),
};

static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);

/* User mode helper program triggered by machine check event */
extern char mce_helper[128];

static int dev_mce_log(struct notifier_block *nb, unsigned long val,
				void *data)
{
	struct mce *mce = (struct mce *)data;
	unsigned int next, entry;

	wmb();
	for (;;) {
		entry = mce_log_get_idx_check(mcelog.next);
		for (;;) {

			/*
			 * When the buffer fills up discard new entries.
			 * Assume that the earlier errors are the more
			 * interesting ones:
			 */
			if (entry >= MCE_LOG_LEN) {
				set_bit(MCE_OVERFLOW,
					(unsigned long *)&mcelog.flags);
				return NOTIFY_OK;
			}
			/* Old left over entry. Skip: */
			if (mcelog.entry[entry].finished) {
				entry++;
				continue;
			}
			break;
		}
		smp_rmb();
		next = entry + 1;
		if (cmpxchg(&mcelog.next, entry, next) == entry)
			break;
	}
	memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
	wmb();
	mcelog.entry[entry].finished = 1;
	wmb();

	/* wake processes polling /dev/mcelog */
	wake_up_interruptible(&mce_chrdev_wait);

	return NOTIFY_OK;
}

static struct notifier_block dev_mcelog_nb = {
	.notifier_call	= dev_mce_log,
	.priority	= MCE_PRIO_MCELOG,
};

static void mce_do_trigger(struct work_struct *work)
{
	call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT);
}

static DECLARE_WORK(mce_trigger_work, mce_do_trigger);


void mce_work_trigger(void)
{
	if (mce_helper[0])
		schedule_work(&mce_trigger_work);
}

static ssize_t
show_trigger(struct device *s, struct device_attribute *attr, char *buf)
{
	strcpy(buf, mce_helper);
	strcat(buf, "\n");
	return strlen(mce_helper) + 1;
}

static ssize_t set_trigger(struct device *s, struct device_attribute *attr,
			   const char *buf, size_t siz)
{
	char *p;

	strncpy(mce_helper, buf, sizeof(mce_helper));
	mce_helper[sizeof(mce_helper)-1] = 0;
	p = strchr(mce_helper, '\n');

	if (p)
		*p = 0;

	return strlen(mce_helper) + !!p;
}

DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger);

/*
 * mce_chrdev: Character device /dev/mcelog to read and clear the MCE log.
 */

static DEFINE_SPINLOCK(mce_chrdev_state_lock);
static int mce_chrdev_open_count;	/* #times opened */
static int mce_chrdev_open_exclu;	/* already open exclusive? */

static int mce_chrdev_open(struct inode *inode, struct file *file)
{
	spin_lock(&mce_chrdev_state_lock);

	if (mce_chrdev_open_exclu ||
	    (mce_chrdev_open_count && (file->f_flags & O_EXCL))) {
		spin_unlock(&mce_chrdev_state_lock);

		return -EBUSY;
	}

	if (file->f_flags & O_EXCL)
		mce_chrdev_open_exclu = 1;
	mce_chrdev_open_count++;

	spin_unlock(&mce_chrdev_state_lock);

	return nonseekable_open(inode, file);
}

static int mce_chrdev_release(struct inode *inode, struct file *file)
{
	spin_lock(&mce_chrdev_state_lock);

	mce_chrdev_open_count--;
	mce_chrdev_open_exclu = 0;

	spin_unlock(&mce_chrdev_state_lock);

	return 0;
}

static void collect_tscs(void *data)
{
	unsigned long *cpu_tsc = (unsigned long *)data;

	cpu_tsc[smp_processor_id()] = rdtsc();
}

static int mce_apei_read_done;

/* Collect MCE record of previous boot in persistent storage via APEI ERST. */
static int __mce_read_apei(char __user **ubuf, size_t usize)
{
	int rc;
	u64 record_id;
	struct mce m;

	if (usize < sizeof(struct mce))
		return -EINVAL;

	rc = apei_read_mce(&m, &record_id);
	/* Error or no more MCE record */
	if (rc <= 0) {
		mce_apei_read_done = 1;
		/*
		 * When ERST is disabled, mce_chrdev_read() should return
		 * "no record" instead of "no device."
		 */
		if (rc == -ENODEV)
			return 0;
		return rc;
	}
	rc = -EFAULT;
	if (copy_to_user(*ubuf, &m, sizeof(struct mce)))
		return rc;
	/*
	 * In fact, we should have cleared the record after that has
	 * been flushed to the disk or sent to network in
	 * /sbin/mcelog, but we have no interface to support that now,
	 * so just clear it to avoid duplication.
	 */
	rc = apei_clear_mce(record_id);
	if (rc) {
		mce_apei_read_done = 1;
		return rc;
	}
	*ubuf += sizeof(struct mce);

	return 0;
}

static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
				size_t usize, loff_t *off)
{
	char __user *buf = ubuf;
	unsigned long *cpu_tsc;
	unsigned prev, next;
	int i, err;

	cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL);
	if (!cpu_tsc)
		return -ENOMEM;

	mutex_lock(&mce_chrdev_read_mutex);

	if (!mce_apei_read_done) {
		err = __mce_read_apei(&buf, usize);
		if (err || buf != ubuf)
			goto out;
	}

	next = mce_log_get_idx_check(mcelog.next);

	/* Only supports full reads right now */
	err = -EINVAL;
	if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce))
		goto out;

	err = 0;
	prev = 0;
	do {
		for (i = prev; i < next; i++) {
			unsigned long start = jiffies;
			struct mce *m = &mcelog.entry[i];

			while (!m->finished) {
				if (time_after_eq(jiffies, start + 2)) {
					memset(m, 0, sizeof(*m));
					goto timeout;
				}
				cpu_relax();
			}
			smp_rmb();
			err |= copy_to_user(buf, m, sizeof(*m));
			buf += sizeof(*m);
timeout:
			;
		}

		memset(mcelog.entry + prev, 0,
		       (next - prev) * sizeof(struct mce));
		prev = next;
		next = cmpxchg(&mcelog.next, prev, 0);
	} while (next != prev);

	synchronize_sched();

	/*
	 * Collect entries that were still getting written before the
	 * synchronize.
	 */
	on_each_cpu(collect_tscs, cpu_tsc, 1);

	for (i = next; i < MCE_LOG_LEN; i++) {
		struct mce *m = &mcelog.entry[i];

		if (m->finished && m->tsc < cpu_tsc[m->cpu]) {
			err |= copy_to_user(buf, m, sizeof(*m));
			smp_rmb();
			buf += sizeof(*m);
			memset(m, 0, sizeof(*m));
		}
	}

	if (err)
		err = -EFAULT;

out:
	mutex_unlock(&mce_chrdev_read_mutex);
	kfree(cpu_tsc);

	return err ? err : buf - ubuf;
}

static unsigned int mce_chrdev_poll(struct file *file, poll_table *wait)
{
	poll_wait(file, &mce_chrdev_wait, wait);
	if (READ_ONCE(mcelog.next))
		return POLLIN | POLLRDNORM;
	if (!mce_apei_read_done && apei_check_mce())
		return POLLIN | POLLRDNORM;
	return 0;
}

static long mce_chrdev_ioctl(struct file *f, unsigned int cmd,
				unsigned long arg)
{
	int __user *p = (int __user *)arg;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	switch (cmd) {
	case MCE_GET_RECORD_LEN:
		return put_user(sizeof(struct mce), p);
	case MCE_GET_LOG_LEN:
		return put_user(MCE_LOG_LEN, p);
	case MCE_GETCLEAR_FLAGS: {
		unsigned flags;

		do {
			flags = mcelog.flags;
		} while (cmpxchg(&mcelog.flags, flags, 0) != flags);

		return put_user(flags, p);
	}
	default:
		return -ENOTTY;
	}
}

static ssize_t (*mce_write)(struct file *filp, const char __user *ubuf,
			    size_t usize, loff_t *off);

void register_mce_write_callback(ssize_t (*fn)(struct file *filp,
			     const char __user *ubuf,
			     size_t usize, loff_t *off))
{
	mce_write = fn;
}
EXPORT_SYMBOL_GPL(register_mce_write_callback);

static ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf,
				size_t usize, loff_t *off)
{
	if (mce_write)
		return mce_write(filp, ubuf, usize, off);
	else
		return -EINVAL;
}

static const struct file_operations mce_chrdev_ops = {
	.open			= mce_chrdev_open,
	.release		= mce_chrdev_release,
	.read			= mce_chrdev_read,
	.write			= mce_chrdev_write,
	.poll			= mce_chrdev_poll,
	.unlocked_ioctl		= mce_chrdev_ioctl,
	.llseek			= no_llseek,
};

static struct miscdevice mce_chrdev_device = {
	MISC_MCELOG_MINOR,
	"mcelog",
	&mce_chrdev_ops,
};

static __init int dev_mcelog_init_device(void)
{
	int err;

	/* register character device /dev/mcelog */
	err = misc_register(&mce_chrdev_device);
	if (err) {
		pr_err("Unable to init device /dev/mcelog (rc: %d)\n", err);
		return err;
	}

	mce_register_decode_chain(&dev_mcelog_nb);
	return 0;
}
device_initcall_sync(dev_mcelog_init_device);
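For context (not part of the commit): the legacy interface factored out above is consumed from user space roughly as in the hedged sketch below. The ioctl names come from the kernel's exported <asm/mce.h> header; the program itself is hypothetical and error handling is trimmed:

	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <asm/mce.h>	/* MCE_GET_RECORD_LEN, MCE_GET_LOG_LEN */

	int main(void)
	{
		int fd = open("/dev/mcelog", O_RDONLY);
		int recordlen = 0, loglen = 0;
		char *buf;
		ssize_t n;

		if (fd < 0)
			return 1;

		ioctl(fd, MCE_GET_RECORD_LEN, &recordlen);
		ioctl(fd, MCE_GET_LOG_LEN, &loglen);

		/* The driver only supports full reads: loglen * recordlen bytes. */
		buf = malloc((size_t)recordlen * loglen);
		n = read(fd, buf, (size_t)recordlen * loglen);
		if (n > 0)
			printf("read %zd bytes (%zd records)\n", n, n / recordlen);

		free(buf);
		close(fd);
		return 0;
	}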
@@ -96,3 +96,11 @@ static inline bool mce_cmp(struct mce *m1, struct mce *m2)
 	       m1->addr != m2->addr ||
 	       m1->misc != m2->misc;
 }
+
+extern struct device_attribute dev_attr_trigger;
+
+#ifdef CONFIG_X86_MCELOG_LEGACY
+extern void mce_work_trigger(void);
+#else
+static inline void mce_work_trigger(void) { }
+#endif
@@ -35,6 +35,7 @@
 #include <linux/poll.h>
 #include <linux/nmi.h>
 #include <linux/cpu.h>
+#include <linux/ras.h>
 #include <linux/smp.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
@@ -49,20 +50,11 @@
 #include <asm/tlbflush.h>
 #include <asm/mce.h>
 #include <asm/msr.h>
+#include <asm/reboot.h>
 
 #include "mce-internal.h"
 
-static DEFINE_MUTEX(mce_chrdev_read_mutex);
-
-static int mce_chrdev_open_count;	/* #times opened */
-
-#define mce_log_get_idx_check(p) \
-({ \
-	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
-			 !lockdep_is_held(&mce_chrdev_read_mutex), \
-			 "suspicious mce_log_get_idx_check() usage"); \
-	smp_load_acquire(&(p)); \
-})
+static DEFINE_MUTEX(mce_log_mutex);
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/mce.h>
@@ -87,15 +79,9 @@ struct mca_config mca_cfg __read_mostly = {
 	.monarch_timeout = -1
 };
 
-/* User mode helper program triggered by machine check event */
-static unsigned long		mce_need_notify;
-static char			mce_helper[128];
-static char			*mce_helper_argv[2] = { mce_helper, NULL };
-
-static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
-
 static DEFINE_PER_CPU(struct mce, mces_seen);
+static unsigned long	mce_need_notify;
 static int		cpu_missing;
 
 /*
  * MCA banks polled by the period polling timer for corrected events.
@@ -145,80 +131,36 @@ void mce_setup(struct mce *m)
 DEFINE_PER_CPU(struct mce, injectm);
 EXPORT_PER_CPU_SYMBOL_GPL(injectm);
 
-/*
- * Lockless MCE logging infrastructure.
- * This avoids deadlocks on printk locks without having to break locks. Also
- * separate MCEs from kernel messages to avoid bogus bug reports.
- */
-
-static struct mce_log mcelog = {
-	.signature	= MCE_LOG_SIGNATURE,
-	.len		= MCE_LOG_LEN,
-	.recordlen	= sizeof(struct mce),
-};
-
-void mce_log(struct mce *mce)
+void mce_log(struct mce *m)
 {
-	unsigned next, entry;
-
-	/* Emit the trace record: */
-	trace_mce_record(mce);
-
-	if (!mce_gen_pool_add(mce))
+	if (!mce_gen_pool_add(m))
 		irq_work_queue(&mce_irq_work);
-
-	wmb();
-	for (;;) {
-		entry = mce_log_get_idx_check(mcelog.next);
-		for (;;) {
-
-			/*
-			 * When the buffer fills up discard new entries.
-			 * Assume that the earlier errors are the more
-			 * interesting ones:
-			 */
-			if (entry >= MCE_LOG_LEN) {
-				set_bit(MCE_OVERFLOW,
-					(unsigned long *)&mcelog.flags);
-				return;
-			}
-			/* Old left over entry. Skip: */
-			if (mcelog.entry[entry].finished) {
-				entry++;
-				continue;
-			}
-			break;
-		}
-		smp_rmb();
-		next = entry + 1;
-		if (cmpxchg(&mcelog.next, entry, next) == entry)
-			break;
-	}
-	memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
-	wmb();
-	mcelog.entry[entry].finished = 1;
-	wmb();
-
-	set_bit(0, &mce_need_notify);
 }
 
 void mce_inject_log(struct mce *m)
 {
-	mutex_lock(&mce_chrdev_read_mutex);
+	mutex_lock(&mce_log_mutex);
 	mce_log(m);
-	mutex_unlock(&mce_chrdev_read_mutex);
+	mutex_unlock(&mce_log_mutex);
 }
 EXPORT_SYMBOL_GPL(mce_inject_log);
 
 static struct notifier_block mce_srao_nb;
 
+/*
+ * We run the default notifier if we have only the SRAO, the first and the
+ * default notifier registered. I.e., the mandatory NUM_DEFAULT_NOTIFIERS
+ * notifiers registered on the chain.
+ */
+#define NUM_DEFAULT_NOTIFIERS	3
 static atomic_t num_notifiers;
 
 void mce_register_decode_chain(struct notifier_block *nb)
 {
-	atomic_inc(&num_notifiers);
+	if (WARN_ON(nb->priority > MCE_PRIO_MCELOG && nb->priority < MCE_PRIO_EDAC))
+		return;
 
-	WARN_ON(nb->priority > MCE_PRIO_LOWEST && nb->priority < MCE_PRIO_EDAC);
+	atomic_inc(&num_notifiers);
 
 	blocking_notifier_chain_register(&x86_mce_decoder_chain, nb);
 }
@@ -510,7 +452,6 @@ static void mce_schedule_work(void)
 
 static void mce_irq_work_cb(struct irq_work *entry)
 {
-	mce_notify_irq();
 	mce_schedule_work();
 }
 
@@ -539,20 +480,97 @@ static void mce_report_event(struct pt_regs *regs)
  */
 static int mce_usable_address(struct mce *m)
 {
-	if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV))
+	if (!(m->status & MCI_STATUS_ADDRV))
 		return 0;
 
+	/* Checks after this one are Intel-specific: */
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+		return 1;
+
+	if (!(m->status & MCI_STATUS_MISCV))
+		return 0;
+
 	if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
 		return 0;
+
 	if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
 		return 0;
 
 	return 1;
 }
 
+static bool memory_error(struct mce *m)
+{
+	struct cpuinfo_x86 *c = &boot_cpu_data;
+
+	if (c->x86_vendor == X86_VENDOR_AMD) {
+		/* ErrCodeExt[20:16] */
+		u8 xec = (m->status >> 16) & 0x1f;
+
+		return (xec == 0x0 || xec == 0x8);
+	} else if (c->x86_vendor == X86_VENDOR_INTEL) {
+		/*
+		 * Intel SDM Volume 3B - 15.9.2 Compound Error Codes
+		 *
+		 * Bit 7 of the MCACOD field of IA32_MCi_STATUS is used for
+		 * indicating a memory error. Bit 8 is used for indicating a
+		 * cache hierarchy error. The combination of bit 2 and bit 3
+		 * is used for indicating a `generic' cache hierarchy error
+		 * But we can't just blindly check the above bits, because if
+		 * bit 11 is set, then it is a bus/interconnect error - and
+		 * either way the above bits just gives more detail on what
+		 * bus/interconnect error happened. Note that bit 12 can be
+		 * ignored, as it's the "filter" bit.
+		 */
+		return (m->status & 0xef80) == BIT(7) ||
+		       (m->status & 0xef00) == BIT(8) ||
+		       (m->status & 0xeffc) == 0xc;
+	}
+
+	return false;
+}
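/*
 * Editor's note, not part of the diff: a worked example of the Intel check
 * above, with illustrative MCACOD values. A corrected memory read error
 * might carry MCACOD 0x009f (memory read, channel 0xf): 0x009f & 0xef80 ==
 * 0x0080 == BIT(7), so memory_error() returns true. A bus/interconnect
 * error with bit 11 set, e.g. MCACOD 0x0e0b, gives 0x0e0b & 0xef80 ==
 * 0x0e00 != BIT(7) and is correctly rejected.
 */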
+static bool cec_add_mce(struct mce *m)
+{
+	if (!m)
+		return false;
+
+	/* We eat only correctable DRAM errors with usable addresses. */
+	if (memory_error(m) &&
+	    !(m->status & MCI_STATUS_UC) &&
+	    mce_usable_address(m))
+		if (!cec_add_elem(m->addr >> PAGE_SHIFT))
+			return true;
+
+	return false;
+}
+
+static int mce_first_notifier(struct notifier_block *nb, unsigned long val,
+			      void *data)
+{
+	struct mce *m = (struct mce *)data;
+
+	if (!m)
+		return NOTIFY_DONE;
+
+	if (cec_add_mce(m))
+		return NOTIFY_STOP;
+
+	/* Emit the trace record: */
+	trace_mce_record(m);
+
+	set_bit(0, &mce_need_notify);
+
+	mce_notify_irq();
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block first_nb = {
+	.notifier_call	= mce_first_notifier,
+	.priority	= MCE_PRIO_FIRST,
+};
+
 static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
 				void *data)
 {
@@ -582,15 +600,7 @@ static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
 	if (!m)
 		return NOTIFY_DONE;
 
-	/*
-	 * Run the default notifier if we have only the SRAO
-	 * notifier and us registered.
-	 */
-	if (atomic_read(&num_notifiers) > 2)
-		return NOTIFY_DONE;
-
-	/* Don't print when mcelog is running */
-	if (mce_chrdev_open_count > 0)
+	if (atomic_read(&num_notifiers) > NUM_DEFAULT_NOTIFIERS)
 		return NOTIFY_DONE;
 
 	__print_mce(m);
@@ -643,37 +653,6 @@ static void mce_read_aux(struct mce *m, int i)
 	}
 }
 
-static bool memory_error(struct mce *m)
-{
-	struct cpuinfo_x86 *c = &boot_cpu_data;
-
-	if (c->x86_vendor == X86_VENDOR_AMD) {
-		/* ErrCodeExt[20:16] */
-		u8 xec = (m->status >> 16) & 0x1f;
-
-		return (xec == 0x0 || xec == 0x8);
-	} else if (c->x86_vendor == X86_VENDOR_INTEL) {
-		/*
-		 * Intel SDM Volume 3B - 15.9.2 Compound Error Codes
-		 *
-		 * Bit 7 of the MCACOD field of IA32_MCi_STATUS is used for
-		 * indicating a memory error. Bit 8 is used for indicating a
-		 * cache hierarchy error. The combination of bit 2 and bit 3
-		 * is used for indicating a `generic' cache hierarchy error
-		 * But we can't just blindly check the above bits, because if
-		 * bit 11 is set, then it is a bus/interconnect error - and
-		 * either way the above bits just gives more detail on what
-		 * bus/interconnect error happened. Note that bit 12 can be
-		 * ignored, as it's the "filter" bit.
-		 */
-		return (m->status & 0xef80) == BIT(7) ||
-		       (m->status & 0xef00) == BIT(8) ||
-		       (m->status & 0xeffc) == 0xc;
-	}
-
-	return false;
-}
-
 DEFINE_PER_CPU(unsigned, mce_poll_count);
 
 /*
@@ -1122,9 +1101,22 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	 * on Intel.
 	 */
 	int lmce = 1;
+	int cpu = smp_processor_id();
 
-	/* If this CPU is offline, just bail out. */
-	if (cpu_is_offline(smp_processor_id())) {
+	/*
+	 * Cases where we avoid rendezvous handler timeout:
+	 * 1) If this CPU is offline.
+	 *
+	 * 2) If crashing_cpu was set, e.g. we're entering kdump and we need to
+	 *    skip those CPUs which remain looping in the 1st kernel - see
+	 *    crash_nmi_callback().
+	 *
+	 * Note: there still is a small window between kexec-ing and the new,
+	 * kdump kernel establishing a new #MC handler where a broadcasted MCE
+	 * might not get handled properly.
+	 */
+	if (cpu_is_offline(cpu) ||
+	    (crashing_cpu != -1 && crashing_cpu != cpu)) {
 		u64 mcgstatus;
 
 		mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
@@ -1394,13 +1386,6 @@ static void mce_timer_delete_all(void)
 		del_timer_sync(&per_cpu(mce_timer, cpu));
 }
 
-static void mce_do_trigger(struct work_struct *work)
-{
-	call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT);
-}
-
-static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
-
 /*
  * Notify the user(s) about new machine check events.
  * Can be called from interrupt context, but not from machine check/NMI
@@ -1412,11 +1397,7 @@ int mce_notify_irq(void)
 	static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
 
 	if (test_and_clear_bit(0, &mce_need_notify)) {
-		/* wake processes polling /dev/mcelog */
-		wake_up_interruptible(&mce_chrdev_wait);
-
-		if (mce_helper[0])
-			schedule_work(&mce_trigger_work);
+		mce_work_trigger();
 
 		if (__ratelimit(&ratelimit))
 			pr_info(HW_ERR "Machine check events logged\n");
@@ -1683,6 +1664,25 @@ static int __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
 	return 0;
 }
 
+/*
+ * Init basic CPU features needed for early decoding of MCEs.
+ */
+static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c)
+{
+	if (c->x86_vendor == X86_VENDOR_AMD) {
+		mce_flags.overflow_recov = !!cpu_has(c, X86_FEATURE_OVERFLOW_RECOV);
+		mce_flags.succor	 = !!cpu_has(c, X86_FEATURE_SUCCOR);
+		mce_flags.smca		 = !!cpu_has(c, X86_FEATURE_SMCA);
+
+		if (mce_flags.smca) {
+			msr_ops.ctl	= smca_ctl_reg;
+			msr_ops.status	= smca_status_reg;
+			msr_ops.addr	= smca_addr_reg;
+			msr_ops.misc	= smca_misc_reg;
+		}
+	}
+}
+
 static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 {
 	switch (c->x86_vendor) {
@@ -1692,21 +1692,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 		break;
 
 	case X86_VENDOR_AMD: {
-		mce_flags.overflow_recov = !!cpu_has(c, X86_FEATURE_OVERFLOW_RECOV);
-		mce_flags.succor	 = !!cpu_has(c, X86_FEATURE_SUCCOR);
-		mce_flags.smca		 = !!cpu_has(c, X86_FEATURE_SMCA);
-
-		/*
-		 * Install proper ops for Scalable MCA enabled processors
-		 */
-		if (mce_flags.smca) {
-			msr_ops.ctl	= smca_ctl_reg;
-			msr_ops.status	= smca_status_reg;
-			msr_ops.addr	= smca_addr_reg;
-			msr_ops.misc	= smca_misc_reg;
-		}
 		mce_amd_feature_init(c);
-
 		break;
 	}
 
@@ -1793,6 +1779,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
 
 	machine_check_vector = do_machine_check;
 
+	__mcheck_cpu_init_early(c);
 	__mcheck_cpu_init_generic();
 	__mcheck_cpu_init_vendor(c);
 	__mcheck_cpu_init_clear_banks();
@@ -1818,251 +1805,6 @@ void mcheck_cpu_clear(struct cpuinfo_x86 *c)
 
 }
 
-/*
- * mce_chrdev: Character device /dev/mcelog to read and clear the MCE log.
- */
-
-static DEFINE_SPINLOCK(mce_chrdev_state_lock);
-static int mce_chrdev_open_exclu;	/* already open exclusive? */
-
-static int mce_chrdev_open(struct inode *inode, struct file *file)
-{
-	spin_lock(&mce_chrdev_state_lock);
-
-	if (mce_chrdev_open_exclu ||
-	    (mce_chrdev_open_count && (file->f_flags & O_EXCL))) {
-		spin_unlock(&mce_chrdev_state_lock);
-
-		return -EBUSY;
-	}
-
-	if (file->f_flags & O_EXCL)
-		mce_chrdev_open_exclu = 1;
-	mce_chrdev_open_count++;
-
-	spin_unlock(&mce_chrdev_state_lock);
-
-	return nonseekable_open(inode, file);
-}
-
-static int mce_chrdev_release(struct inode *inode, struct file *file)
-{
-	spin_lock(&mce_chrdev_state_lock);
-
-	mce_chrdev_open_count--;
-	mce_chrdev_open_exclu = 0;
-
-	spin_unlock(&mce_chrdev_state_lock);
-
-	return 0;
-}
-
-static void collect_tscs(void *data)
-{
-	unsigned long *cpu_tsc = (unsigned long *)data;
-
-	cpu_tsc[smp_processor_id()] = rdtsc();
-}
-
-static int mce_apei_read_done;
-
-/* Collect MCE record of previous boot in persistent storage via APEI ERST. */
-static int __mce_read_apei(char __user **ubuf, size_t usize)
-{
-	int rc;
-	u64 record_id;
-	struct mce m;
-
-	if (usize < sizeof(struct mce))
-		return -EINVAL;
-
-	rc = apei_read_mce(&m, &record_id);
-	/* Error or no more MCE record */
-	if (rc <= 0) {
-		mce_apei_read_done = 1;
-		/*
-		 * When ERST is disabled, mce_chrdev_read() should return
-		 * "no record" instead of "no device."
-		 */
-		if (rc == -ENODEV)
-			return 0;
-		return rc;
-	}
-	rc = -EFAULT;
-	if (copy_to_user(*ubuf, &m, sizeof(struct mce)))
-		return rc;
-	/*
-	 * In fact, we should have cleared the record after that has
-	 * been flushed to the disk or sent to network in
-	 * /sbin/mcelog, but we have no interface to support that now,
-	 * so just clear it to avoid duplication.
-	 */
-	rc = apei_clear_mce(record_id);
-	if (rc) {
-		mce_apei_read_done = 1;
-		return rc;
-	}
-	*ubuf += sizeof(struct mce);
-
-	return 0;
-}
-
-static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
-				size_t usize, loff_t *off)
-{
-	char __user *buf = ubuf;
-	unsigned long *cpu_tsc;
-	unsigned prev, next;
-	int i, err;
-
-	cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL);
-	if (!cpu_tsc)
-		return -ENOMEM;
-
-	mutex_lock(&mce_chrdev_read_mutex);
-
-	if (!mce_apei_read_done) {
-		err = __mce_read_apei(&buf, usize);
-		if (err || buf != ubuf)
-			goto out;
-	}
-
-	next = mce_log_get_idx_check(mcelog.next);
-
-	/* Only supports full reads right now */
-	err = -EINVAL;
-	if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce))
-		goto out;
-
-	err = 0;
-	prev = 0;
-	do {
-		for (i = prev; i < next; i++) {
-			unsigned long start = jiffies;
-			struct mce *m = &mcelog.entry[i];
-
-			while (!m->finished) {
-				if (time_after_eq(jiffies, start + 2)) {
-					memset(m, 0, sizeof(*m));
-					goto timeout;
-				}
-				cpu_relax();
-			}
-			smp_rmb();
-			err |= copy_to_user(buf, m, sizeof(*m));
-			buf += sizeof(*m);
-timeout:
-			;
-		}
-
-		memset(mcelog.entry + prev, 0,
-		       (next - prev) * sizeof(struct mce));
-		prev = next;
-		next = cmpxchg(&mcelog.next, prev, 0);
-	} while (next != prev);
-
-	synchronize_sched();
-
-	/*
-	 * Collect entries that were still getting written before the
-	 * synchronize.
-	 */
-	on_each_cpu(collect_tscs, cpu_tsc, 1);
-
-	for (i = next; i < MCE_LOG_LEN; i++) {
-		struct mce *m = &mcelog.entry[i];
-
-		if (m->finished && m->tsc < cpu_tsc[m->cpu]) {
-			err |= copy_to_user(buf, m, sizeof(*m));
-			smp_rmb();
-			buf += sizeof(*m);
-			memset(m, 0, sizeof(*m));
-		}
-	}
-
-	if (err)
-		err = -EFAULT;
-
-out:
-	mutex_unlock(&mce_chrdev_read_mutex);
-	kfree(cpu_tsc);
-
-	return err ? err : buf - ubuf;
-}
-
-static unsigned int mce_chrdev_poll(struct file *file, poll_table *wait)
-{
-	poll_wait(file, &mce_chrdev_wait, wait);
-	if (READ_ONCE(mcelog.next))
-		return POLLIN | POLLRDNORM;
-	if (!mce_apei_read_done && apei_check_mce())
-		return POLLIN | POLLRDNORM;
-	return 0;
-}
-
-static long mce_chrdev_ioctl(struct file *f, unsigned int cmd,
-				unsigned long arg)
-{
-	int __user *p = (int __user *)arg;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
-	switch (cmd) {
-	case MCE_GET_RECORD_LEN:
-		return put_user(sizeof(struct mce), p);
-	case MCE_GET_LOG_LEN:
-		return put_user(MCE_LOG_LEN, p);
-	case MCE_GETCLEAR_FLAGS: {
-		unsigned flags;
-
-		do {
-			flags = mcelog.flags;
-		} while (cmpxchg(&mcelog.flags, flags, 0) != flags);
-
-		return put_user(flags, p);
-	}
-	default:
-		return -ENOTTY;
-	}
-}
-
-static ssize_t (*mce_write)(struct file *filp, const char __user *ubuf,
-			    size_t usize, loff_t *off);
-
-void register_mce_write_callback(ssize_t (*fn)(struct file *filp,
-			     const char __user *ubuf,
-			     size_t usize, loff_t *off))
-{
-	mce_write = fn;
-}
-EXPORT_SYMBOL_GPL(register_mce_write_callback);
-
-static ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf,
-				size_t usize, loff_t *off)
-{
-	if (mce_write)
-		return mce_write(filp, ubuf, usize, off);
-	else
-		return -EINVAL;
-}
-
-static const struct file_operations mce_chrdev_ops = {
-	.open			= mce_chrdev_open,
-	.release		= mce_chrdev_release,
-	.read			= mce_chrdev_read,
-	.write			= mce_chrdev_write,
-	.poll			= mce_chrdev_poll,
-	.unlocked_ioctl		= mce_chrdev_ioctl,
-	.llseek			= no_llseek,
-};
-
-static struct miscdevice mce_chrdev_device = {
-	MISC_MCELOG_MINOR,
-	"mcelog",
-	&mce_chrdev_ops,
-};
-
 static void __mce_disable_bank(void *arg)
 {
 	int bank = *((int *)arg);
@@ -2136,6 +1878,7 @@ __setup("mce", mcheck_enable);
 int __init mcheck_init(void)
 {
 	mcheck_intel_therm_init();
+	mce_register_decode_chain(&first_nb);
 	mce_register_decode_chain(&mce_srao_nb);
 	mce_register_decode_chain(&mce_default_nb);
 	mcheck_vendor_init_severity();
@@ -2280,29 +2023,6 @@ static ssize_t set_bank(struct device *s, struct device_attribute *attr,
 	return size;
 }
 
-static ssize_t
-show_trigger(struct device *s, struct device_attribute *attr, char *buf)
-{
-	strcpy(buf, mce_helper);
-	strcat(buf, "\n");
-	return strlen(mce_helper) + 1;
-}
-
-static ssize_t set_trigger(struct device *s, struct device_attribute *attr,
-			   const char *buf, size_t siz)
-{
-	char *p;
-
-	strncpy(mce_helper, buf, sizeof(mce_helper));
-	mce_helper[sizeof(mce_helper)-1] = 0;
-	p = strchr(mce_helper, '\n');
-
-	if (p)
-		*p = 0;
-
-	return strlen(mce_helper) + !!p;
-}
-
 static ssize_t set_ignore_ce(struct device *s,
 			     struct device_attribute *attr,
 			     const char *buf, size_t size)
@@ -2359,7 +2079,6 @@ static ssize_t store_int_with_restart(struct device *s,
 	return ret;
 }
 
-static DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger);
 static DEVICE_INT_ATTR(tolerant, 0644, mca_cfg.tolerant);
 static DEVICE_INT_ATTR(monarch_timeout, 0644, mca_cfg.monarch_timeout);
 static DEVICE_BOOL_ATTR(dont_log_ce, 0644, mca_cfg.dont_log_ce);
@@ -2382,7 +2101,9 @@ static struct dev_ext_attribute dev_attr_cmci_disabled = {
 static struct device_attribute *mce_device_attrs[] = {
 	&dev_attr_tolerant.attr,
 	&dev_attr_check_interval.attr,
+#ifdef CONFIG_X86_MCELOG_LEGACY
 	&dev_attr_trigger,
+#endif
 	&dev_attr_monarch_timeout.attr,
 	&dev_attr_dont_log_ce.attr,
 	&dev_attr_ignore_ce.attr,
@@ -2556,7 +2277,6 @@ static __init void mce_init_banks(void)
 
 static __init int mcheck_init_device(void)
 {
-	enum cpuhp_state hp_online;
 	int err;
 
 	if (!mce_available(&boot_cpu_data)) {
@@ -2584,21 +2304,11 @@ static __init int mcheck_init_device(void)
 				mce_cpu_online, mce_cpu_pre_down);
 	if (err < 0)
 		goto err_out_online;
-	hp_online = err;
 
 	register_syscore_ops(&mce_syscore_ops);
 
-	/* register character device /dev/mcelog */
-	err = misc_register(&mce_chrdev_device);
-	if (err)
-		goto err_register;
-
 	return 0;
 
-err_register:
-	unregister_syscore_ops(&mce_syscore_ops);
-	cpuhp_remove_state(hp_online);
-
 err_out_online:
 	cpuhp_remove_state(CPUHP_X86_MCE_DEAD);
 
@@ -2606,7 +2316,7 @@ err_out_mem:
 	free_cpumask_var(mce_device_initialized);
 
 err_out:
-	pr_err("Unable to init device /dev/mcelog (rc: %d)\n", err);
+	pr_err("Unable to init MCE device (rc: %d)\n", err);
 
 	return err;
 }
@@ -2685,6 +2395,7 @@ static int __init mcheck_late_init(void)
 		static_branch_inc(&mcsafe_key);
 
 	mcheck_debugfs_init();
+	cec_init();
 
 	/*
	 * Flush out everything that has been logged during early boot, now that
@@ -481,6 +481,9 @@ static void intel_ppin_init(struct cpuinfo_x86 *c)
 	case INTEL_FAM6_BROADWELL_XEON_D:
 	case INTEL_FAM6_BROADWELL_X:
 	case INTEL_FAM6_SKYLAKE_X:
+	case INTEL_FAM6_XEON_PHI_KNL:
+	case INTEL_FAM6_XEON_PHI_KNM:
+
 		if (rdmsrl_safe(MSR_PPIN_CTL, &val))
 			return;
 
@@ -765,10 +765,11 @@ void machine_crash_shutdown(struct pt_regs *regs)
 }
 #endif
 
+/* This is the CPU performing the emergency shutdown work. */
+int crashing_cpu = -1;
+
 #if defined(CONFIG_SMP)
 
-/* This keeps a track of which one is crashing cpu. */
-static int crashing_cpu;
 static nmi_shootdown_cb shootdown_callback;
 
 static atomic_t waiting_for_crash_ipi;
@@ -7,3 +7,17 @@ config MCE_AMD_INJ
 	  aspects of the MCE handling code.
 
 	  WARNING: Do not even assume this interface is staying stable!
+
+config RAS_CEC
+	bool "Correctable Errors Collector"
+	depends on X86_MCE && MEMORY_FAILURE && DEBUG_FS
+	---help---
+	  This is a small cache which collects correctable memory errors per 4K
+	  page PFN and counts their repeated occurrence. Once the counter for a
+	  PFN overflows, we try to soft-offline that page as we take it to mean
+	  that it has reached a relatively high error count and would probably
+	  be best if we don't use it anymore.
+
+	  Bear in mind that this is absolutely useless if your platform doesn't
+	  have ECC DIMMs and doesn't have DRAM ECC checking enabled in the BIOS.
@@ -1005,9 +1005,8 @@ static int ghes_probe(struct platform_device *ghes_dev)
 
 	switch (generic->notify.type) {
 	case ACPI_HEST_NOTIFY_POLLED:
-		ghes->timer.function = ghes_poll_func;
-		ghes->timer.data = (unsigned long)ghes;
-		init_timer_deferrable(&ghes->timer);
+		setup_deferrable_timer(&ghes->timer, ghes_poll_func,
+				       (unsigned long)ghes);
 		ghes_add_timer(ghes);
 		break;
 	case ACPI_HEST_NOTIFY_EXTERNAL:
@@ -1 +1,2 @@
 obj-$(CONFIG_RAS)	+= ras.o debugfs.o
+obj-$(CONFIG_RAS_CEC)	+= cec.o
drivers/ras/cec.c (new file, 532 lines)
@@ -0,0 +1,532 @@
#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/kernel.h>

#include <asm/mce.h>

#include "debugfs.h"

/*
 * RAS Correctable Errors Collector
 *
 * This is a simple gadget which collects correctable errors and counts their
 * occurrence per physical page address.
 *
 * We've opted for possibly the simplest data structure to collect those - an
 * array of the size of a memory page. It stores 512 u64's with the following
 * structure:
 *
 * [63 ... PFN ... 12 | 11 ... generation ... 10 | 9 ... count ... 0]
 *
 * The generation in the two highest order bits is two bits which are set to 11b
 * on every insertion. During the course of each entry's existence, the
 * generation field gets decremented during spring cleaning to 10b, then 01b and
 * then 00b.
 *
 * This way we're employing the natural numeric ordering to make sure that newly
 * inserted/touched elements have higher 12-bit counts (which we've manufactured)
 * and thus iterating over the array initially won't kick out those elements
 * which were inserted last.
 *
 * Spring cleaning is what we do when we reach a certain number (CLEAN_ELEMS) of
 * elements entered into the array, during which, we're decaying all elements.
 * If, after decay, an element gets inserted again, its generation is set to 11b
 * to make sure it has higher numerical count than other, older elements and
 * thus emulate an LRU-like behavior when deleting elements to free up space
 * in the page.
 *
 * When an element reaches its max count of count_threshold, we try to poison
 * it by assuming that errors triggered count_threshold times in a single page
 * are excessive and that page shouldn't be used anymore. count_threshold is
 * initialized to COUNT_MASK which is the maximum.
 *
 * That error event entry causes cec_add_elem() to return !0 value and thus
 * signal to its callers to log the error.
 *
 * To the question why we've chosen a page and moving elements around with
 * memmove(), it is because it is a very simple structure to handle and max data
 * movement is 4K which on highly optimized modern CPUs is almost unnoticeable.
 * We wanted to avoid the pointer traversal of more complex structures like a
 * linked list or some sort of a balancing search tree.
 *
 * Deleting an element takes O(n) but since it is only a single page, it should
 * be fast enough and it shouldn't happen all too often depending on error
 * patterns.
 */

#undef pr_fmt
#define pr_fmt(fmt) "RAS: " fmt

/*
 * We use DECAY_BITS bits of PAGE_SHIFT bits for counting decay, i.e., how long
 * elements have stayed in the array without having been accessed again.
 */
#define DECAY_BITS		2
#define DECAY_MASK		((1ULL << DECAY_BITS) - 1)
#define MAX_ELEMS		(PAGE_SIZE / sizeof(u64))

/*
 * Threshold amount of inserted elements after which we start spring
 * cleaning.
 */
#define CLEAN_ELEMS		(MAX_ELEMS >> DECAY_BITS)

/* Bits which count the number of errors happened in this 4K page. */
#define COUNT_BITS		(PAGE_SHIFT - DECAY_BITS)
#define COUNT_MASK		((1ULL << COUNT_BITS) - 1)
#define FULL_COUNT_MASK		(PAGE_SIZE - 1)

/*
 * u64: [ 63 ... 12 | DECAY_BITS | COUNT_BITS ]
 */

#define PFN(e)			((e) >> PAGE_SHIFT)
#define DECAY(e)		(((e) >> COUNT_BITS) & DECAY_MASK)
#define COUNT(e)		((unsigned int)(e) & COUNT_MASK)
#define FULL_COUNT(e)		((e) & (PAGE_SIZE - 1))

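/*
 * Editor's note, a worked example with illustrative values (not part of the
 * original file): with PAGE_SHIFT == 12, DECAY_BITS == 2 and COUNT_BITS == 10,
 * an element for pfn 0xabcde with decay 3 (fresh, 11b) and count 5 packs as
 *
 *	e = (0xabcde << 12) | (3 << 10) | 5 = 0xabcdec05
 *
 * and decodes back as PFN(e) == 0xabcde, DECAY(e) == 3, COUNT(e) == 5 and
 * FULL_COUNT(e) == 0xc05.
 */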
static struct ce_array {
	u64 *array;			/* container page */
	unsigned int n;			/* number of elements in the array */

	unsigned int decay_count;	/*
					 * number of element insertions/increments
					 * since the last spring cleaning.
					 */

	u64 pfns_poisoned;		/*
					 * number of PFNs which got poisoned.
					 */

	u64 ces_entered;		/*
					 * The number of correctable errors
					 * entered into the collector.
					 */

	u64 decays_done;		/*
					 * Times we did spring cleaning.
					 */

	union {
		struct {
			__u32	disabled : 1,	/* cmdline disabled */
			__resv   : 31;
		};
		__u32 flags;
	};
} ce_arr;

static DEFINE_MUTEX(ce_mutex);
static u64 dfs_pfn;

/* Amount of errors after which we offline */
static unsigned int count_threshold = COUNT_MASK;

/*
 * The timer "decays" element count each timer_interval which is 24hrs by
 * default.
 */

#define CEC_TIMER_DEFAULT_INTERVAL	24 * 60 * 60	/* 24 hrs */
#define CEC_TIMER_MIN_INTERVAL		 1 * 60 * 60	/* 1h */
#define CEC_TIMER_MAX_INTERVAL	   30 * 24 * 60 * 60	/* one month */
static struct timer_list cec_timer;
static u64 timer_interval = CEC_TIMER_DEFAULT_INTERVAL;

/*
 * Decrement decay value. We're using DECAY_BITS bits to denote decay of an
 * element in the array. On insertion and any access, it gets reset to max.
 */
static void do_spring_cleaning(struct ce_array *ca)
{
	int i;

	for (i = 0; i < ca->n; i++) {
		u8 decay = DECAY(ca->array[i]);

		if (!decay)
			continue;

		decay--;

		ca->array[i] &= ~(DECAY_MASK << COUNT_BITS);
		ca->array[i] |= (decay << COUNT_BITS);
	}
	ca->decay_count = 0;
	ca->decays_done++;
}

/*
 * @interval in seconds
 */
static void cec_mod_timer(struct timer_list *t, unsigned long interval)
{
	unsigned long iv;

	iv = interval * HZ + jiffies;

	mod_timer(t, round_jiffies(iv));
}

static void cec_timer_fn(unsigned long data)
{
	struct ce_array *ca = (struct ce_array *)data;

	do_spring_cleaning(ca);

	cec_mod_timer(&cec_timer, timer_interval);
}

/*
 * @to: index of the smallest element which is >= than @pfn.
 *
 * Return the index of the pfn if found, otherwise negative value.
 */
static int __find_elem(struct ce_array *ca, u64 pfn, unsigned int *to)
{
	u64 this_pfn;
	int min = 0, max = ca->n;

	while (min < max) {
		int tmp = (max + min) >> 1;

		this_pfn = PFN(ca->array[tmp]);

		if (this_pfn < pfn)
			min = tmp + 1;
		else if (this_pfn > pfn)
			max = tmp;
		else {
			min = tmp;
			break;
		}
	}

	if (to)
		*to = min;

	this_pfn = PFN(ca->array[min]);

	if (this_pfn == pfn)
		return min;

	return -ENOKEY;
}

static int find_elem(struct ce_array *ca, u64 pfn, unsigned int *to)
{
	WARN_ON(!to);

	if (!ca->n) {
		*to = 0;
		return -ENOKEY;
	}
	return __find_elem(ca, pfn, to);
}

static void del_elem(struct ce_array *ca, int idx)
{
	/* Save us a function call when deleting the last element. */
	if (ca->n - (idx + 1))
		memmove((void *)&ca->array[idx],
			(void *)&ca->array[idx + 1],
			(ca->n - (idx + 1)) * sizeof(u64));

	ca->n--;
}

static u64 del_lru_elem_unlocked(struct ce_array *ca)
{
	unsigned int min = FULL_COUNT_MASK;
	int i, min_idx = 0;

	for (i = 0; i < ca->n; i++) {
		unsigned int this = FULL_COUNT(ca->array[i]);

		if (min > this) {
			min = this;
			min_idx = i;
		}
	}

	del_elem(ca, min_idx);

	return PFN(ca->array[min_idx]);
}

/*
 * We return the 0th pfn in the error case under the assumption that it cannot
 * be poisoned and excessive CEs in there are a serious deal anyway.
 */
static u64 __maybe_unused del_lru_elem(void)
{
	struct ce_array *ca = &ce_arr;
	u64 pfn;

	if (!ca->n)
		return 0;

	mutex_lock(&ce_mutex);
	pfn = del_lru_elem_unlocked(ca);
	mutex_unlock(&ce_mutex);

	return pfn;
}


int cec_add_elem(u64 pfn)
{
	struct ce_array *ca = &ce_arr;
	unsigned int to;
	int count, ret = 0;

	/*
	 * We can be called very early on the identify_cpu() path where we are
	 * not initialized yet. We ignore the error for simplicity.
	 */
	if (!ce_arr.array || ce_arr.disabled)
		return -ENODEV;

	ca->ces_entered++;

	mutex_lock(&ce_mutex);

	if (ca->n == MAX_ELEMS)
		WARN_ON(!del_lru_elem_unlocked(ca));

	ret = find_elem(ca, pfn, &to);
	if (ret < 0) {
		/*
		 * Shift range [to-end] to make room for one more element.
		 */
		memmove((void *)&ca->array[to + 1],
			(void *)&ca->array[to],
			(ca->n - to) * sizeof(u64));

		ca->array[to] = (pfn << PAGE_SHIFT) |
				(DECAY_MASK << COUNT_BITS) | 1;

		ca->n++;

		ret = 0;

		goto decay;
	}

	count = COUNT(ca->array[to]);

	if (count < count_threshold) {
		ca->array[to] |= (DECAY_MASK << COUNT_BITS);
		ca->array[to]++;

		ret = 0;
	} else {
		u64 pfn = ca->array[to] >> PAGE_SHIFT;

		if (!pfn_valid(pfn)) {
			pr_warn("CEC: Invalid pfn: 0x%llx\n", pfn);
		} else {
			/* We have reached max count for this page, soft-offline it. */
			pr_err("Soft-offlining pfn: 0x%llx\n", pfn);
			memory_failure_queue(pfn, 0, MF_SOFT_OFFLINE);
			ca->pfns_poisoned++;
		}

		del_elem(ca, to);

		/*
		 * Return a >0 value to denote that we've reached the offlining
		 * threshold.
		 */
		ret = 1;

		goto unlock;
	}

decay:
	ca->decay_count++;

	if (ca->decay_count >= CLEAN_ELEMS)
		do_spring_cleaning(ca);

unlock:
	mutex_unlock(&ce_mutex);

	return ret;
}

static int u64_get(void *data, u64 *val)
{
	*val = *(u64 *)data;

	return 0;
}

static int pfn_set(void *data, u64 val)
{
	*(u64 *)data = val;

	return cec_add_elem(val);
}

DEFINE_DEBUGFS_ATTRIBUTE(pfn_ops, u64_get, pfn_set, "0x%llx\n");

static int decay_interval_set(void *data, u64 val)
{
	*(u64 *)data = val;

	if (val < CEC_TIMER_MIN_INTERVAL)
		return -EINVAL;

	if (val > CEC_TIMER_MAX_INTERVAL)
		return -EINVAL;

	timer_interval = val;

	cec_mod_timer(&cec_timer, timer_interval);
	return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(decay_interval_ops, u64_get, decay_interval_set, "%lld\n");

static int count_threshold_set(void *data, u64 val)
{
	*(u64 *)data = val;

	if (val > COUNT_MASK)
		val = COUNT_MASK;

	count_threshold = val;

	return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(count_threshold_ops, u64_get, count_threshold_set, "%lld\n");

static int array_dump(struct seq_file *m, void *v)
{
	struct ce_array *ca = &ce_arr;
	u64 prev = 0;
	int i;

	mutex_lock(&ce_mutex);

	seq_printf(m, "{ n: %d\n", ca->n);
	for (i = 0; i < ca->n; i++) {
		u64 this = PFN(ca->array[i]);

		seq_printf(m, " %03d: [%016llx|%03llx]\n", i, this, FULL_COUNT(ca->array[i]));

		WARN_ON(prev > this);

		prev = this;
	}

	seq_printf(m, "}\n");

	seq_printf(m, "Stats:\nCEs: %llu\nofflined pages: %llu\n",
		   ca->ces_entered, ca->pfns_poisoned);

	seq_printf(m, "Flags: 0x%x\n", ca->flags);

	seq_printf(m, "Timer interval: %lld seconds\n", timer_interval);
	seq_printf(m, "Decays: %lld\n", ca->decays_done);

	seq_printf(m, "Action threshold: %d\n", count_threshold);

	mutex_unlock(&ce_mutex);

	return 0;
}

static int array_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, array_dump, NULL);
}

static const struct file_operations array_ops = {
	.owner	 = THIS_MODULE,
	.open	 = array_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release,
};

static int __init create_debugfs_nodes(void)
{
	struct dentry *d, *pfn, *decay, *count, *array;

	d = debugfs_create_dir("cec", ras_debugfs_dir);
	if (!d) {
		pr_warn("Error creating cec debugfs node!\n");
		return -1;
	}

	pfn = debugfs_create_file("pfn", S_IRUSR | S_IWUSR, d, &dfs_pfn, &pfn_ops);
	if (!pfn) {
		pr_warn("Error creating pfn debugfs node!\n");
		goto err;
	}

	array = debugfs_create_file("array", S_IRUSR, d, NULL, &array_ops);
	if (!array) {
		pr_warn("Error creating array debugfs node!\n");
		goto err;
	}

	decay = debugfs_create_file("decay_interval", S_IRUSR | S_IWUSR, d,
				    &timer_interval, &decay_interval_ops);
	if (!decay) {
		pr_warn("Error creating decay_interval debugfs node!\n");
		goto err;
	}

	count = debugfs_create_file("count_threshold", S_IRUSR | S_IWUSR, d,
				    &count_threshold, &count_threshold_ops);
	if (!count) {
		pr_warn("Error creating count_threshold debugfs node!\n");
		goto err;
	}


	return 0;

err:
	debugfs_remove_recursive(d);

	return 1;
}

void __init cec_init(void)
{
	if (ce_arr.disabled)
		return;

	ce_arr.array = (void *)get_zeroed_page(GFP_KERNEL);
	if (!ce_arr.array) {
		pr_err("Error allocating CE array page!\n");
		return;
	}

	if (create_debugfs_nodes())
		return;

	setup_timer(&cec_timer, cec_timer_fn, (unsigned long)&ce_arr);
	cec_mod_timer(&cec_timer, CEC_TIMER_DEFAULT_INTERVAL);

	pr_info("Correctable Errors collector initialized.\n");
}

int __init parse_cec_param(char *str)
{
	if (!str)
		return 0;

	if (*str == '=')
		str++;

	if (!strncmp(str, "cec_disable", 7))
		ce_arr.disabled = 1;
	else
		return 0;

	return 1;
}
@@ -1,6 +1,6 @@
 #include <linux/debugfs.h>
 
-static struct dentry *ras_debugfs_dir;
+struct dentry *ras_debugfs_dir;
 
 static atomic_t trace_count = ATOMIC_INIT(0);
 
drivers/ras/debugfs.h (new file, 8 lines)
@@ -0,0 +1,8 @@
#ifndef __RAS_DEBUGFS_H__
#define __RAS_DEBUGFS_H__

#include <linux/debugfs.h>

extern struct dentry *ras_debugfs_dir;

#endif /* __RAS_DEBUGFS_H__ */
@@ -27,3 +27,14 @@ subsys_initcall(ras_init);
 EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event);
 #endif
 EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
+
+
+int __init parse_ras_param(char *str)
+{
+#ifdef CONFIG_RAS_CEC
+	parse_cec_param(str);
+#endif
+
+	return 1;
+}
+__setup("ras", parse_ras_param);
@@ -1,14 +1,25 @@
 #ifndef __RAS_H__
 #define __RAS_H__
 
+#include <asm/errno.h>
+
 #ifdef CONFIG_DEBUG_FS
 int ras_userspace_consumers(void);
 void ras_debugfs_init(void);
 int ras_add_daemon_trace(void);
 #else
 static inline int ras_userspace_consumers(void) { return 0; }
-static inline void ras_debugfs_init(void) { return; }
+static inline void ras_debugfs_init(void) { }
 static inline int ras_add_daemon_trace(void) { return 0; }
 #endif
 
+#ifdef CONFIG_RAS_CEC
+void __init cec_init(void);
+int __init parse_cec_param(char *str);
+int cec_add_elem(u64 pfn);
+#else
+static inline void __init cec_init(void)	{ }
+static inline int cec_add_elem(u64 pfn)		{ return -ENODEV; }
+#endif
+
 #endif /* __RAS_H__ */