Commit 20965b95 authored by Xu Qiang, committed by Yang Yingliang

watchdog/corelockup: Optimized core lockup detection judgment rules

ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4F3V1


CVE: NA

--------------------------------

Optimize the core lockup detection judgment rules to make them easier to
understand.

Core suspension detection is performed in the hrtimer interrupt handler.
The detection condition is that neither the hrtimer interrupt count nor
the NMI interrupt count of the watched CPU has advanced for several
consecutive checks.
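
For reference, the new judgment rule can be modeled in plain C roughly as
follows. This is a minimal sketch, not kernel code: the field names,
thresholds, and control flow mirror the patch, but the standalone harness,
the frozen counter values, and the loop are illustrative assumptions.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative model of the per-CPU state kept for the watched CPU. */
struct corelockup_state {
	unsigned long hrint_saved;    /* last seen hrtimer interrupt count */
	unsigned long hrint_missed;   /* consecutive checks with no hrtimer progress */
	unsigned long nmi_cnt_saved;  /* last seen NMI interrupt count */
	unsigned long nmi_cnt_missed; /* consecutive checks with no NMI progress */
};

/*
 * One detection pass, shaped like the new watchdog_check_hrtimer():
 * a core is reported only when BOTH counters have stalled for more
 * than five consecutive checks.
 */
static bool check_once(struct corelockup_state *s,
		       unsigned long hrint, unsigned long nmi_int)
{
	if (s->hrint_saved != hrint) {
		s->hrint_saved = hrint;
		s->hrint_missed = 0;
		return false;
	}
	s->hrint_missed++;

	if (s->nmi_cnt_saved != nmi_int) {
		s->nmi_cnt_saved = nmi_int;
		s->nmi_cnt_missed = 0;
		return false;
	}
	s->nmi_cnt_missed++;

	return s->hrint_missed > 5 && s->nmi_cnt_missed > 5;
}

int main(void)
{
	struct corelockup_state s = { 0 };

	/* Both counts frozen at their initial values: a stalled core. */
	for (int i = 0; i < 8; i++)
		printf("check %d -> %s\n", i,
		       check_once(&s, 0, 0) ? "LOCKUP" : "ok");
	return 0;
}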

Signed-off-by: Xu Qiang <xuqiang36@huawei.com>
Reviewed-by: Ding Tianhong <dingtianhong@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
parent bcb31c8c
@@ -24,23 +24,8 @@
 #define nops(n)	asm volatile(__nops(n))
 #define sev()		asm volatile("sev" : : : "memory")
-#ifdef CONFIG_CORELOCKUP_DETECTOR
-extern unsigned int close_wfi_wfe;
-#define wfe()							\
-	do {							\
-		if (likely(close_wfi_wfe == 0))			\
-			asm volatile("wfe" : : : "memory");	\
-	} while (0)
-#define wfi()							\
-	do {							\
-		if (likely(close_wfi_wfe == 0))			\
-			asm volatile("wfi" : : : "memory");	\
-	} while (0)
-#else
 #define wfe()		asm volatile("wfe" : : : "memory")
 #define wfi()		asm volatile("wfi" : : : "memory")
-#endif
 
 #define isb()		asm volatile("isb" : : : "memory")
 #define dmb(opt)	asm volatile("dmb " #opt : : : "memory")
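
After this hunk, wfe() and wfi() are unconditional single-instruction
wrappers again; the close_wfi_wfe escape hatch is gone. For context, a
typical sev()/wfe() wait-wake handshake built on the plain macros looks
like the sketch below (arm64-only and illustrative: the flag variable and
both functions are assumptions, not part of this patch).

#define sev()	asm volatile("sev" : : : "memory")
#define wfe()	asm volatile("wfe" : : : "memory")

static volatile int flag;

static void waiter(void)
{
	while (!flag)
		wfe();	/* sleep until an event (sev) or interrupt arrives */
}

static void waker(void)
{
	flag = 1;
	sev();		/* wake every core waiting in wfe */
}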
@@ -77,7 +77,6 @@ EXPORT_SYMBOL(arch_touch_nmi_watchdog);
  * nmi_cnt_missed: the nmi consecutive miss counts of detector_cpu
  * hrint_saved: saved hrtimer interrupts of detector_cpu
  * hrint_missed: the hrtimer consecutive miss counts of detector_cpu
- * corelockup_cpumask/close_wfi_wfe:
- *      the cpu mask is set if certain cpu maybe fall in suspend and close
- *      wfi/wfe mode if any bit is set
  */
@@ -85,12 +84,9 @@ static DEFINE_PER_CPU(unsigned int, detector_cpu);
 static DEFINE_PER_CPU(unsigned long, nmi_interrupts);
 static DEFINE_PER_CPU(unsigned long, nmi_cnt_saved);
 static DEFINE_PER_CPU(unsigned long, nmi_cnt_missed);
-static DEFINE_PER_CPU(bool, core_watchdog_warn);
 static DEFINE_PER_CPU(unsigned long, hrint_saved);
 static DEFINE_PER_CPU(unsigned long, hrint_missed);
-struct cpumask corelockup_cpumask __read_mostly;
-unsigned int close_wfi_wfe;
 static bool pmu_based_nmi;
 static unsigned long corelockup_allcpu_dumped;
 bool enable_corelockup_detector;
 
 static int __init enable_corelockup_detector_setup(char *str)
@@ -150,6 +146,11 @@ void watchdog_check_hrtimer(void)
 {
 	unsigned int cpu = __this_cpu_read(detector_cpu);
 	unsigned long hrint = watchdog_hrtimer_interrupts(cpu);
+	unsigned long nmi_int = per_cpu(nmi_interrupts, cpu);
+
+	/* skip check if only one cpu online */
+	if (cpu == smp_processor_id())
+		return;
 
 	/*
 	 * The freq of hrtimer is fast than nmi interrupts and
@@ -159,23 +160,31 @@
 	 */
 	watchdog_nmi_interrupts();
 
 	if (!pmu_based_nmi)
 		return;
 
 	if (__this_cpu_read(hrint_saved) != hrint) {
 		__this_cpu_write(hrint_saved, hrint);
 		__this_cpu_write(hrint_missed, 0);
-		cpumask_clear_cpu(cpu, &corelockup_cpumask);
-	} else {
-		__this_cpu_inc(hrint_missed);
-		if (__this_cpu_read(hrint_missed) > 2)
-			cpumask_set_cpu(cpu, &corelockup_cpumask);
+		return;
 	}
+	__this_cpu_inc(hrint_missed);
+
+	if (__this_cpu_read(nmi_cnt_saved) != nmi_int) {
+		__this_cpu_write(nmi_cnt_saved, nmi_int);
+		__this_cpu_write(nmi_cnt_missed, 0);
+		return;
+	}
+	__this_cpu_inc(nmi_cnt_missed);
 
-	if (likely(cpumask_empty(&corelockup_cpumask)))
-		close_wfi_wfe = 0;
-	else
-		close_wfi_wfe = 1;
+	if ((__this_cpu_read(hrint_missed) > 5) && (__this_cpu_read(nmi_cnt_missed) > 5)) {
+		pr_emerg("Watchdog detected core LOCKUP on cpu %d\n", cpu);
+
+		if (!test_and_set_bit(0, &corelockup_allcpu_dumped)) {
+			trigger_allbutself_cpu_backtrace();
+			panic("Core LOCKUP");
+		} else {
+			while (1)
+				cpu_relax();
+		}
+	}
 }
 
 /*
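
The thresholds in the watchdog_check_hrtimer() hunk above imply a minimum
detection latency of six consecutive silent check periods on the detector
CPU. A back-of-the-envelope estimate, assuming (purely for illustration; the
patch does not state the period) a one-second hrtimer check interval:

/*
 * Illustrative latency estimate. CHECK_PERIOD_SEC is an assumption,
 * not taken from this patch; the > 5 threshold is from the code above.
 */
#define CHECK_PERIOD_SEC	1
#define MISS_THRESHOLD		5

static const int min_detect_latency_sec =
	(MISS_THRESHOLD + 1) * CHECK_PERIOD_SEC;	/* = 6 s under the assumption */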
@@ -206,9 +215,6 @@ void corelockup_detector_offline_cpu(unsigned int cpu)
 	unsigned int prev = nr_cpu_ids;
 	unsigned int i;
 
-	/* clear bitmap */
-	cpumask_clear_cpu(cpu, &corelockup_cpumask);
-
 	/* found prev cpu */
 	for_each_cpu_and(i, &watchdog_cpumask, cpu_online_mask) {
 		if (per_cpu(detector_cpu, i) == cpu) {
@@ -223,45 +229,6 @@ void corelockup_detector_offline_cpu(unsigned int cpu)
 	/* prev->next */
 	corelockup_status_copy(cpu, prev);
 }
 
-static bool is_corelockup(unsigned int cpu)
-{
-	unsigned long nmi_int = per_cpu(nmi_interrupts, cpu);
-
-	/* skip check if only one cpu online */
-	if (cpu == smp_processor_id())
-		return false;
-
-	if (__this_cpu_read(nmi_cnt_saved) != nmi_int) {
-		__this_cpu_write(nmi_cnt_saved, nmi_int);
-		__this_cpu_write(nmi_cnt_missed, 0);
-		per_cpu(core_watchdog_warn, cpu) = false;
-		return false;
-	}
-
-	__this_cpu_inc(nmi_cnt_missed);
-	if (__this_cpu_read(nmi_cnt_missed) > 2)
-		return true;
-
-	return false;
-}
-NOKPROBE_SYMBOL(is_corelockup);
-
-static void watchdog_corelockup_check(struct pt_regs *regs)
-{
-	unsigned int cpu = __this_cpu_read(detector_cpu);
-
-	if (is_corelockup(cpu)) {
-		if (per_cpu(core_watchdog_warn, cpu) == true)
-			return;
-
-		pr_emerg("Watchdog detected core LOCKUP on cpu %d\n", cpu);
-
-		if (hardlockup_panic)
-			nmi_panic(regs, "Core LOCKUP");
-
-		per_cpu(core_watchdog_warn, cpu) = true;
-	}
-}
 #endif
 
 #ifdef CONFIG_HARDLOCKUP_CHECK_TIMESTAMP
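
The offline path above still rewires the detection relationship: each CPU
watches the CPU recorded in its detector_cpu, and when a CPU goes offline,
its watcher ("prev") inherits the offlined CPU's own target via
corelockup_status_copy(). A minimal user-space model of that handoff (the
array-based ring and the helper below are illustrative assumptions, not
the kernel data structures):

#include <stdio.h>

#define NR_CPUS 4

/* detector_cpu[i] is the CPU that CPU i currently watches. */
static int detector_cpu[NR_CPUS];

/* When @cpu goes offline, whoever watched @cpu takes over @cpu's target. */
static void offline_cpu(int cpu)
{
	int next = detector_cpu[cpu];	/* the CPU @cpu used to watch */

	for (int i = 0; i < NR_CPUS; i++) {
		if (i != cpu && detector_cpu[i] == cpu)
			detector_cpu[i] = next;	/* prev now watches next */
	}
	/* the offlined CPU's own entry is stale and no longer consulted */
}

int main(void)
{
	/* A simple ring: 0 watches 1, 1 watches 2, ..., N-1 watches 0. */
	for (int i = 0; i < NR_CPUS; i++)
		detector_cpu[i] = (i + 1) % NR_CPUS;

	offline_cpu(2);	/* CPU1 should now watch CPU3 */
	for (int i = 0; i < NR_CPUS; i++)
		printf("cpu%d watches cpu%d\n", i, detector_cpu[i]);
	return 0;
}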
@@ -335,9 +302,6 @@ void watchdog_hardlockup_check(struct pt_regs *regs)
 	if (enable_corelockup_detector) {
 		/* Kick nmi interrupts */
 		watchdog_nmi_interrupts();
-
-		/* corelockup check */
-		watchdog_corelockup_check(regs);
 	}
 #endif
@@ -546,9 +510,6 @@ int __init hardlockup_detector_perf_init(void)
 		perf_event_release_kernel(this_cpu_read(watchdog_ev));
 		this_cpu_write(watchdog_ev, NULL);
 	}
-#ifdef CONFIG_CORELOCKUP_DETECTOR
-	pmu_based_nmi = true;
-#endif
 	return ret;
 }
 #endif /* CONFIG_HARDLOCKUP_DETECTOR_PERF */