diff options
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/asm-offsets.c | 8 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/amd.c | 49 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/bugs.c | 31 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 27 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/cpu.h | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mshyperv.c | 72 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/tsx.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/hw_breakpoint.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/kprobes/opt.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/module.c | 97 | ||||
-rw-r--r-- | arch/x86/kernel/nmi.c | 108 | ||||
-rw-r--r-- | arch/x86/kernel/paravirt.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/signal_32.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/tls.c | 1 |
14 files changed, 328 insertions, 80 deletions
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 82c783da16a8..ef9e951415c5 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -7,6 +7,7 @@ #define COMPILE_OFFSETS #include <linux/crypto.h> +#include <crypto/aria.h> #include <linux/sched.h> #include <linux/stddef.h> #include <linux/hardirq.h> @@ -111,5 +112,12 @@ static void __used common(void) #ifdef CONFIG_CALL_DEPTH_TRACKING OFFSET(X86_call_depth, pcpu_hot, call_depth); #endif +#if IS_ENABLED(CONFIG_CRYPTO_ARIA_AESNI_AVX_X86_64) + /* Offset for fields in aria_ctx */ + BLANK(); + OFFSET(ARIA_CTX_enc_key, aria_ctx, enc_key); + OFFSET(ARIA_CTX_dec_key, aria_ctx, dec_key); + OFFSET(ARIA_CTX_rounds, aria_ctx, rounds); +#endif } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index f769d6d08b43..380753b14cab 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -956,7 +956,7 @@ static void init_amd(struct cpuinfo_x86 *c) init_amd_cacheinfo(c); - if (cpu_has(c, X86_FEATURE_XMM2)) { + if (!cpu_has(c, X86_FEATURE_LFENCE_RDTSC) && cpu_has(c, X86_FEATURE_XMM2)) { /* * Use LFENCE for execution serialization. On families which * don't have that MSR, LFENCE is already serializing. @@ -1158,24 +1158,43 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) return false; } -void set_dr_addr_mask(unsigned long mask, int dr) +static DEFINE_PER_CPU_READ_MOSTLY(unsigned long[4], amd_dr_addr_mask); + +static unsigned int amd_msr_dr_addr_masks[] = { + MSR_F16H_DR0_ADDR_MASK, + MSR_F16H_DR1_ADDR_MASK, + MSR_F16H_DR1_ADDR_MASK + 1, + MSR_F16H_DR1_ADDR_MASK + 2 +}; + +void amd_set_dr_addr_mask(unsigned long mask, unsigned int dr) { - if (!boot_cpu_has(X86_FEATURE_BPEXT)) + int cpu = smp_processor_id(); + + if (!cpu_feature_enabled(X86_FEATURE_BPEXT)) return; - switch (dr) { - case 0: - wrmsr(MSR_F16H_DR0_ADDR_MASK, mask, 0); - break; - case 1: - case 2: - case 3: - wrmsr(MSR_F16H_DR1_ADDR_MASK - 1 + dr, mask, 0); - break; - default: - break; - } + if (WARN_ON_ONCE(dr >= ARRAY_SIZE(amd_msr_dr_addr_masks))) + return; + + if (per_cpu(amd_dr_addr_mask, cpu)[dr] == mask) + return; + + wrmsr(amd_msr_dr_addr_masks[dr], mask, 0); + per_cpu(amd_dr_addr_mask, cpu)[dr] = mask; +} + +unsigned long amd_get_dr_addr_mask(unsigned int dr) +{ + if (!cpu_feature_enabled(X86_FEATURE_BPEXT)) + return 0; + + if (WARN_ON_ONCE(dr >= ARRAY_SIZE(amd_msr_dr_addr_masks))) + return 0; + + return per_cpu(amd_dr_addr_mask[dr], smp_processor_id()); } +EXPORT_SYMBOL_GPL(amd_get_dr_addr_mask); u32 amd_get_highest_perf(void) { diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 85168740f76a..cf81848b72f4 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -33,6 +33,7 @@ #include <asm/e820/api.h> #include <asm/hypervisor.h> #include <asm/tlbflush.h> +#include <asm/cpu.h> #include "cpu.h" @@ -144,9 +145,17 @@ void __init check_bugs(void) * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD * init code as it is not enumerated and depends on the family. */ - if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) + if (cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL)) { rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + /* + * Previously running kernel (kexec), may have some controls + * turned ON. Clear them and let the mitigations setup below + * rediscover them based on configuration. + */ + x86_spec_ctrl_base &= ~SPEC_CTRL_MITIGATIONS_MASK; + } + /* Select the proper CPU mitigations before patching alternatives: */ spectre_v1_select_mitigation(); spectre_v2_select_mitigation(); @@ -1229,9 +1238,9 @@ static const char * const spectre_v2_strings[] = { [SPECTRE_V2_NONE] = "Vulnerable", [SPECTRE_V2_RETPOLINE] = "Mitigation: Retpolines", [SPECTRE_V2_LFENCE] = "Mitigation: LFENCE", - [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS", - [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE", - [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines", + [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced / Automatic IBRS", + [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced / Automatic IBRS + LFENCE", + [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced / Automatic IBRS + Retpolines", [SPECTRE_V2_IBRS] = "Mitigation: IBRS", }; @@ -1300,7 +1309,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) cmd == SPECTRE_V2_CMD_EIBRS_LFENCE || cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) && !boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) { - pr_err("%s selected but CPU doesn't have eIBRS. Switching to AUTO select\n", + pr_err("%s selected but CPU doesn't have Enhanced or Automatic IBRS. Switching to AUTO select\n", mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; } @@ -1486,8 +1495,12 @@ static void __init spectre_v2_select_mitigation(void) pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); if (spectre_v2_in_ibrs_mode(mode)) { - x86_spec_ctrl_base |= SPEC_CTRL_IBRS; - update_spec_ctrl(x86_spec_ctrl_base); + if (boot_cpu_has(X86_FEATURE_AUTOIBRS)) { + msr_set_bit(MSR_EFER, _EFER_AUTOIBRS); + } else { + x86_spec_ctrl_base |= SPEC_CTRL_IBRS; + update_spec_ctrl(x86_spec_ctrl_base); + } } switch (mode) { @@ -1571,8 +1584,8 @@ static void __init spectre_v2_select_mitigation(void) /* * Retpoline protects the kernel, but doesn't protect firmware. IBRS * and Enhanced IBRS protect firmware too, so enable IBRS around - * firmware calls only when IBRS / Enhanced IBRS aren't otherwise - * enabled. + * firmware calls only when IBRS / Enhanced / Automatic IBRS aren't + * otherwise enabled. * * Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because * the user might select retpoline on the kernel command line and if diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 65ceabb2e114..a394bbba7a4b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1093,6 +1093,9 @@ void get_cpu_cap(struct cpuinfo_x86 *c) if (c->extended_cpuid_level >= 0x8000001f) c->x86_capability[CPUID_8000_001F_EAX] = cpuid_eax(0x8000001f); + if (c->extended_cpuid_level >= 0x80000021) + c->x86_capability[CPUID_8000_0021_EAX] = cpuid_eax(0x80000021); + init_scattered_cpuid_features(c); init_speculation_control(c); @@ -1226,8 +1229,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ - VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), + VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), /* Zhaoxin Family 7 */ VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), @@ -1340,8 +1343,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); - if (ia32_cap & ARCH_CAP_IBRS_ALL) + /* + * AMD's AutoIBRS is equivalent to Intel's eIBRS - use the Intel feature + * flag and protect from vendor-specific bugs via the whitelist. + */ + if ((ia32_cap & ARCH_CAP_IBRS_ALL) || cpu_has(c, X86_FEATURE_AUTOIBRS)) { setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); + if (!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) && + !(ia32_cap & ARCH_CAP_PBRSB_NO)) + setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); + } if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) && !(ia32_cap & ARCH_CAP_MDS_NO)) { @@ -1403,11 +1414,6 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_RETBLEED); } - if (cpu_has(c, X86_FEATURE_IBRS_ENHANCED) && - !cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) && - !(ia32_cap & ARCH_CAP_PBRSB_NO)) - setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); - if (cpu_matches(cpu_vuln_blacklist, SMT_RSB)) setup_force_cpu_bug(X86_BUG_SMT_RSB); @@ -1687,9 +1693,7 @@ void check_null_seg_clears_base(struct cpuinfo_x86 *c) if (!IS_ENABLED(CONFIG_X86_64)) return; - /* Zen3 CPUs advertise Null Selector Clears Base in CPUID. */ - if (c->extended_cpuid_level >= 0x80000021 && - cpuid_eax(0x80000021) & BIT(6)) + if (cpu_has(c, X86_FEATURE_NULL_SEL_CLR_BASE)) return; /* @@ -1964,6 +1968,7 @@ void __init identify_boot_cpu(void) setup_cr_pinning(); tsx_init(); + lkgs_init(); } void identify_secondary_cpu(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 7c9b5893c30a..57a5349e6954 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -83,6 +83,4 @@ unsigned int aperfmperf_get_khz(int cpu); extern void x86_spec_ctrl_setup_ap(void); extern void update_srbds_msr(void); -extern u64 x86_read_arch_cap_msr(void); - #endif /* ARCH_X86_CPU_H */ diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 46668e255421..f924a76c6923 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -37,9 +37,76 @@ /* Is Linux running as the root partition? */ bool hv_root_partition; +/* Is Linux running on nested Microsoft Hypervisor */ +bool hv_nested; struct ms_hyperv_info ms_hyperv; #if IS_ENABLED(CONFIG_HYPERV) +static inline unsigned int hv_get_nested_reg(unsigned int reg) +{ + if (hv_is_sint_reg(reg)) + return reg - HV_REGISTER_SINT0 + HV_REGISTER_NESTED_SINT0; + + switch (reg) { + case HV_REGISTER_SIMP: + return HV_REGISTER_NESTED_SIMP; + case HV_REGISTER_SIEFP: + return HV_REGISTER_NESTED_SIEFP; + case HV_REGISTER_SVERSION: + return HV_REGISTER_NESTED_SVERSION; + case HV_REGISTER_SCONTROL: + return HV_REGISTER_NESTED_SCONTROL; + case HV_REGISTER_EOM: + return HV_REGISTER_NESTED_EOM; + default: + return reg; + } +} + +u64 hv_get_non_nested_register(unsigned int reg) +{ + u64 value; + + if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) + hv_ghcb_msr_read(reg, &value); + else + rdmsrl(reg, value); + return value; +} +EXPORT_SYMBOL_GPL(hv_get_non_nested_register); + +void hv_set_non_nested_register(unsigned int reg, u64 value) +{ + if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) { + hv_ghcb_msr_write(reg, value); + + /* Write proxy bit via wrmsl instruction */ + if (hv_is_sint_reg(reg)) + wrmsrl(reg, value | 1 << 20); + } else { + wrmsrl(reg, value); + } +} +EXPORT_SYMBOL_GPL(hv_set_non_nested_register); + +u64 hv_get_register(unsigned int reg) +{ + if (hv_nested) + reg = hv_get_nested_reg(reg); + + return hv_get_non_nested_register(reg); +} +EXPORT_SYMBOL_GPL(hv_get_register); + +void hv_set_register(unsigned int reg, u64 value) +{ + if (hv_nested) + reg = hv_get_nested_reg(reg); + + hv_set_non_nested_register(reg, value); +} +EXPORT_SYMBOL_GPL(hv_set_register); + static void (*vmbus_handler)(void); static void (*hv_stimer0_handler)(void); static void (*hv_kexec_handler)(void); @@ -301,6 +368,11 @@ static void __init ms_hyperv_init_platform(void) pr_info("Hyper-V: running as root partition\n"); } + if (ms_hyperv.hints & HV_X64_HYPERV_NESTED) { + hv_nested = true; + pr_info("Hyper-V: running on a nested hypervisor\n"); + } + /* * Extract host information. */ diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c index 8009c8346d8f..b31ee4f1657a 100644 --- a/arch/x86/kernel/cpu/tsx.c +++ b/arch/x86/kernel/cpu/tsx.c @@ -11,6 +11,7 @@ #include <linux/cpufeature.h> #include <asm/cmdline.h> +#include <asm/cpu.h> #include "cpu.h" diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index bbb0f737aab1..b01644c949b2 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -127,7 +127,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) set_debugreg(*dr7, 7); if (info->mask) - set_dr_addr_mask(info->mask, i); + amd_set_dr_addr_mask(info->mask, i); return 0; } @@ -166,7 +166,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) set_debugreg(dr7, 7); if (info->mask) - set_dr_addr_mask(0, i); + amd_set_dr_addr_mask(0, i); /* * Ensure the write to cpu_dr7 is after we've set the DR7 register. diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index e57e07b0edb6..57b0037d0a99 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -46,8 +46,8 @@ unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr) /* This function only handles jump-optimized kprobe */ if (kp && kprobe_optimized(kp)) { op = container_of(kp, struct optimized_kprobe, kp); - /* If op->list is not empty, op is under optimizing */ - if (list_empty(&op->list)) + /* If op is optimized or under unoptimizing */ + if (list_empty(&op->list) || optprobe_queued_unopt(op)) goto found; } } @@ -353,7 +353,7 @@ int arch_check_optimized_kprobe(struct optimized_kprobe *op) for (i = 1; i < op->optinsn.size; i++) { p = get_kprobe(op->kp.addr + i); - if (p && !kprobe_disabled(p)) + if (p && !kprobe_disarmed(p)) return -EEXIST; } diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 705fb2a41d7d..84ad0e61ba6e 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -129,22 +129,27 @@ int apply_relocate(Elf32_Shdr *sechdrs, return 0; } #else /*X86_64*/ -static int __apply_relocate_add(Elf64_Shdr *sechdrs, +static int __write_relocate_add(Elf64_Shdr *sechdrs, const char *strtab, unsigned int symindex, unsigned int relsec, struct module *me, - void *(*write)(void *dest, const void *src, size_t len)) + void *(*write)(void *dest, const void *src, size_t len), + bool apply) { unsigned int i; Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr; Elf64_Sym *sym; void *loc; u64 val; + u64 zero = 0ULL; - DEBUGP("Applying relocate section %u to %u\n", + DEBUGP("%s relocate section %u to %u\n", + apply ? "Applying" : "Clearing", relsec, sechdrs[relsec].sh_info); for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { + size_t size; + /* This is where to make the change */ loc = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + rel[i].r_offset; @@ -162,56 +167,53 @@ static int __apply_relocate_add(Elf64_Shdr *sechdrs, switch (ELF64_R_TYPE(rel[i].r_info)) { case R_X86_64_NONE: - break; + continue; /* nothing to write */ case R_X86_64_64: - if (*(u64 *)loc != 0) - goto invalid_relocation; - write(loc, &val, 8); + size = 8; break; case R_X86_64_32: - if (*(u32 *)loc != 0) - goto invalid_relocation; - write(loc, &val, 4); - if (val != *(u32 *)loc) + if (val != *(u32 *)&val) goto overflow; + size = 4; break; case R_X86_64_32S: - if (*(s32 *)loc != 0) - goto invalid_relocation; - write(loc, &val, 4); - if ((s64)val != *(s32 *)loc) + if ((s64)val != *(s32 *)&val) goto overflow; + size = 4; break; case R_X86_64_PC32: case R_X86_64_PLT32: - if (*(u32 *)loc != 0) - goto invalid_relocation; val -= (u64)loc; - write(loc, &val, 4); -#if 0 - if ((s64)val != *(s32 *)loc) - goto overflow; -#endif + size = 4; break; case R_X86_64_PC64: - if (*(u64 *)loc != 0) - goto invalid_relocation; val -= (u64)loc; - write(loc, &val, 8); + size = 8; break; default: pr_err("%s: Unknown rela relocation: %llu\n", me->name, ELF64_R_TYPE(rel[i].r_info)); return -ENOEXEC; } + + if (apply) { + if (memcmp(loc, &zero, size)) { + pr_err("x86/modules: Invalid relocation target, existing value is nonzero for type %d, loc %p, val %Lx\n", + (int)ELF64_R_TYPE(rel[i].r_info), loc, val); + return -ENOEXEC; + } + write(loc, &val, size); + } else { + if (memcmp(loc, &val, size)) { + pr_warn("x86/modules: Invalid relocation target, existing value does not match expected value for type %d, loc %p, val %Lx\n", + (int)ELF64_R_TYPE(rel[i].r_info), loc, val); + return -ENOEXEC; + } + write(loc, &zero, size); + } } return 0; -invalid_relocation: - pr_err("x86/modules: Skipping invalid relocation target, existing value is nonzero for type %d, loc %p, val %Lx\n", - (int)ELF64_R_TYPE(rel[i].r_info), loc, val); - return -ENOEXEC; - overflow: pr_err("overflow in relocation type %d val %Lx\n", (int)ELF64_R_TYPE(rel[i].r_info), val); @@ -220,11 +222,12 @@ overflow: return -ENOEXEC; } -int apply_relocate_add(Elf64_Shdr *sechdrs, - const char *strtab, - unsigned int symindex, - unsigned int relsec, - struct module *me) +static int write_relocate_add(Elf64_Shdr *sechdrs, + const char *strtab, + unsigned int symindex, + unsigned int relsec, + struct module *me, + bool apply) { int ret; bool early = me->state == MODULE_STATE_UNFORMED; @@ -235,8 +238,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, mutex_lock(&text_mutex); } - ret = __apply_relocate_add(sechdrs, strtab, symindex, relsec, me, - write); + ret = __write_relocate_add(sechdrs, strtab, symindex, relsec, me, + write, apply); if (!early) { text_poke_sync(); @@ -246,6 +249,26 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, return ret; } +int apply_relocate_add(Elf64_Shdr *sechdrs, + const char *strtab, + unsigned int symindex, + unsigned int relsec, + struct module *me) +{ + return write_relocate_add(sechdrs, strtab, symindex, relsec, me, true); +} + +#ifdef CONFIG_LIVEPATCH +void clear_relocate_add(Elf64_Shdr *sechdrs, + const char *strtab, + unsigned int symindex, + unsigned int relsec, + struct module *me) +{ + write_relocate_add(sechdrs, strtab, symindex, relsec, me, false); +} +#endif + #endif int module_finalize(const Elf_Ehdr *hdr, diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index cec0bfa3bc04..c315b18ec7c8 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -69,6 +69,15 @@ struct nmi_stats { unsigned int unknown; unsigned int external; unsigned int swallow; + unsigned long recv_jiffies; + unsigned long idt_seq; + unsigned long idt_nmi_seq; + unsigned long idt_ignored; + atomic_long_t idt_calls; + unsigned long idt_seq_snap; + unsigned long idt_nmi_seq_snap; + unsigned long idt_ignored_snap; + long idt_calls_snap; }; static DEFINE_PER_CPU(struct nmi_stats, nmi_stats); @@ -479,12 +488,15 @@ static DEFINE_PER_CPU(unsigned long, nmi_dr7); DEFINE_IDTENTRY_RAW(exc_nmi) { irqentry_state_t irq_state; + struct nmi_stats *nsp = this_cpu_ptr(&nmi_stats); /* * Re-enable NMIs right here when running as an SEV-ES guest. This might * cause nested NMIs, but those can be handled safely. */ sev_es_nmi_complete(); + if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) + arch_atomic_long_inc(&nsp->idt_calls); if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id())) return; @@ -495,6 +507,11 @@ DEFINE_IDTENTRY_RAW(exc_nmi) } this_cpu_write(nmi_state, NMI_EXECUTING); this_cpu_write(nmi_cr2, read_cr2()); + if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) { + WRITE_ONCE(nsp->idt_seq, nsp->idt_seq + 1); + WARN_ON_ONCE(!(nsp->idt_seq & 0x1)); + WRITE_ONCE(nsp->recv_jiffies, jiffies); + } nmi_restart: /* @@ -509,8 +526,19 @@ nmi_restart: inc_irq_stat(__nmi_count); - if (!ignore_nmis) + if (IS_ENABLED(CONFIG_NMI_CHECK_CPU) && ignore_nmis) { + WRITE_ONCE(nsp->idt_ignored, nsp->idt_ignored + 1); + } else if (!ignore_nmis) { + if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) { + WRITE_ONCE(nsp->idt_nmi_seq, nsp->idt_nmi_seq + 1); + WARN_ON_ONCE(!(nsp->idt_nmi_seq & 0x1)); + } default_do_nmi(regs); + if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) { + WRITE_ONCE(nsp->idt_nmi_seq, nsp->idt_nmi_seq + 1); + WARN_ON_ONCE(nsp->idt_nmi_seq & 0x1); + } + } irqentry_nmi_exit(regs, irq_state); @@ -525,6 +553,11 @@ nmi_restart: if (user_mode(regs)) mds_user_clear_cpu_buffers(); + if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) { + WRITE_ONCE(nsp->idt_seq, nsp->idt_seq + 1); + WARN_ON_ONCE(nsp->idt_seq & 0x1); + WRITE_ONCE(nsp->recv_jiffies, jiffies); + } } #if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL) @@ -537,6 +570,79 @@ DEFINE_IDTENTRY_RAW(exc_nmi_noist) EXPORT_SYMBOL_GPL(asm_exc_nmi_noist); #endif +#ifdef CONFIG_NMI_CHECK_CPU + +static char *nmi_check_stall_msg[] = { +/* */ +/* +--------- nsp->idt_seq_snap & 0x1: CPU is in NMI handler. */ +/* | +------ cpu_is_offline(cpu) */ +/* | | +--- nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls): */ +/* | | | NMI handler has been invoked. */ +/* | | | */ +/* V V V */ +/* 0 0 0 */ "NMIs are not reaching exc_nmi() handler", +/* 0 0 1 */ "exc_nmi() handler is ignoring NMIs", +/* 0 1 0 */ "CPU is offline and NMIs are not reaching exc_nmi() handler", +/* 0 1 1 */ "CPU is offline and exc_nmi() handler is legitimately ignoring NMIs", +/* 1 0 0 */ "CPU is in exc_nmi() handler and no further NMIs are reaching handler", +/* 1 0 1 */ "CPU is in exc_nmi() handler which is legitimately ignoring NMIs", +/* 1 1 0 */ "CPU is offline in exc_nmi() handler and no more NMIs are reaching exc_nmi() handler", +/* 1 1 1 */ "CPU is offline in exc_nmi() handler which is legitimately ignoring NMIs", +}; + +void nmi_backtrace_stall_snap(const struct cpumask *btp) +{ + int cpu; + struct nmi_stats *nsp; + + for_each_cpu(cpu, btp) { + nsp = per_cpu_ptr(&nmi_stats, cpu); + nsp->idt_seq_snap = READ_ONCE(nsp->idt_seq); + nsp->idt_nmi_seq_snap = READ_ONCE(nsp->idt_nmi_seq); + nsp->idt_ignored_snap = READ_ONCE(nsp->idt_ignored); + nsp->idt_calls_snap = atomic_long_read(&nsp->idt_calls); + } +} + +void nmi_backtrace_stall_check(const struct cpumask *btp) +{ + int cpu; + int idx; + unsigned long nmi_seq; + unsigned long j = jiffies; + char *modp; + char *msgp; + char *msghp; + struct nmi_stats *nsp; + + for_each_cpu(cpu, btp) { + nsp = per_cpu_ptr(&nmi_stats, cpu); + modp = ""; + msghp = ""; + nmi_seq = READ_ONCE(nsp->idt_nmi_seq); + if (nsp->idt_nmi_seq_snap + 1 == nmi_seq && (nmi_seq & 0x1)) { + msgp = "CPU entered NMI handler function, but has not exited"; + } else if ((nsp->idt_nmi_seq_snap & 0x1) != (nmi_seq & 0x1)) { + msgp = "CPU is handling NMIs"; + } else { + idx = ((nsp->idt_seq_snap & 0x1) << 2) | + (cpu_is_offline(cpu) << 1) | + (nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls)); + msgp = nmi_check_stall_msg[idx]; + if (nsp->idt_ignored_snap != READ_ONCE(nsp->idt_ignored) && (idx & 0x1)) + modp = ", but OK because ignore_nmis was set"; + if (nmi_seq & ~0x1) + msghp = " (CPU currently in NMI handler function)"; + else if (nsp->idt_nmi_seq_snap + 1 == nmi_seq) + msghp = " (CPU exited one NMI handler function)"; + } + pr_alert("%s: CPU %d: %s%s%s, last activity: %lu jiffies ago.\n", + __func__, cpu, msgp, modp, msghp, j - READ_ONCE(nsp->recv_jiffies)); + } +} + +#endif + void stop_nmi(void) { ignore_nmis++; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 5bf4f0b2f35d..42e182868873 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -32,6 +32,7 @@ #include <asm/special_insns.h> #include <asm/tlb.h> #include <asm/io_bitmap.h> +#include <asm/gsseg.h> /* * nop stub, which must not clobber anything *including the stack* to diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index f042dcdf1f16..9027fc088f97 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -31,6 +31,7 @@ #include <asm/sigframe.h> #include <asm/sighandling.h> #include <asm/smap.h> +#include <asm/gsseg.h> #ifdef CONFIG_IA32_EMULATION #include <asm/ia32_unistd.h> diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index 3c883e064242..3ffbab0081f4 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -12,6 +12,7 @@ #include <asm/ldt.h> #include <asm/processor.h> #include <asm/proto.h> +#include <asm/gsseg.h> #include "tls.h" |