diff options
Diffstat (limited to 'arch')
412 files changed, 9143 insertions, 4648 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 93404c802d29..b34946d90e4b 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -60,9 +60,9 @@ config GENERIC_ENTRY config KPROBES bool "Kprobes" - depends on MODULES depends on HAVE_KPROBES select KALLSYMS + select EXECMEM select NEED_TASKS_RCU help Kprobes allows you to trap at almost any kernel address and @@ -977,6 +977,14 @@ config ARCH_WANTS_MODULES_DATA_IN_VMALLOC For architectures like powerpc/32 which have constraints on module allocation and need to allocate module data outside of module area. +config ARCH_WANTS_EXECMEM_LATE + bool + help + For architectures that do not allocate executable memory early on + boot, but rather require its initialization late when there is + enough entropy for module space randomization, for instance + arm64. + config HAVE_IRQ_EXIT_ON_IRQ_STACK bool help diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index b14aed3a17ab..749d6e3788b4 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -35,6 +35,7 @@ config ARM select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7 select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_CFI_CLANG select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE select ARCH_SUPPORTS_PER_VMA_LOCK select ARCH_USE_BUILTIN_BSWAP @@ -1233,9 +1234,9 @@ config HIGHPTE consumed by page tables. Setting this option will allow user-space 2nd level page tables to reside in high memory. -config CPU_SW_DOMAIN_PAN - bool "Enable use of CPU domains to implement privileged no-access" - depends on MMU && !ARM_LPAE +config ARM_PAN + bool "Enable privileged no-access" + depends on MMU default y help Increase kernel security by ensuring that normal kernel accesses @@ -1244,10 +1245,26 @@ config CPU_SW_DOMAIN_PAN by ensuring that magic values (such as LIST_POISON) will always fault when dereferenced. + The implementation uses CPU domains when !CONFIG_ARM_LPAE and + disabling of TTBR0 page table walks with CONFIG_ARM_LPAE. + +config CPU_SW_DOMAIN_PAN + def_bool y + depends on ARM_PAN && !ARM_LPAE + help + Enable use of CPU domains to implement privileged no-access. + CPUs with low-vector mappings use a best-efforts implementation. Their lower 1MB needs to remain accessible for the vectors, but the remainder of userspace will become appropriately inaccessible. +config CPU_TTBR0_PAN + def_bool y + depends on ARM_PAN && ARM_LPAE + help + Enable privileged no-access by disabling TTBR0 page table walks when + running in kernel mode. + config HW_PERF_EVENTS def_bool y depends on ARM_PMU diff --git a/arch/arm/configs/collie_defconfig b/arch/arm/configs/collie_defconfig index 01b5a5a73f03..42cb1c854118 100644 --- a/arch/arm/configs/collie_defconfig +++ b/arch/arm/configs/collie_defconfig @@ -3,7 +3,7 @@ CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_EPOLL is not set CONFIG_ARCH_MULTI_V4=y # CONFIG_ARCH_MULTI_V7 is not set diff --git a/arch/arm/configs/keystone_defconfig b/arch/arm/configs/keystone_defconfig index 59c4835ffc97..c1291ca290b2 100644 --- a/arch/arm/configs/keystone_defconfig +++ b/arch/arm/configs/keystone_defconfig @@ -12,7 +12,7 @@ CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_BLK_DEV_INITRD=y # CONFIG_ELF_CORE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y CONFIG_KALLSYMS_ALL=y CONFIG_EXPERT=y CONFIG_PROFILING=y diff --git a/arch/arm/configs/lpc18xx_defconfig b/arch/arm/configs/lpc18xx_defconfig index d169da9b2824..f55c231e0870 100644 --- a/arch/arm/configs/lpc18xx_defconfig +++ b/arch/arm/configs/lpc18xx_defconfig @@ -8,7 +8,7 @@ CONFIG_BLK_DEV_INITRD=y # CONFIG_RD_LZ4 is not set CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_UID16 is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set diff --git a/arch/arm/configs/moxart_defconfig b/arch/arm/configs/moxart_defconfig index 1d41e73f4903..34d079e03b3c 100644 --- a/arch/arm/configs/moxart_defconfig +++ b/arch/arm/configs/moxart_defconfig @@ -6,7 +6,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_EXPERT=y # CONFIG_ELF_CORE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_SIGNALFD is not set # CONFIG_TIMERFD is not set # CONFIG_EVENTFD is not set diff --git a/arch/arm/configs/mps2_defconfig b/arch/arm/configs/mps2_defconfig index 3ed73f184d83..e995e50537ef 100644 --- a/arch/arm/configs/mps2_defconfig +++ b/arch/arm/configs/mps2_defconfig @@ -5,7 +5,7 @@ CONFIG_LOG_BUF_SHIFT=16 CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_EXPERT=y # CONFIG_UID16 is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set diff --git a/arch/arm/configs/omap1_defconfig b/arch/arm/configs/omap1_defconfig index 729ea8157e2a..025b595dd837 100644 --- a/arch/arm/configs/omap1_defconfig +++ b/arch/arm/configs/omap1_defconfig @@ -9,7 +9,7 @@ CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y # CONFIG_ELF_CORE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_SHMEM is not set # CONFIG_KALLSYMS is not set CONFIG_PROFILING=y diff --git a/arch/arm/configs/stm32_defconfig b/arch/arm/configs/stm32_defconfig index b9fe3fbed5ae..3baec075d1ef 100644 --- a/arch/arm/configs/stm32_defconfig +++ b/arch/arm/configs/stm32_defconfig @@ -6,7 +6,7 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_EXPERT=y # CONFIG_UID16 is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index aebe2c8f6a68..d33c1e24e00b 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -21,6 +21,7 @@ #include <asm/opcodes-virt.h> #include <asm/asm-offsets.h> #include <asm/page.h> +#include <asm/pgtable.h> #include <asm/thread_info.h> #include <asm/uaccess-asm.h> diff --git a/arch/arm/include/asm/glue-cache.h b/arch/arm/include/asm/glue-cache.h index 724f8dac1e5b..4186fbf7341f 100644 --- a/arch/arm/include/asm/glue-cache.h +++ b/arch/arm/include/asm/glue-cache.h @@ -118,6 +118,10 @@ # define MULTI_CACHE 1 #endif +#ifdef CONFIG_CPU_CACHE_NOP +# define MULTI_CACHE 1 +#endif + #if defined(CONFIG_CPU_V7M) # define MULTI_CACHE 1 #endif @@ -126,29 +130,15 @@ #error Unknown cache maintenance model #endif -#ifndef __ASSEMBLER__ -static inline void nop_flush_icache_all(void) { } -static inline void nop_flush_kern_cache_all(void) { } -static inline void nop_flush_kern_cache_louis(void) { } -static inline void nop_flush_user_cache_all(void) { } -static inline void nop_flush_user_cache_range(unsigned long a, - unsigned long b, unsigned int c) { } - -static inline void nop_coherent_kern_range(unsigned long a, unsigned long b) { } -static inline int nop_coherent_user_range(unsigned long a, - unsigned long b) { return 0; } -static inline void nop_flush_kern_dcache_area(void *a, size_t s) { } - -static inline void nop_dma_flush_range(const void *a, const void *b) { } - -static inline void nop_dma_map_area(const void *s, size_t l, int f) { } -static inline void nop_dma_unmap_area(const void *s, size_t l, int f) { } -#endif - #ifndef MULTI_CACHE #define __cpuc_flush_icache_all __glue(_CACHE,_flush_icache_all) #define __cpuc_flush_kern_all __glue(_CACHE,_flush_kern_cache_all) +/* This function only has a dedicated assembly callback on the v7 cache */ +#ifdef CONFIG_CPU_CACHE_V7 #define __cpuc_flush_kern_louis __glue(_CACHE,_flush_kern_cache_louis) +#else +#define __cpuc_flush_kern_louis __glue(_CACHE,_flush_kern_cache_all) +#endif #define __cpuc_flush_user_all __glue(_CACHE,_flush_user_cache_all) #define __cpuc_flush_user_range __glue(_CACHE,_flush_user_cache_range) #define __cpuc_coherent_kern_range __glue(_CACHE,_coherent_kern_range) diff --git a/arch/arm/include/asm/hw_breakpoint.h b/arch/arm/include/asm/hw_breakpoint.h index 62358d3ca0a8..e7f9961c53b2 100644 --- a/arch/arm/include/asm/hw_breakpoint.h +++ b/arch/arm/include/asm/hw_breakpoint.h @@ -84,6 +84,7 @@ static inline void decode_ctrl_reg(u32 reg, #define ARM_DSCR_MOE(x) ((x >> 2) & 0xf) #define ARM_ENTRY_BREAKPOINT 0x1 #define ARM_ENTRY_ASYNC_WATCHPOINT 0x2 +#define ARM_ENTRY_CFI_BREAKPOINT 0x3 #define ARM_ENTRY_SYNC_WATCHPOINT 0xa /* DSCR monitor/halting bits. */ diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h index 2f35b4eddaa8..323ad811732e 100644 --- a/arch/arm/include/asm/pgtable-3level-hwdef.h +++ b/arch/arm/include/asm/pgtable-3level-hwdef.h @@ -74,6 +74,7 @@ #define PHYS_MASK_SHIFT (40) #define PHYS_MASK ((1ULL << PHYS_MASK_SHIFT) - 1) +#ifndef CONFIG_CPU_TTBR0_PAN /* * TTBR0/TTBR1 split (PAGE_OFFSET): * 0x40000000: T0SZ = 2, T1SZ = 0 (not used) @@ -93,5 +94,30 @@ #endif #define TTBR1_SIZE (((PAGE_OFFSET >> 30) - 1) << 16) +#else +/* + * With CONFIG_CPU_TTBR0_PAN enabled, TTBR1 is only used during uaccess + * disabled regions when TTBR0 is disabled. + */ +#define TTBR1_OFFSET 0 /* pointing to swapper_pg_dir */ +#define TTBR1_SIZE 0 /* TTBR1 size controlled via TTBCR.T0SZ */ +#endif + +/* + * TTBCR register bits. + */ +#define TTBCR_EAE (1 << 31) +#define TTBCR_IMP (1 << 30) +#define TTBCR_SH1_MASK (3 << 28) +#define TTBCR_ORGN1_MASK (3 << 26) +#define TTBCR_IRGN1_MASK (3 << 24) +#define TTBCR_EPD1 (1 << 23) +#define TTBCR_A1 (1 << 22) +#define TTBCR_T1SZ_MASK (7 << 16) +#define TTBCR_SH0_MASK (3 << 12) +#define TTBCR_ORGN0_MASK (3 << 10) +#define TTBCR_IRGN0_MASK (3 << 8) +#define TTBCR_EPD0 (1 << 7) +#define TTBCR_T0SZ_MASK (7 << 0) #endif diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h index 280396483f5d..b4986a23d852 100644 --- a/arch/arm/include/asm/proc-fns.h +++ b/arch/arm/include/asm/proc-fns.h @@ -178,6 +178,18 @@ extern void cpu_resume(void); }) #endif +static inline unsigned int cpu_get_ttbcr(void) +{ + unsigned int ttbcr; + asm("mrc p15, 0, %0, c2, c0, 2" : "=r" (ttbcr)); + return ttbcr; +} + +static inline void cpu_set_ttbcr(unsigned int ttbcr) +{ + asm volatile("mcr p15, 0, %0, c2, c0, 2" : : "r" (ttbcr) : "memory"); +} + #else /*!CONFIG_MMU */ #define cpu_switch_mm(pgd,mm) { } diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h index 14a38cc67e0b..6eb311fb2da0 100644 --- a/arch/arm/include/asm/ptrace.h +++ b/arch/arm/include/asm/ptrace.h @@ -20,6 +20,7 @@ struct pt_regs { struct svc_pt_regs { struct pt_regs regs; u32 dacr; + u32 ttbcr; }; #define to_svc_pt_regs(r) container_of(r, struct svc_pt_regs, regs) diff --git a/arch/arm/include/asm/uaccess-asm.h b/arch/arm/include/asm/uaccess-asm.h index 65da32e1f1c1..4bccd895d954 100644 --- a/arch/arm/include/asm/uaccess-asm.h +++ b/arch/arm/include/asm/uaccess-asm.h @@ -39,8 +39,9 @@ #endif .endm +#if defined(CONFIG_CPU_SW_DOMAIN_PAN) + .macro uaccess_disable, tmp, isb=1 -#ifdef CONFIG_CPU_SW_DOMAIN_PAN /* * Whenever we re-enter userspace, the domains should always be * set appropriately. @@ -50,11 +51,9 @@ .if \isb instr_sync .endif -#endif .endm .macro uaccess_enable, tmp, isb=1 -#ifdef CONFIG_CPU_SW_DOMAIN_PAN /* * Whenever we re-enter userspace, the domains should always be * set appropriately. @@ -64,15 +63,61 @@ .if \isb instr_sync .endif -#endif .endm +#elif defined(CONFIG_CPU_TTBR0_PAN) + + .macro uaccess_disable, tmp, isb=1 + /* + * Disable TTBR0 page table walks (EDP0 = 1), use the reserved ASID + * from TTBR1 (A1 = 1) and enable TTBR1 page table walks for kernel + * addresses by reducing TTBR0 range to 32MB (T0SZ = 7). + */ + mrc p15, 0, \tmp, c2, c0, 2 @ read TTBCR + orr \tmp, \tmp, #TTBCR_EPD0 | TTBCR_T0SZ_MASK + orr \tmp, \tmp, #TTBCR_A1 + mcr p15, 0, \tmp, c2, c0, 2 @ write TTBCR + .if \isb + instr_sync + .endif + .endm + + .macro uaccess_enable, tmp, isb=1 + /* + * Enable TTBR0 page table walks (T0SZ = 0, EDP0 = 0) and ASID from + * TTBR0 (A1 = 0). + */ + mrc p15, 0, \tmp, c2, c0, 2 @ read TTBCR + bic \tmp, \tmp, #TTBCR_EPD0 | TTBCR_T0SZ_MASK + bic \tmp, \tmp, #TTBCR_A1 + mcr p15, 0, \tmp, c2, c0, 2 @ write TTBCR + .if \isb + instr_sync + .endif + .endm + +#else + + .macro uaccess_disable, tmp, isb=1 + .endm + + .macro uaccess_enable, tmp, isb=1 + .endm + +#endif + #if defined(CONFIG_CPU_SW_DOMAIN_PAN) || defined(CONFIG_CPU_USE_DOMAINS) #define DACR(x...) x #else #define DACR(x...) #endif +#ifdef CONFIG_CPU_TTBR0_PAN +#define PAN(x...) x +#else +#define PAN(x...) +#endif + /* * Save the address limit on entry to a privileged exception. * @@ -86,6 +131,8 @@ .macro uaccess_entry, tsk, tmp0, tmp1, tmp2, disable DACR( mrc p15, 0, \tmp0, c3, c0, 0) DACR( str \tmp0, [sp, #SVC_DACR]) + PAN( mrc p15, 0, \tmp0, c2, c0, 2) + PAN( str \tmp0, [sp, #SVC_TTBCR]) .if \disable && IS_ENABLED(CONFIG_CPU_SW_DOMAIN_PAN) /* kernel=client, user=no access */ mov \tmp2, #DACR_UACCESS_DISABLE @@ -104,8 +151,11 @@ .macro uaccess_exit, tsk, tmp0, tmp1 DACR( ldr \tmp0, [sp, #SVC_DACR]) DACR( mcr p15, 0, \tmp0, c3, c0, 0) + PAN( ldr \tmp0, [sp, #SVC_TTBCR]) + PAN( mcr p15, 0, \tmp0, c2, c0, 2) .endm #undef DACR +#undef PAN #endif /* __ASM_UACCESS_ASM_H__ */ diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 9556d04387f7..6c9c16d767cf 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -14,6 +14,8 @@ #include <asm/domain.h> #include <asm/unaligned.h> #include <asm/unified.h> +#include <asm/pgtable.h> +#include <asm/proc-fns.h> #include <asm/compiler.h> #include <asm/extable.h> @@ -24,9 +26,10 @@ * perform such accesses (eg, via list poison values) which could then * be exploited for priviledge escalation. */ +#if defined(CONFIG_CPU_SW_DOMAIN_PAN) + static __always_inline unsigned int uaccess_save_and_enable(void) { -#ifdef CONFIG_CPU_SW_DOMAIN_PAN unsigned int old_domain = get_domain(); /* Set the current domain access to permit user accesses */ @@ -34,19 +37,49 @@ static __always_inline unsigned int uaccess_save_and_enable(void) domain_val(DOMAIN_USER, DOMAIN_CLIENT)); return old_domain; -#else - return 0; -#endif } static __always_inline void uaccess_restore(unsigned int flags) { -#ifdef CONFIG_CPU_SW_DOMAIN_PAN /* Restore the user access mask */ set_domain(flags); -#endif } +#elif defined(CONFIG_CPU_TTBR0_PAN) + +static __always_inline unsigned int uaccess_save_and_enable(void) +{ + unsigned int old_ttbcr = cpu_get_ttbcr(); + + /* + * Enable TTBR0 page table walks (T0SZ = 0, EDP0 = 0) and ASID from + * TTBR0 (A1 = 0). + */ + cpu_set_ttbcr(old_ttbcr & ~(TTBCR_A1 | TTBCR_EPD0 | TTBCR_T0SZ_MASK)); + isb(); + + return old_ttbcr; +} + +static inline void uaccess_restore(unsigned int flags) +{ + cpu_set_ttbcr(flags); + isb(); +} + +#else + +static inline unsigned int uaccess_save_and_enable(void) +{ + return 0; +} + +static inline void uaccess_restore(unsigned int flags) +{ +} + +#endif + /* * These two are intentionally not defined anywhere - if the kernel * code generates any references to them, that's a bug. diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 4915662842ff..4853875740d0 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -85,6 +85,7 @@ int main(void) DEFINE(S_OLD_R0, offsetof(struct pt_regs, ARM_ORIG_r0)); DEFINE(PT_REGS_SIZE, sizeof(struct pt_regs)); DEFINE(SVC_DACR, offsetof(struct svc_pt_regs, dacr)); + DEFINE(SVC_TTBCR, offsetof(struct svc_pt_regs, ttbcr)); DEFINE(SVC_REGS_SIZE, sizeof(struct svc_pt_regs)); BLANK(); DEFINE(SIGFRAME_RC3_OFFSET, offsetof(struct sigframe, retcode[3])); diff --git a/arch/arm/kernel/entry-ftrace.S b/arch/arm/kernel/entry-ftrace.S index 3e7bcaca5e07..bc598e3d8dd2 100644 --- a/arch/arm/kernel/entry-ftrace.S +++ b/arch/arm/kernel/entry-ftrace.S @@ -271,6 +271,10 @@ ENTRY(ftrace_stub) ret lr ENDPROC(ftrace_stub) +ENTRY(ftrace_stub_graph) + ret lr +ENDPROC(ftrace_stub_graph) + #ifdef CONFIG_DYNAMIC_FTRACE __INIT diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 054e9199f30d..a12efd0f43e8 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -17,6 +17,7 @@ #include <linux/perf_event.h> #include <linux/hw_breakpoint.h> #include <linux/smp.h> +#include <linux/cfi.h> #include <linux/cpu_pm.h> #include <linux/coresight.h> @@ -903,6 +904,37 @@ unlock: watchpoint_single_step_handler(addr); } +#ifdef CONFIG_CFI_CLANG +static void hw_breakpoint_cfi_handler(struct pt_regs *regs) +{ + /* + * TODO: implementing target and type to pass to CFI using the more + * elaborate report_cfi_failure() requires compiler work. To be able + * to properly extract target information the compiler needs to + * emit a stable instructions sequence for the CFI checks so we can + * decode the instructions preceding the trap and figure out which + * registers were used. + */ + + switch (report_cfi_failure_noaddr(regs, instruction_pointer(regs))) { + case BUG_TRAP_TYPE_BUG: + die("Oops - CFI", regs, 0); + break; + case BUG_TRAP_TYPE_WARN: + /* Skip the breaking instruction */ + instruction_pointer(regs) += 4; + break; + default: + die("Unknown CFI error", regs, 0); + break; + } +} +#else +static void hw_breakpoint_cfi_handler(struct pt_regs *regs) +{ +} +#endif + /* * Called from either the Data Abort Handler [watchpoint] or the * Prefetch Abort Handler [breakpoint] with interrupts disabled. @@ -932,6 +964,9 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr, case ARM_ENTRY_SYNC_WATCHPOINT: watchpoint_handler(addr, fsr, regs); break; + case ARM_ENTRY_CFI_BREAKPOINT: + hw_breakpoint_cfi_handler(regs); + break; default: ret = 1; /* Unhandled fault. */ } diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index e74d84f58b77..677f218f7e84 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -12,48 +12,14 @@ #include <linux/kernel.h> #include <linux/mm.h> #include <linux/elf.h> -#include <linux/vmalloc.h> #include <linux/fs.h> #include <linux/string.h> -#include <linux/gfp.h> #include <asm/sections.h> #include <asm/smp_plat.h> #include <asm/unwind.h> #include <asm/opcodes.h> -#ifdef CONFIG_XIP_KERNEL -/* - * The XIP kernel text is mapped in the module area for modules and - * some other stuff to work without any indirect relocations. - * MODULES_VADDR is redefined here and not in asm/memory.h to avoid - * recompiling the whole kernel when CONFIG_XIP_KERNEL is turned on/off. - */ -#undef MODULES_VADDR -#define MODULES_VADDR (((unsigned long)_exiprom + ~PMD_MASK) & PMD_MASK) -#endif - -#ifdef CONFIG_MMU -void *module_alloc(unsigned long size) -{ - gfp_t gfp_mask = GFP_KERNEL; - void *p; - - /* Silence the initial allocation */ - if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS)) - gfp_mask |= __GFP_NOWARN; - - p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - gfp_mask, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, - __builtin_return_address(0)); - if (!IS_ENABLED(CONFIG_ARM_MODULE_PLTS) || p) - return p; - return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, - GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, - __builtin_return_address(0)); -} -#endif - bool module_init_section(const char *name) { return strstarts(name, ".init") || diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c index c3ec3861dd07..58a6441b58c4 100644 --- a/arch/arm/kernel/suspend.c +++ b/arch/arm/kernel/suspend.c @@ -12,6 +12,7 @@ #include <asm/smp_plat.h> #include <asm/suspend.h> #include <asm/tlbflush.h> +#include <asm/uaccess.h> extern int __cpu_suspend(unsigned long, int (*)(unsigned long), u32 cpuid); extern void cpu_resume_mmu(void); @@ -27,6 +28,13 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) return -EINVAL; /* + * Needed for the MMU disabling/enabing code to be able to run from + * TTBR0 addresses. + */ + if (IS_ENABLED(CONFIG_CPU_TTBR0_PAN)) + uaccess_save_and_enable(); + + /* * Function graph tracer state gets incosistent when the kernel * calls functions that never return (aka suspend finishers) hence * disable graph tracing during their execution. diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S index 6928781e6bee..c289bde04743 100644 --- a/arch/arm/lib/csumpartialcopyuser.S +++ b/arch/arm/lib/csumpartialcopyuser.S @@ -13,7 +13,8 @@ .text -#ifdef CONFIG_CPU_SW_DOMAIN_PAN +#if defined(CONFIG_CPU_SW_DOMAIN_PAN) + .macro save_regs mrc p15, 0, ip, c3, c0, 0 stmfd sp!, {r1, r2, r4 - r8, ip, lr} @@ -25,7 +26,23 @@ mcr p15, 0, ip, c3, c0, 0 ret lr .endm + +#elif defined(CONFIG_CPU_TTBR0_PAN) + + .macro save_regs + mrc p15, 0, ip, c2, c0, 2 @ read TTBCR + stmfd sp!, {r1, r2, r4 - r8, ip, lr} + uaccess_enable ip + .endm + + .macro load_regs + ldmfd sp!, {r1, r2, r4 - r8, ip, lr} + mcr p15, 0, ip, c2, c0, 2 @ restore TTBCR + ret lr + .endm + #else + .macro save_regs stmfd sp!, {r1, r2, r4 - r8, lr} .endm @@ -33,6 +50,7 @@ .macro load_regs ldmfd sp!, {r1, r2, r4 - r8, pc} .endm + #endif .macro load1b, reg1 diff --git a/arch/arm/lib/delay-loop.S b/arch/arm/lib/delay-loop.S index 3ac05177d097..33b08ca1c242 100644 --- a/arch/arm/lib/delay-loop.S +++ b/arch/arm/lib/delay-loop.S @@ -5,6 +5,7 @@ * Copyright (C) 1995, 1996 Russell King */ #include <linux/linkage.h> +#include <linux/cfi_types.h> #include <asm/assembler.h> #include <asm/delay.h> @@ -24,21 +25,26 @@ * HZ <= 1000 */ -ENTRY(__loop_udelay) +SYM_TYPED_FUNC_START(__loop_udelay) ldr r2, .LC1 mul r0, r2, r0 @ r0 = delay_us * UDELAY_MULT -ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0xfffffaf0 + b __loop_const_udelay +SYM_FUNC_END(__loop_udelay) + +SYM_TYPED_FUNC_START(__loop_const_udelay) @ 0 <= r0 <= 0xfffffaf0 ldr r2, .LC0 ldr r2, [r2] umull r1, r0, r2, r0 @ r0-r1 = r0 * loops_per_jiffy adds r1, r1, #0xffffffff @ rounding up ... adcs r0, r0, r0 @ and right shift by 31 reteq lr + b __loop_delay +SYM_FUNC_END(__loop_const_udelay) .align 3 @ Delay routine -ENTRY(__loop_delay) +SYM_TYPED_FUNC_START(__loop_delay) subs r0, r0, #1 #if 0 retls lr @@ -58,6 +64,4 @@ ENTRY(__loop_delay) #endif bhi __loop_delay ret lr -ENDPROC(__loop_udelay) -ENDPROC(__loop_const_udelay) -ENDPROC(__loop_delay) +SYM_FUNC_END(__loop_delay) diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index 71b858c9b10c..f1f231f20ff9 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -45,6 +45,7 @@ obj-$(CONFIG_CPU_CACHE_V7) += cache-v7.o obj-$(CONFIG_CPU_CACHE_FA) += cache-fa.o obj-$(CONFIG_CPU_CACHE_NOP) += cache-nop.o obj-$(CONFIG_CPU_CACHE_V7M) += cache-v7m.o +obj-y += cache.o obj-$(CONFIG_CPU_COPY_V4WT) += copypage-v4wt.o obj-$(CONFIG_CPU_COPY_V4WB) += copypage-v4wb.o @@ -62,6 +63,7 @@ obj-$(CONFIG_CPU_TLB_FEROCEON) += tlb-v4wbi.o # reuse v4wbi TLB functions obj-$(CONFIG_CPU_TLB_V6) += tlb-v6.o obj-$(CONFIG_CPU_TLB_V7) += tlb-v7.o obj-$(CONFIG_CPU_TLB_FA) += tlb-fa.o +obj-y += tlb.o obj-$(CONFIG_CPU_ARM7TDMI) += proc-arm7tdmi.o obj-$(CONFIG_CPU_ARM720T) += proc-arm720.o @@ -88,6 +90,7 @@ obj-$(CONFIG_CPU_V6) += proc-v6.o obj-$(CONFIG_CPU_V6K) += proc-v6.o obj-$(CONFIG_CPU_V7) += proc-v7.o proc-v7-bugs.o obj-$(CONFIG_CPU_V7M) += proc-v7m.o +obj-$(CONFIG_CFI_CLANG) += proc.o obj-$(CONFIG_OUTER_CACHE) += l2c-common.o obj-$(CONFIG_CACHE_B15_RAC) += cache-b15-rac.o diff --git a/arch/arm/mm/cache-b15-rac.c b/arch/arm/mm/cache-b15-rac.c index 9c1172f26885..6f63b90f9e1a 100644 --- a/arch/arm/mm/cache-b15-rac.c +++ b/arch/arm/mm/cache-b15-rac.c @@ -5,6 +5,7 @@ * Copyright (C) 2015-2016 Broadcom */ +#include <linux/cfi_types.h> #include <linux/err.h> #include <linux/spinlock.h> #include <linux/io.h> diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S index 71c64e92dead..4a3668b52a2d 100644 --- a/arch/arm/mm/cache-fa.S +++ b/arch/arm/mm/cache-fa.S @@ -12,6 +12,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <asm/assembler.h> #include <asm/page.h> @@ -39,11 +40,11 @@ * * Unconditionally clean and invalidate the entire icache. */ -ENTRY(fa_flush_icache_all) +SYM_TYPED_FUNC_START(fa_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache ret lr -ENDPROC(fa_flush_icache_all) +SYM_FUNC_END(fa_flush_icache_all) /* * flush_user_cache_all() @@ -51,14 +52,14 @@ ENDPROC(fa_flush_icache_all) * Clean and invalidate all cache entries in a particular address * space. */ -ENTRY(fa_flush_user_cache_all) - /* FALLTHROUGH */ +SYM_FUNC_ALIAS(fa_flush_user_cache_all, fa_flush_kern_cache_all) + /* * flush_kern_cache_all() * * Clean and invalidate the entire cache. */ -ENTRY(fa_flush_kern_cache_all) +SYM_TYPED_FUNC_START(fa_flush_kern_cache_all) mov ip, #0 mov r2, #VM_EXEC __flush_whole_cache: @@ -69,6 +70,7 @@ __flush_whole_cache: mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush ret lr +SYM_FUNC_END(fa_flush_kern_cache_all) /* * flush_user_cache_range(start, end, flags) @@ -80,7 +82,7 @@ __flush_whole_cache: * - end - end address (exclusive, page aligned) * - flags - vma_area_struct flags describing address space */ -ENTRY(fa_flush_user_cache_range) +SYM_TYPED_FUNC_START(fa_flush_user_cache_range) mov ip, #0 sub r3, r1, r0 @ calculate total size cmp r3, #CACHE_DLIMIT @ total size >= limit? @@ -97,6 +99,7 @@ ENTRY(fa_flush_user_cache_range) mcrne p15, 0, ip, c7, c10, 4 @ data write barrier mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush ret lr +SYM_FUNC_END(fa_flush_user_cache_range) /* * coherent_kern_range(start, end) @@ -108,8 +111,11 @@ ENTRY(fa_flush_user_cache_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(fa_coherent_kern_range) - /* fall through */ +SYM_TYPED_FUNC_START(fa_coherent_kern_range) +#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ + b fa_coherent_user_range +#endif +SYM_FUNC_END(fa_coherent_kern_range) /* * coherent_user_range(start, end) @@ -121,7 +127,7 @@ ENTRY(fa_coherent_kern_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(fa_coherent_user_range) +SYM_TYPED_FUNC_START(fa_coherent_user_range) bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry @@ -133,6 +139,7 @@ ENTRY(fa_coherent_user_range) mcr p15, 0, r0, c7, c10, 4 @ drain write buffer mcr p15, 0, r0, c7, c5, 4 @ prefetch flush ret lr +SYM_FUNC_END(fa_coherent_user_range) /* * flush_kern_dcache_area(void *addr, size_t size) @@ -143,7 +150,7 @@ ENTRY(fa_coherent_user_range) * - addr - kernel address * - size - size of region */ -ENTRY(fa_flush_kern_dcache_area) +SYM_TYPED_FUNC_START(fa_flush_kern_dcache_area) add r1, r0, r1 1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line add r0, r0, #CACHE_DLINESIZE @@ -153,6 +160,7 @@ ENTRY(fa_flush_kern_dcache_area) mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain write buffer ret lr +SYM_FUNC_END(fa_flush_kern_dcache_area) /* * dma_inv_range(start, end) @@ -203,7 +211,7 @@ fa_dma_clean_range: * - start - virtual start address of region * - end - virtual end address of region */ -ENTRY(fa_dma_flush_range) +SYM_TYPED_FUNC_START(fa_dma_flush_range) bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D entry add r0, r0, #CACHE_DLINESIZE @@ -212,6 +220,7 @@ ENTRY(fa_dma_flush_range) mov r0, #0 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer ret lr +SYM_FUNC_END(fa_dma_flush_range) /* * dma_map_area(start, size, dir) @@ -219,13 +228,13 @@ ENTRY(fa_dma_flush_range) * - size - size of region * - dir - DMA direction */ -ENTRY(fa_dma_map_area) +SYM_TYPED_FUNC_START(fa_dma_map_area) add r1, r1, r0 cmp r2, #DMA_TO_DEVICE beq fa_dma_clean_range bcs fa_dma_inv_range b fa_dma_flush_range -ENDPROC(fa_dma_map_area) +SYM_FUNC_END(fa_dma_map_area) /* * dma_unmap_area(start, size, dir) @@ -233,14 +242,6 @@ ENDPROC(fa_dma_map_area) * - size - size of region * - dir - DMA direction */ -ENTRY(fa_dma_unmap_area) +SYM_TYPED_FUNC_START(fa_dma_unmap_area) ret lr -ENDPROC(fa_dma_unmap_area) - - .globl fa_flush_kern_cache_louis - .equ fa_flush_kern_cache_louis, fa_flush_kern_cache_all - - __INITDATA - - @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) - define_cache_functions fa +SYM_FUNC_END(fa_dma_unmap_area) diff --git a/arch/arm/mm/cache-nop.S b/arch/arm/mm/cache-nop.S index 72d939ef8798..f68dde2014ee 100644 --- a/arch/arm/mm/cache-nop.S +++ b/arch/arm/mm/cache-nop.S @@ -1,47 +1,52 @@ /* SPDX-License-Identifier: GPL-2.0-only */ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <asm/assembler.h> #include "proc-macros.S" -ENTRY(nop_flush_icache_all) +/* + * These are all open-coded instead of aliased, to make clear + * what is going on here: all functions are stubbed out. + */ +SYM_TYPED_FUNC_START(nop_flush_icache_all) ret lr -ENDPROC(nop_flush_icache_all) +SYM_FUNC_END(nop_flush_icache_all) - .globl nop_flush_kern_cache_all - .equ nop_flush_kern_cache_all, nop_flush_icache_all - - .globl nop_flush_kern_cache_louis - .equ nop_flush_kern_cache_louis, nop_flush_icache_all +SYM_TYPED_FUNC_START(nop_flush_kern_cache_all) + ret lr +SYM_FUNC_END(nop_flush_kern_cache_all) - .globl nop_flush_user_cache_all - .equ nop_flush_user_cache_all, nop_flush_icache_all +SYM_TYPED_FUNC_START(nop_flush_user_cache_all) + ret lr +SYM_FUNC_END(nop_flush_user_cache_all) - .globl nop_flush_user_cache_range - .equ nop_flush_user_cache_range, nop_flush_icache_all +SYM_TYPED_FUNC_START(nop_flush_user_cache_range) + ret lr +SYM_FUNC_END(nop_flush_user_cache_range) - .globl nop_coherent_kern_range - .equ nop_coherent_kern_range, nop_flush_icache_all +SYM_TYPED_FUNC_START(nop_coherent_kern_range) + ret lr +SYM_FUNC_END(nop_coherent_kern_range) -ENTRY(nop_coherent_user_range) +SYM_TYPED_FUNC_START(nop_coherent_user_range) mov r0, 0 ret lr -ENDPROC(nop_coherent_user_range) - - .globl nop_flush_kern_dcache_area - .equ nop_flush_kern_dcache_area, nop_flush_icache_all +SYM_FUNC_END(nop_coherent_user_range) - .globl nop_dma_flush_range - .equ nop_dma_flush_range, nop_flush_icache_all - - .globl nop_dma_map_area - .equ nop_dma_map_area, nop_flush_icache_all +SYM_TYPED_FUNC_START(nop_flush_kern_dcache_area) + ret lr +SYM_FUNC_END(nop_flush_kern_dcache_area) - .globl nop_dma_unmap_area - .equ nop_dma_unmap_area, nop_flush_icache_all +SYM_TYPED_FUNC_START(nop_dma_flush_range) + ret lr +SYM_FUNC_END(nop_dma_flush_range) - __INITDATA +SYM_TYPED_FUNC_START(nop_dma_map_area) + ret lr +SYM_FUNC_END(nop_dma_map_area) - @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) - define_cache_functions nop +SYM_TYPED_FUNC_START(nop_dma_unmap_area) + ret lr +SYM_FUNC_END(nop_dma_unmap_area) diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S index 7787057e4990..0e94e5193dbd 100644 --- a/arch/arm/mm/cache-v4.S +++ b/arch/arm/mm/cache-v4.S @@ -6,6 +6,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <asm/assembler.h> #include <asm/page.h> #include "proc-macros.S" @@ -15,9 +16,9 @@ * * Unconditionally clean and invalidate the entire icache. */ -ENTRY(v4_flush_icache_all) +SYM_TYPED_FUNC_START(v4_flush_icache_all) ret lr -ENDPROC(v4_flush_icache_all) +SYM_FUNC_END(v4_flush_icache_all) /* * flush_user_cache_all() @@ -27,21 +28,22 @@ ENDPROC(v4_flush_icache_all) * * - mm - mm_struct describing address space */ -ENTRY(v4_flush_user_cache_all) - /* FALLTHROUGH */ +SYM_FUNC_ALIAS(v4_flush_user_cache_all, v4_flush_kern_cache_all) + /* * flush_kern_cache_all() * * Clean and invalidate the entire cache. */ -ENTRY(v4_flush_kern_cache_all) +SYM_TYPED_FUNC_START(v4_flush_kern_cache_all) #ifdef CONFIG_CPU_CP15 mov r0, #0 mcr p15, 0, r0, c7, c7, 0 @ flush ID cache ret lr #else - /* FALLTHROUGH */ + ret lr #endif +SYM_FUNC_END(v4_flush_kern_cache_all) /* * flush_user_cache_range(start, end, flags) @@ -53,14 +55,15 @@ ENTRY(v4_flush_kern_cache_all) * - end - end address (exclusive, may not be aligned) * - flags - vma_area_struct flags describing address space */ -ENTRY(v4_flush_user_cache_range) +SYM_TYPED_FUNC_START(v4_flush_user_cache_range) #ifdef CONFIG_CPU_CP15 mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ flush ID cache ret lr #else - /* FALLTHROUGH */ + ret lr #endif +SYM_FUNC_END(v4_flush_user_cache_range) /* * coherent_kern_range(start, end) @@ -72,8 +75,9 @@ ENTRY(v4_flush_user_cache_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(v4_coherent_kern_range) - /* FALLTHROUGH */ +SYM_TYPED_FUNC_START(v4_coherent_kern_range) + ret lr +SYM_FUNC_END(v4_coherent_kern_range) /* * coherent_user_range(start, end) @@ -85,9 +89,10 @@ ENTRY(v4_coherent_kern_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(v4_coherent_user_range) +SYM_TYPED_FUNC_START(v4_coherent_user_range) mov r0, #0 ret lr +SYM_FUNC_END(v4_coherent_user_range) /* * flush_kern_dcache_area(void *addr, size_t size) @@ -98,8 +103,11 @@ ENTRY(v4_coherent_user_range) * - addr - kernel address * - size - region size */ -ENTRY(v4_flush_kern_dcache_area) - /* FALLTHROUGH */ +SYM_TYPED_FUNC_START(v4_flush_kern_dcache_area) +#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ + b v4_dma_flush_range +#endif +SYM_FUNC_END(v4_flush_kern_dcache_area) /* * dma_flush_range(start, end) @@ -109,12 +117,13 @@ ENTRY(v4_flush_kern_dcache_area) * - start - virtual start address * - end - virtual end address */ -ENTRY(v4_dma_flush_range) +SYM_TYPED_FUNC_START(v4_dma_flush_range) #ifdef CONFIG_CPU_CP15 mov r0, #0 mcr p15, 0, r0, c7, c7, 0 @ flush ID cache #endif ret lr +SYM_FUNC_END(v4_dma_flush_range) /* * dma_unmap_area(start, size, dir) @@ -122,10 +131,11 @@ ENTRY(v4_dma_flush_range) * - size - size of region * - dir - DMA direction */ -ENTRY(v4_dma_unmap_area) +SYM_TYPED_FUNC_START(v4_dma_unmap_area) teq r2, #DMA_TO_DEVICE bne v4_dma_flush_range - /* FALLTHROUGH */ + ret lr +SYM_FUNC_END(v4_dma_unmap_area) /* * dma_map_area(start, size, dir) @@ -133,15 +143,6 @@ ENTRY(v4_dma_unmap_area) * - size - size of region * - dir - DMA direction */ -ENTRY(v4_dma_map_area) +SYM_TYPED_FUNC_START(v4_dma_map_area) ret lr -ENDPROC(v4_dma_unmap_area) -ENDPROC(v4_dma_map_area) - - .globl v4_flush_kern_cache_louis - .equ v4_flush_kern_cache_louis, v4_flush_kern_cache_all - - __INITDATA - - @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) - define_cache_functions v4 +SYM_FUNC_END(v4_dma_map_area) diff --git a/arch/arm/mm/cache-v4wb.S b/arch/arm/mm/cache-v4wb.S index ad382cee0fdb..ce55a2eef5da 100644 --- a/arch/arm/mm/cache-v4wb.S +++ b/arch/arm/mm/cache-v4wb.S @@ -6,6 +6,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <asm/assembler.h> #include <asm/page.h> #include "proc-macros.S" @@ -53,11 +54,11 @@ flush_base: * * Unconditionally clean and invalidate the entire icache. */ -ENTRY(v4wb_flush_icache_all) +SYM_TYPED_FUNC_START(v4wb_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache ret lr -ENDPROC(v4wb_flush_icache_all) +SYM_FUNC_END(v4wb_flush_icache_all) /* * flush_user_cache_all() @@ -65,14 +66,14 @@ ENDPROC(v4wb_flush_icache_all) * Clean and invalidate all cache entries in a particular address * space. */ -ENTRY(v4wb_flush_user_cache_all) - /* FALLTHROUGH */ +SYM_FUNC_ALIAS(v4wb_flush_user_cache_all, v4wb_flush_kern_cache_all) + /* * flush_kern_cache_all() * * Clean and invalidate the entire cache. */ -ENTRY(v4wb_flush_kern_cache_all) +SYM_TYPED_FUNC_START(v4wb_flush_kern_cache_all) mov ip, #0 mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache __flush_whole_cache: @@ -93,6 +94,7 @@ __flush_whole_cache: #endif mcr p15, 0, ip, c7, c10, 4 @ drain write buffer ret lr +SYM_FUNC_END(v4wb_flush_kern_cache_all) /* * flush_user_cache_range(start, end, flags) @@ -104,7 +106,7 @@ __flush_whole_cache: * - end - end address (exclusive, page aligned) * - flags - vma_area_struct flags describing address space */ -ENTRY(v4wb_flush_user_cache_range) +SYM_TYPED_FUNC_START(v4wb_flush_user_cache_range) mov ip, #0 sub r3, r1, r0 @ calculate total size tst r2, #VM_EXEC @ executable region? @@ -121,6 +123,7 @@ ENTRY(v4wb_flush_user_cache_range) tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer ret lr +SYM_FUNC_END(v4wb_flush_user_cache_range) /* * flush_kern_dcache_area(void *addr, size_t size) @@ -131,9 +134,12 @@ ENTRY(v4wb_flush_user_cache_range) * - addr - kernel address * - size - region size */ -ENTRY(v4wb_flush_kern_dcache_area) +SYM_TYPED_FUNC_START(v4wb_flush_kern_dcache_area) add r1, r0, r1 - /* fall through */ +#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ + b v4wb_coherent_user_range +#endif +SYM_FUNC_END(v4wb_flush_kern_dcache_area) /* * coherent_kern_range(start, end) @@ -145,8 +151,11 @@ ENTRY(v4wb_flush_kern_dcache_area) * - start - virtual start address * - end - virtual end address */ -ENTRY(v4wb_coherent_kern_range) - /* fall through */ +SYM_TYPED_FUNC_START(v4wb_coherent_kern_range) +#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ + b v4wb_coherent_user_range +#endif +SYM_FUNC_END(v4wb_coherent_kern_range) /* * coherent_user_range(start, end) @@ -158,7 +167,7 @@ ENTRY(v4wb_coherent_kern_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(v4wb_coherent_user_range) +SYM_TYPED_FUNC_START(v4wb_coherent_user_range) bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry @@ -169,7 +178,7 @@ ENTRY(v4wb_coherent_user_range) mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB ret lr - +SYM_FUNC_END(v4wb_coherent_user_range) /* * dma_inv_range(start, end) @@ -231,13 +240,13 @@ v4wb_dma_clean_range: * - size - size of region * - dir - DMA direction */ -ENTRY(v4wb_dma_map_area) +SYM_TYPED_FUNC_START(v4wb_dma_map_area) add r1, r1, r0 cmp r2, #DMA_TO_DEVICE beq v4wb_dma_clean_range bcs v4wb_dma_inv_range b v4wb_dma_flush_range -ENDPROC(v4wb_dma_map_area) +SYM_FUNC_END(v4wb_dma_map_area) /* * dma_unmap_area(start, size, dir) @@ -245,14 +254,6 @@ ENDPROC(v4wb_dma_map_area) * - size - size of region * - dir - DMA direction */ -ENTRY(v4wb_dma_unmap_area) +SYM_TYPED_FUNC_START(v4wb_dma_unmap_area) ret lr -ENDPROC(v4wb_dma_unmap_area) - - .globl v4wb_flush_kern_cache_louis - .equ v4wb_flush_kern_cache_louis, v4wb_flush_kern_cache_all - - __INITDATA - - @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) - define_cache_functions v4wb +SYM_FUNC_END(v4wb_dma_unmap_area) diff --git a/arch/arm/mm/cache-v4wt.S b/arch/arm/mm/cache-v4wt.S index 0b290c25a99d..a97dc267b3b0 100644 --- a/arch/arm/mm/cache-v4wt.S +++ b/arch/arm/mm/cache-v4wt.S @@ -10,6 +10,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <asm/assembler.h> #include <asm/page.h> #include "proc-macros.S" @@ -43,11 +44,11 @@ * * Unconditionally clean and invalidate the entire icache. */ -ENTRY(v4wt_flush_icache_all) +SYM_TYPED_FUNC_START(v4wt_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache ret lr -ENDPROC(v4wt_flush_icache_all) +SYM_FUNC_END(v4wt_flush_icache_all) /* * flush_user_cache_all() @@ -55,14 +56,14 @@ ENDPROC(v4wt_flush_icache_all) * Invalidate all cache entries in a particular address * space. */ -ENTRY(v4wt_flush_user_cache_all) - /* FALLTHROUGH */ +SYM_FUNC_ALIAS(v4wt_flush_user_cache_all, v4wt_flush_kern_cache_all) + /* * flush_kern_cache_all() * * Clean and invalidate the entire cache. */ -ENTRY(v4wt_flush_kern_cache_all) +SYM_TYPED_FUNC_START(v4wt_flush_kern_cache_all) mov r2, #VM_EXEC mov ip, #0 __flush_whole_cache: @@ -70,6 +71,7 @@ __flush_whole_cache: mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache ret lr +SYM_FUNC_END(v4wt_flush_kern_cache_all) /* * flush_user_cache_range(start, end, flags) @@ -81,7 +83,7 @@ __flush_whole_cache: * - end - end address (exclusive, page aligned) * - flags - vma_area_struct flags describing address space */ -ENTRY(v4wt_flush_user_cache_range) +SYM_TYPED_FUNC_START(v4wt_flush_user_cache_range) sub r3, r1, r0 @ calculate total size cmp r3, #CACHE_DLIMIT bhs __flush_whole_cache @@ -93,6 +95,7 @@ ENTRY(v4wt_flush_user_cache_range) cmp r0, r1 blo 1b ret lr +SYM_FUNC_END(v4wt_flush_user_cache_range) /* * coherent_kern_range(start, end) @@ -104,8 +107,11 @@ ENTRY(v4wt_flush_user_cache_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(v4wt_coherent_kern_range) - /* FALLTRHOUGH */ +SYM_TYPED_FUNC_START(v4wt_coherent_kern_range) +#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ + b v4wt_coherent_user_range +#endif +SYM_FUNC_END(v4wt_coherent_kern_range) /* * coherent_user_range(start, end) @@ -117,7 +123,7 @@ ENTRY(v4wt_coherent_kern_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(v4wt_coherent_user_range) +SYM_TYPED_FUNC_START(v4wt_coherent_user_range) bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry add r0, r0, #CACHE_DLINESIZE @@ -125,6 +131,7 @@ ENTRY(v4wt_coherent_user_range) blo 1b mov r0, #0 ret lr +SYM_FUNC_END(v4wt_coherent_user_range) /* * flush_kern_dcache_area(void *addr, size_t size) @@ -135,11 +142,12 @@ ENTRY(v4wt_coherent_user_range) * - addr - kernel address * - size - region size */ -ENTRY(v4wt_flush_kern_dcache_area) +SYM_TYPED_FUNC_START(v4wt_flush_kern_dcache_area) mov r2, #0 mcr p15, 0, r2, c7, c5, 0 @ invalidate I cache add r1, r0, r1 - /* fallthrough */ + b v4wt_dma_inv_range +SYM_FUNC_END(v4wt_flush_kern_dcache_area) /* * dma_inv_range(start, end) @@ -167,9 +175,10 @@ v4wt_dma_inv_range: * * - start - virtual start address * - end - virtual end address - */ - .globl v4wt_dma_flush_range - .equ v4wt_dma_flush_range, v4wt_dma_inv_range +*/ +SYM_TYPED_FUNC_START(v4wt_dma_flush_range) + b v4wt_dma_inv_range +SYM_FUNC_END(v4wt_dma_flush_range) /* * dma_unmap_area(start, size, dir) @@ -177,11 +186,12 @@ v4wt_dma_inv_range: * - size - size of region * - dir - DMA direction */ -ENTRY(v4wt_dma_unmap_area) +SYM_TYPED_FUNC_START(v4wt_dma_unmap_area) add r1, r1, r0 teq r2, #DMA_TO_DEVICE bne v4wt_dma_inv_range - /* FALLTHROUGH */ + ret lr +SYM_FUNC_END(v4wt_dma_unmap_area) /* * dma_map_area(start, size, dir) @@ -189,15 +199,6 @@ ENTRY(v4wt_dma_unmap_area) * - size - size of region * - dir - DMA direction */ -ENTRY(v4wt_dma_map_area) +SYM_TYPED_FUNC_START(v4wt_dma_map_area) ret lr -ENDPROC(v4wt_dma_unmap_area) -ENDPROC(v4wt_dma_map_area) - - .globl v4wt_flush_kern_cache_louis - .equ v4wt_flush_kern_cache_louis, v4wt_flush_kern_cache_all - - __INITDATA - - @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) - define_cache_functions v4wt +SYM_FUNC_END(v4wt_dma_map_area) diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S index 44211d8a296f..9f415476e218 100644 --- a/arch/arm/mm/cache-v6.S +++ b/arch/arm/mm/cache-v6.S @@ -8,6 +8,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <asm/assembler.h> #include <asm/errno.h> #include <asm/unwind.h> @@ -34,7 +35,7 @@ * r0 - set to 0 * r1 - corrupted */ -ENTRY(v6_flush_icache_all) +SYM_TYPED_FUNC_START(v6_flush_icache_all) mov r0, #0 #ifdef CONFIG_ARM_ERRATA_411920 mrs r1, cpsr @@ -51,7 +52,7 @@ ENTRY(v6_flush_icache_all) mcr p15, 0, r0, c7, c5, 0 @ invalidate I-cache #endif ret lr -ENDPROC(v6_flush_icache_all) +SYM_FUNC_END(v6_flush_icache_all) /* * v6_flush_cache_all() @@ -60,7 +61,7 @@ ENDPROC(v6_flush_icache_all) * * It is assumed that: */ -ENTRY(v6_flush_kern_cache_all) +SYM_TYPED_FUNC_START(v6_flush_kern_cache_all) mov r0, #0 #ifdef HARVARD_CACHE mcr p15, 0, r0, c7, c14, 0 @ D cache clean+invalidate @@ -73,6 +74,7 @@ ENTRY(v6_flush_kern_cache_all) mcr p15, 0, r0, c7, c15, 0 @ Cache clean+invalidate #endif ret lr +SYM_FUNC_END(v6_flush_kern_cache_all) /* * v6_flush_cache_all() @@ -81,8 +83,9 @@ ENTRY(v6_flush_kern_cache_all) * * - mm - mm_struct describing address space */ -ENTRY(v6_flush_user_cache_all) - /*FALLTHROUGH*/ +SYM_TYPED_FUNC_START(v6_flush_user_cache_all) + ret lr +SYM_FUNC_END(v6_flush_user_cache_all) /* * v6_flush_cache_range(start, end, flags) @@ -96,8 +99,9 @@ ENTRY(v6_flush_user_cache_all) * It is assumed that: * - we have a VIPT cache. */ -ENTRY(v6_flush_user_cache_range) +SYM_TYPED_FUNC_START(v6_flush_user_cache_range) ret lr +SYM_FUNC_END(v6_flush_user_cache_range) /* * v6_coherent_kern_range(start,end) @@ -112,8 +116,11 @@ ENTRY(v6_flush_user_cache_range) * It is assumed that: * - the Icache does not read data from the write buffer */ -ENTRY(v6_coherent_kern_range) - /* FALLTHROUGH */ +SYM_TYPED_FUNC_START(v6_coherent_kern_range) +#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ + b v6_coherent_user_range +#endif +SYM_FUNC_END(v6_coherent_kern_range) /* * v6_coherent_user_range(start,end) @@ -128,7 +135,7 @@ ENTRY(v6_coherent_kern_range) * It is assumed that: * - the Icache does not read data from the write buffer */ -ENTRY(v6_coherent_user_range) +SYM_TYPED_FUNC_START(v6_coherent_user_range) UNWIND(.fnstart ) #ifdef HARVARD_CACHE bic r0, r0, #CACHE_LINE_SIZE - 1 @@ -159,8 +166,7 @@ ENTRY(v6_coherent_user_range) mov r0, #-EFAULT ret lr UNWIND(.fnend ) -ENDPROC(v6_coherent_user_range) -ENDPROC(v6_coherent_kern_range) +SYM_FUNC_END(v6_coherent_user_range) /* * v6_flush_kern_dcache_area(void *addr, size_t size) @@ -171,7 +177,7 @@ ENDPROC(v6_coherent_kern_range) * - addr - kernel address * - size - region size */ -ENTRY(v6_flush_kern_dcache_area) +SYM_TYPED_FUNC_START(v6_flush_kern_dcache_area) add r1, r0, r1 bic r0, r0, #D_CACHE_LINE_SIZE - 1 1: @@ -188,7 +194,7 @@ ENTRY(v6_flush_kern_dcache_area) mcr p15, 0, r0, c7, c10, 4 #endif ret lr - +SYM_FUNC_END(v6_flush_kern_dcache_area) /* * v6_dma_inv_range(start,end) @@ -253,7 +259,7 @@ v6_dma_clean_range: * - start - virtual start address of region * - end - virtual end address of region */ -ENTRY(v6_dma_flush_range) +SYM_TYPED_FUNC_START(v6_dma_flush_range) bic r0, r0, #D_CACHE_LINE_SIZE - 1 1: #ifdef HARVARD_CACHE @@ -267,6 +273,7 @@ ENTRY(v6_dma_flush_range) mov r0, #0 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer ret lr +SYM_FUNC_END(v6_dma_flush_range) /* * dma_map_area(start, size, dir) @@ -274,12 +281,12 @@ ENTRY(v6_dma_flush_range) * - size - size of region * - dir - DMA direction */ -ENTRY(v6_dma_map_area) +SYM_TYPED_FUNC_START(v6_dma_map_area) add r1, r1, r0 teq r2, #DMA_FROM_DEVICE beq v6_dma_inv_range b v6_dma_clean_range -ENDPROC(v6_dma_map_area) +SYM_FUNC_END(v6_dma_map_area) /* * dma_unmap_area(start, size, dir) @@ -287,17 +294,9 @@ ENDPROC(v6_dma_map_area) * - size - size of region * - dir - DMA direction */ -ENTRY(v6_dma_unmap_area) +SYM_TYPED_FUNC_START(v6_dma_unmap_area) add r1, r1, r0 teq r2, #DMA_TO_DEVICE bne v6_dma_inv_range ret lr -ENDPROC(v6_dma_unmap_area) - - .globl v6_flush_kern_cache_louis - .equ v6_flush_kern_cache_louis, v6_flush_kern_cache_all - - __INITDATA - - @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) - define_cache_functions v6 +SYM_FUNC_END(v6_dma_unmap_area) diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index 127afe2096ba..201ca05436fa 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -9,6 +9,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <asm/assembler.h> #include <asm/errno.h> #include <asm/unwind.h> @@ -80,12 +81,12 @@ ENDPROC(v7_invalidate_l1) * Registers: * r0 - set to 0 */ -ENTRY(v7_flush_icache_all) +SYM_TYPED_FUNC_START(v7_flush_icache_all) mov r0, #0 ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate ret lr -ENDPROC(v7_flush_icache_all) +SYM_FUNC_END(v7_flush_icache_all) /* * v7_flush_dcache_louis() @@ -193,7 +194,7 @@ ENDPROC(v7_flush_dcache_all) * unification in a single instruction. * */ -ENTRY(v7_flush_kern_cache_all) +SYM_TYPED_FUNC_START(v7_flush_kern_cache_all) stmfd sp!, {r4-r6, r9-r10, lr} bl v7_flush_dcache_all mov r0, #0 @@ -201,7 +202,7 @@ ENTRY(v7_flush_kern_cache_all) ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate ldmfd sp!, {r4-r6, r9-r10, lr} ret lr -ENDPROC(v7_flush_kern_cache_all) +SYM_FUNC_END(v7_flush_kern_cache_all) /* * v7_flush_kern_cache_louis(void) @@ -209,7 +210,7 @@ ENDPROC(v7_flush_kern_cache_all) * Flush the data cache up to Level of Unification Inner Shareable. * Invalidate the I-cache to the point of unification. */ -ENTRY(v7_flush_kern_cache_louis) +SYM_TYPED_FUNC_START(v7_flush_kern_cache_louis) stmfd sp!, {r4-r6, r9-r10, lr} bl v7_flush_dcache_louis mov r0, #0 @@ -217,7 +218,7 @@ ENTRY(v7_flush_kern_cache_louis) ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate ldmfd sp!, {r4-r6, r9-r10, lr} ret lr -ENDPROC(v7_flush_kern_cache_louis) +SYM_FUNC_END(v7_flush_kern_cache_louis) /* * v7_flush_cache_all() @@ -226,8 +227,9 @@ ENDPROC(v7_flush_kern_cache_louis) * * - mm - mm_struct describing address space */ -ENTRY(v7_flush_user_cache_all) - /*FALLTHROUGH*/ +SYM_TYPED_FUNC_START(v7_flush_user_cache_all) + ret lr +SYM_FUNC_END(v7_flush_user_cache_all) /* * v7_flush_cache_range(start, end, flags) @@ -241,10 +243,9 @@ ENTRY(v7_flush_user_cache_all) * It is assumed that: * - we have a VIPT cache. */ -ENTRY(v7_flush_user_cache_range) +SYM_TYPED_FUNC_START(v7_flush_user_cache_range) ret lr -ENDPROC(v7_flush_user_cache_all) -ENDPROC(v7_flush_user_cache_range) +SYM_FUNC_END(v7_flush_user_cache_range) /* * v7_coherent_kern_range(start,end) @@ -259,8 +260,11 @@ ENDPROC(v7_flush_user_cache_range) * It is assumed that: * - the Icache does not read data from the write buffer */ -ENTRY(v7_coherent_kern_range) - /* FALLTHROUGH */ +SYM_TYPED_FUNC_START(v7_coherent_kern_range) +#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ + b v7_coherent_user_range +#endif +SYM_FUNC_END(v7_coherent_kern_range) /* * v7_coherent_user_range(start,end) @@ -275,7 +279,7 @@ ENTRY(v7_coherent_kern_range) * It is assumed that: * - the Icache does not read data from the write buffer */ -ENTRY(v7_coherent_user_range) +SYM_TYPED_FUNC_START(v7_coherent_user_range) UNWIND(.fnstart ) dcache_line_size r2, r3 sub r3, r2, #1 @@ -321,8 +325,7 @@ ENTRY(v7_coherent_user_range) mov r0, #-EFAULT ret lr UNWIND(.fnend ) -ENDPROC(v7_coherent_kern_range) -ENDPROC(v7_coherent_user_range) +SYM_FUNC_END(v7_coherent_user_range) /* * v7_flush_kern_dcache_area(void *addr, size_t size) @@ -333,7 +336,7 @@ ENDPROC(v7_coherent_user_range) * - addr - kernel address * - size - region size */ -ENTRY(v7_flush_kern_dcache_area) +SYM_TYPED_FUNC_START(v7_flush_kern_dcache_area) dcache_line_size r2, r3 add r1, r0, r1 sub r3, r2, #1 @@ -349,7 +352,7 @@ ENTRY(v7_flush_kern_dcache_area) blo 1b dsb st ret lr -ENDPROC(v7_flush_kern_dcache_area) +SYM_FUNC_END(v7_flush_kern_dcache_area) /* * v7_dma_inv_range(start,end) @@ -413,7 +416,7 @@ ENDPROC(v7_dma_clean_range) * - start - virtual start address of region * - end - virtual end address of region */ -ENTRY(v7_dma_flush_range) +SYM_TYPED_FUNC_START(v7_dma_flush_range) dcache_line_size r2, r3 sub r3, r2, #1 bic r0, r0, r3 @@ -428,7 +431,7 @@ ENTRY(v7_dma_flush_range) blo 1b dsb st ret lr -ENDPROC(v7_dma_flush_range) +SYM_FUNC_END(v7_dma_flush_range) /* * dma_map_area(start, size, dir) @@ -436,12 +439,12 @@ ENDPROC(v7_dma_flush_range) * - size - size of region * - dir - DMA direction */ -ENTRY(v7_dma_map_area) +SYM_TYPED_FUNC_START(v7_dma_map_area) add r1, r1, r0 teq r2, #DMA_FROM_DEVICE beq v7_dma_inv_range b v7_dma_clean_range -ENDPROC(v7_dma_map_area) +SYM_FUNC_END(v7_dma_map_area) /* * dma_unmap_area(start, size, dir) @@ -449,34 +452,9 @@ ENDPROC(v7_dma_map_area) * - size - size of region * - dir - DMA direction */ -ENTRY(v7_dma_unmap_area) +SYM_TYPED_FUNC_START(v7_dma_unmap_area) add r1, r1, r0 teq r2, #DMA_TO_DEVICE bne v7_dma_inv_range ret lr -ENDPROC(v7_dma_unmap_area) - - __INITDATA - - @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) - define_cache_functions v7 - - /* The Broadcom Brahma-B15 read-ahead cache requires some modifications - * to the v7_cache_fns, we only override the ones we need - */ -#ifndef CONFIG_CACHE_B15_RAC - globl_equ b15_flush_kern_cache_all, v7_flush_kern_cache_all -#endif - globl_equ b15_flush_icache_all, v7_flush_icache_all - globl_equ b15_flush_kern_cache_louis, v7_flush_kern_cache_louis - globl_equ b15_flush_user_cache_all, v7_flush_user_cache_all - globl_equ b15_flush_user_cache_range, v7_flush_user_cache_range - globl_equ b15_coherent_kern_range, v7_coherent_kern_range - globl_equ b15_coherent_user_range, v7_coherent_user_range - globl_equ b15_flush_kern_dcache_area, v7_flush_kern_dcache_area - - globl_equ b15_dma_map_area, v7_dma_map_area - globl_equ b15_dma_unmap_area, v7_dma_unmap_area - globl_equ b15_dma_flush_range, v7_dma_flush_range - - define_cache_functions b15 +SYM_FUNC_END(v7_dma_unmap_area) diff --git a/arch/arm/mm/cache-v7m.S b/arch/arm/mm/cache-v7m.S index eb60b5e5e2ad..14d719eba729 100644 --- a/arch/arm/mm/cache-v7m.S +++ b/arch/arm/mm/cache-v7m.S @@ -11,6 +11,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <asm/assembler.h> #include <asm/errno.h> #include <asm/unwind.h> @@ -159,10 +160,10 @@ ENDPROC(v7m_invalidate_l1) * Registers: * r0 - set to 0 */ -ENTRY(v7m_flush_icache_all) +SYM_TYPED_FUNC_START(v7m_flush_icache_all) invalidate_icache r0 ret lr -ENDPROC(v7m_flush_icache_all) +SYM_FUNC_END(v7m_flush_icache_all) /* * v7m_flush_dcache_all() @@ -236,13 +237,13 @@ ENDPROC(v7m_flush_dcache_all) * unification in a single instruction. * */ -ENTRY(v7m_flush_kern_cache_all) +SYM_TYPED_FUNC_START(v7m_flush_kern_cache_all) stmfd sp!, {r4-r7, r9-r11, lr} bl v7m_flush_dcache_all invalidate_icache r0 ldmfd sp!, {r4-r7, r9-r11, lr} ret lr -ENDPROC(v7m_flush_kern_cache_all) +SYM_FUNC_END(v7m_flush_kern_cache_all) /* * v7m_flush_cache_all() @@ -251,8 +252,9 @@ ENDPROC(v7m_flush_kern_cache_all) * * - mm - mm_struct describing address space */ -ENTRY(v7m_flush_user_cache_all) - /*FALLTHROUGH*/ +SYM_TYPED_FUNC_START(v7m_flush_user_cache_all) + ret lr +SYM_FUNC_END(v7m_flush_user_cache_all) /* * v7m_flush_cache_range(start, end, flags) @@ -266,10 +268,9 @@ ENTRY(v7m_flush_user_cache_all) * It is assumed that: * - we have a VIPT cache. */ -ENTRY(v7m_flush_user_cache_range) +SYM_TYPED_FUNC_START(v7m_flush_user_cache_range) ret lr -ENDPROC(v7m_flush_user_cache_all) -ENDPROC(v7m_flush_user_cache_range) +SYM_FUNC_END(v7m_flush_user_cache_range) /* * v7m_coherent_kern_range(start,end) @@ -284,8 +285,11 @@ ENDPROC(v7m_flush_user_cache_range) * It is assumed that: * - the Icache does not read data from the write buffer */ -ENTRY(v7m_coherent_kern_range) - /* FALLTHROUGH */ +SYM_TYPED_FUNC_START(v7m_coherent_kern_range) +#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ + b v7m_coherent_user_range +#endif +SYM_FUNC_END(v7m_coherent_kern_range) /* * v7m_coherent_user_range(start,end) @@ -300,7 +304,7 @@ ENTRY(v7m_coherent_kern_range) * It is assumed that: * - the Icache does not read data from the write buffer */ -ENTRY(v7m_coherent_user_range) +SYM_TYPED_FUNC_START(v7m_coherent_user_range) UNWIND(.fnstart ) dcache_line_size r2, r3 sub r3, r2, #1 @@ -328,8 +332,7 @@ ENTRY(v7m_coherent_user_range) isb ret lr UNWIND(.fnend ) -ENDPROC(v7m_coherent_kern_range) -ENDPROC(v7m_coherent_user_range) +SYM_FUNC_END(v7m_coherent_user_range) /* * v7m_flush_kern_dcache_area(void *addr, size_t size) @@ -340,7 +343,7 @@ ENDPROC(v7m_coherent_user_range) * - addr - kernel address * - size - region size */ -ENTRY(v7m_flush_kern_dcache_area) +SYM_TYPED_FUNC_START(v7m_flush_kern_dcache_area) dcache_line_size r2, r3 add r1, r0, r1 sub r3, r2, #1 @@ -352,7 +355,7 @@ ENTRY(v7m_flush_kern_dcache_area) blo 1b dsb st ret lr -ENDPROC(v7m_flush_kern_dcache_area) +SYM_FUNC_END(v7m_flush_kern_dcache_area) /* * v7m_dma_inv_range(start,end) @@ -408,7 +411,7 @@ ENDPROC(v7m_dma_clean_range) * - start - virtual start address of region * - end - virtual end address of region */ -ENTRY(v7m_dma_flush_range) +SYM_TYPED_FUNC_START(v7m_dma_flush_range) dcache_line_size r2, r3 sub r3, r2, #1 bic r0, r0, r3 @@ -419,7 +422,7 @@ ENTRY(v7m_dma_flush_range) blo 1b dsb st ret lr -ENDPROC(v7m_dma_flush_range) +SYM_FUNC_END(v7m_dma_flush_range) /* * dma_map_area(start, size, dir) @@ -427,12 +430,12 @@ ENDPROC(v7m_dma_flush_range) * - size - size of region * - dir - DMA direction */ -ENTRY(v7m_dma_map_area) +SYM_TYPED_FUNC_START(v7m_dma_map_area) add r1, r1, r0 teq r2, #DMA_FROM_DEVICE beq v7m_dma_inv_range b v7m_dma_clean_range -ENDPROC(v7m_dma_map_area) +SYM_FUNC_END(v7m_dma_map_area) /* * dma_unmap_area(start, size, dir) @@ -440,17 +443,9 @@ ENDPROC(v7m_dma_map_area) * - size - size of region * - dir - DMA direction */ -ENTRY(v7m_dma_unmap_area) +SYM_TYPED_FUNC_START(v7m_dma_unmap_area) add r1, r1, r0 teq r2, #DMA_TO_DEVICE bne v7m_dma_inv_range ret lr -ENDPROC(v7m_dma_unmap_area) - - .globl v7m_flush_kern_cache_louis - .equ v7m_flush_kern_cache_louis, v7m_flush_kern_cache_all - - __INITDATA - - @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) - define_cache_functions v7m +SYM_FUNC_END(v7m_dma_unmap_area) diff --git a/arch/arm/mm/cache.c b/arch/arm/mm/cache.c new file mode 100644 index 000000000000..e6fbc599c9ed --- /dev/null +++ b/arch/arm/mm/cache.c @@ -0,0 +1,663 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This file defines C prototypes for the low-level cache assembly functions + * and populates a vtable for each selected ARM CPU cache type. + */ + +#include <linux/types.h> +#include <asm/cacheflush.h> + +#ifdef CONFIG_CPU_CACHE_V4 +void v4_flush_icache_all(void); +void v4_flush_kern_cache_all(void); +void v4_flush_user_cache_all(void); +void v4_flush_user_cache_range(unsigned long, unsigned long, unsigned int); +void v4_coherent_kern_range(unsigned long, unsigned long); +int v4_coherent_user_range(unsigned long, unsigned long); +void v4_flush_kern_dcache_area(void *, size_t); +void v4_dma_map_area(const void *, size_t, int); +void v4_dma_unmap_area(const void *, size_t, int); +void v4_dma_flush_range(const void *, const void *); + +struct cpu_cache_fns v4_cache_fns __initconst = { + .flush_icache_all = v4_flush_icache_all, + .flush_kern_all = v4_flush_kern_cache_all, + .flush_kern_louis = v4_flush_kern_cache_all, + .flush_user_all = v4_flush_user_cache_all, + .flush_user_range = v4_flush_user_cache_range, + .coherent_kern_range = v4_coherent_kern_range, + .coherent_user_range = v4_coherent_user_range, + .flush_kern_dcache_area = v4_flush_kern_dcache_area, + .dma_map_area = v4_dma_map_area, + .dma_unmap_area = v4_dma_unmap_area, + .dma_flush_range = v4_dma_flush_range, +}; +#endif + +/* V4 write-back cache "V4WB" */ +#ifdef CONFIG_CPU_CACHE_V4WB +void v4wb_flush_icache_all(void); +void v4wb_flush_kern_cache_all(void); +void v4wb_flush_user_cache_all(void); +void v4wb_flush_user_cache_range(unsigned long, unsigned long, unsigned int); +void v4wb_coherent_kern_range(unsigned long, unsigned long); +int v4wb_coherent_user_range(unsigned long, unsigned long); +void v4wb_flush_kern_dcache_area(void *, size_t); +void v4wb_dma_map_area(const void *, size_t, int); +void v4wb_dma_unmap_area(const void *, size_t, int); +void v4wb_dma_flush_range(const void *, const void *); + +struct cpu_cache_fns v4wb_cache_fns __initconst = { + .flush_icache_all = v4wb_flush_icache_all, + .flush_kern_all = v4wb_flush_kern_cache_all, + .flush_kern_louis = v4wb_flush_kern_cache_all, + .flush_user_all = v4wb_flush_user_cache_all, + .flush_user_range = v4wb_flush_user_cache_range, + .coherent_kern_range = v4wb_coherent_kern_range, + .coherent_user_range = v4wb_coherent_user_range, + .flush_kern_dcache_area = v4wb_flush_kern_dcache_area, + .dma_map_area = v4wb_dma_map_area, + .dma_unmap_area = v4wb_dma_unmap_area, + .dma_flush_range = v4wb_dma_flush_range, +}; +#endif + +/* V4 write-through cache "V4WT" */ +#ifdef CONFIG_CPU_CACHE_V4WT +void v4wt_flush_icache_all(void); +void v4wt_flush_kern_cache_all(void); +void v4wt_flush_user_cache_all(void); +void v4wt_flush_user_cache_range(unsigned long, unsigned long, unsigned int); +void v4wt_coherent_kern_range(unsigned long, unsigned long); +int v4wt_coherent_user_range(unsigned long, unsigned long); +void v4wt_flush_kern_dcache_area(void *, size_t); +void v4wt_dma_map_area(const void *, size_t, int); +void v4wt_dma_unmap_area(const void *, size_t, int); +void v4wt_dma_flush_range(const void *, const void *); + +struct cpu_cache_fns v4wt_cache_fns __initconst = { + .flush_icache_all = v4wt_flush_icache_all, + .flush_kern_all = v4wt_flush_kern_cache_all, + .flush_kern_louis = v4wt_flush_kern_cache_all, + .flush_user_all = v4wt_flush_user_cache_all, + .flush_user_range = v4wt_flush_user_cache_range, + .coherent_kern_range = v4wt_coherent_kern_range, + .coherent_user_range = v4wt_coherent_user_range, + .flush_kern_dcache_area = v4wt_flush_kern_dcache_area, + .dma_map_area = v4wt_dma_map_area, + .dma_unmap_area = v4wt_dma_unmap_area, + .dma_flush_range = v4wt_dma_flush_range, +}; +#endif + +/* Faraday FA526 cache */ +#ifdef CONFIG_CPU_CACHE_FA +void fa_flush_icache_all(void); +void fa_flush_kern_cache_all(void); +void fa_flush_user_cache_all(void); +void fa_flush_user_cache_range(unsigned long, unsigned long, unsigned int); +void fa_coherent_kern_range(unsigned long, unsigned long); +int fa_coherent_user_range(unsigned long, unsigned long); +void fa_flush_kern_dcache_area(void *, size_t); +void fa_dma_map_area(const void *, size_t, int); +void fa_dma_unmap_area(const void *, size_t, int); +void fa_dma_flush_range(const void *, const void *); + +struct cpu_cache_fns fa_cache_fns __initconst = { + .flush_icache_all = fa_flush_icache_all, + .flush_kern_all = fa_flush_kern_cache_all, + .flush_kern_louis = fa_flush_kern_cache_all, + .flush_user_all = fa_flush_user_cache_all, + .flush_user_range = fa_flush_user_cache_range, + .coherent_kern_range = fa_coherent_kern_range, + .coherent_user_range = fa_coherent_user_range, + .flush_kern_dcache_area = fa_flush_kern_dcache_area, + .dma_map_area = fa_dma_map_area, + .dma_unmap_area = fa_dma_unmap_area, + .dma_flush_range = fa_dma_flush_range, +}; +#endif + +#ifdef CONFIG_CPU_CACHE_V6 +void v6_flush_icache_all(void); +void v6_flush_kern_cache_all(void); +void v6_flush_user_cache_all(void); +void v6_flush_user_cache_range(unsigned long, unsigned long, unsigned int); +void v6_coherent_kern_range(unsigned long, unsigned long); +int v6_coherent_user_range(unsigned long, unsigned long); +void v6_flush_kern_dcache_area(void *, size_t); +void v6_dma_map_area(const void *, size_t, int); +void v6_dma_unmap_area(const void *, size_t, int); +void v6_dma_flush_range(const void *, const void *); + +struct cpu_cache_fns v6_cache_fns __initconst = { + .flush_icache_all = v6_flush_icache_all, + .flush_kern_all = v6_flush_kern_cache_all, + .flush_kern_louis = v6_flush_kern_cache_all, + .flush_user_all = v6_flush_user_cache_all, + .flush_user_range = v6_flush_user_cache_range, + .coherent_kern_range = v6_coherent_kern_range, + .coherent_user_range = v6_coherent_user_range, + .flush_kern_dcache_area = v6_flush_kern_dcache_area, + .dma_map_area = v6_dma_map_area, + .dma_unmap_area = v6_dma_unmap_area, + .dma_flush_range = v6_dma_flush_range, +}; +#endif + +#ifdef CONFIG_CPU_CACHE_V7 +void v7_flush_icache_all(void); +void v7_flush_kern_cache_all(void); +void v7_flush_kern_cache_louis(void); +void v7_flush_user_cache_all(void); +void v7_flush_user_cache_range(unsigned long, unsigned long, unsigned int); +void v7_coherent_kern_range(unsigned long, unsigned long); +int v7_coherent_user_range(unsigned long, unsigned long); +void v7_flush_kern_dcache_area(void *, size_t); +void v7_dma_map_area(const void *, size_t, int); +void v7_dma_unmap_area(const void *, size_t, int); +void v7_dma_flush_range(const void *, const void *); + +struct cpu_cache_fns v7_cache_fns __initconst = { + .flush_icache_all = v7_flush_icache_all, + .flush_kern_all = v7_flush_kern_cache_all, + .flush_kern_louis = v7_flush_kern_cache_louis, + .flush_user_all = v7_flush_user_cache_all, + .flush_user_range = v7_flush_user_cache_range, + .coherent_kern_range = v7_coherent_kern_range, + .coherent_user_range = v7_coherent_user_range, + .flush_kern_dcache_area = v7_flush_kern_dcache_area, + .dma_map_area = v7_dma_map_area, + .dma_unmap_area = v7_dma_unmap_area, + .dma_flush_range = v7_dma_flush_range, +}; + +/* Special quirky cache flush function for Broadcom B15 v7 caches */ +void b15_flush_kern_cache_all(void); + +struct cpu_cache_fns b15_cache_fns __initconst = { + .flush_icache_all = v7_flush_icache_all, +#ifdef CONFIG_CACHE_B15_RAC + .flush_kern_all = b15_flush_kern_cache_all, +#else + .flush_kern_all = v7_flush_kern_cache_all, +#endif + .flush_kern_louis = v7_flush_kern_cache_louis, + .flush_user_all = v7_flush_user_cache_all, + .flush_user_range = v7_flush_user_cache_range, + .coherent_kern_range = v7_coherent_kern_range, + .coherent_user_range = v7_coherent_user_range, + .flush_kern_dcache_area = v7_flush_kern_dcache_area, + .dma_map_area = v7_dma_map_area, + .dma_unmap_area = v7_dma_unmap_area, + .dma_flush_range = v7_dma_flush_range, +}; +#endif + +/* The NOP cache is just a set of dummy stubs that by definition does nothing */ +#ifdef CONFIG_CPU_CACHE_NOP +void nop_flush_icache_all(void); +void nop_flush_kern_cache_all(void); +void nop_flush_user_cache_all(void); +void nop_flush_user_cache_range(unsigned long start, unsigned long end, unsigned int flags); +void nop_coherent_kern_range(unsigned long start, unsigned long end); +int nop_coherent_user_range(unsigned long, unsigned long); +void nop_flush_kern_dcache_area(void *kaddr, size_t size); +void nop_dma_map_area(const void *start, size_t size, int flags); +void nop_dma_unmap_area(const void *start, size_t size, int flags); +void nop_dma_flush_range(const void *start, const void *end); + +struct cpu_cache_fns nop_cache_fns __initconst = { + .flush_icache_all = nop_flush_icache_all, + .flush_kern_all = nop_flush_kern_cache_all, + .flush_kern_louis = nop_flush_kern_cache_all, + .flush_user_all = nop_flush_user_cache_all, + .flush_user_range = nop_flush_user_cache_range, + .coherent_kern_range = nop_coherent_kern_range, + .coherent_user_range = nop_coherent_user_range, + .flush_kern_dcache_area = nop_flush_kern_dcache_area, + .dma_map_area = nop_dma_map_area, + .dma_unmap_area = nop_dma_unmap_area, + .dma_flush_range = nop_dma_flush_range, +}; +#endif + +#ifdef CONFIG_CPU_CACHE_V7M +void v7m_flush_icache_all(void); +void v7m_flush_kern_cache_all(void); +void v7m_flush_user_cache_all(void); +void v7m_flush_user_cache_range(unsigned long, unsigned long, unsigned int); +void v7m_coherent_kern_range(unsigned long, unsigned long); +int v7m_coherent_user_range(unsigned long, unsigned long); +void v7m_flush_kern_dcache_area(void *, size_t); +void v7m_dma_map_area(const void *, size_t, int); +void v7m_dma_unmap_area(const void *, size_t, int); +void v7m_dma_flush_range(const void *, const void *); + +struct cpu_cache_fns v7m_cache_fns __initconst = { + .flush_icache_all = v7m_flush_icache_all, + .flush_kern_all = v7m_flush_kern_cache_all, + .flush_kern_louis = v7m_flush_kern_cache_all, + .flush_user_all = v7m_flush_user_cache_all, + .flush_user_range = v7m_flush_user_cache_range, + .coherent_kern_range = v7m_coherent_kern_range, + .coherent_user_range = v7m_coherent_user_range, + .flush_kern_dcache_area = v7m_flush_kern_dcache_area, + .dma_map_area = v7m_dma_map_area, + .dma_unmap_area = v7m_dma_unmap_area, + .dma_flush_range = v7m_dma_flush_range, +}; +#endif + +#ifdef CONFIG_CPU_ARM1020 +void arm1020_flush_icache_all(void); +void arm1020_flush_kern_cache_all(void); +void arm1020_flush_user_cache_all(void); +void arm1020_flush_user_cache_range(unsigned long, unsigned long, unsigned int); +void arm1020_coherent_kern_range(unsigned long, unsigned long); +int arm1020_coherent_user_range(unsigned long, unsigned long); +void arm1020_flush_kern_dcache_area(void *, size_t); +void arm1020_dma_map_area(const void *, size_t, int); +void arm1020_dma_unmap_area(const void *, size_t, int); +void arm1020_dma_flush_range(const void *, const void *); + +struct cpu_cache_fns arm1020_cache_fns __initconst = { + .flush_icache_all = arm1020_flush_icache_all, + .flush_kern_all = arm1020_flush_kern_cache_all, + .flush_kern_louis = arm1020_flush_kern_cache_all, + .flush_user_all = arm1020_flush_user_cache_all, + .flush_user_range = arm1020_flush_user_cache_range, + .coherent_kern_range = arm1020_coherent_kern_range, + .coherent_user_range = arm1020_coherent_user_range, + .flush_kern_dcache_area = arm1020_flush_kern_dcache_area, + .dma_map_area = arm1020_dma_map_area, + .dma_unmap_area = arm1020_dma_unmap_area, + .dma_flush_range = arm1020_dma_flush_range, +}; +#endif + +#ifdef CONFIG_CPU_ARM1020E +void arm1020e_flush_icache_all(void); +void arm1020e_flush_kern_cache_all(void); +void arm1020e_flush_user_cache_all(void); +void arm1020e_flush_user_cache_range(unsigned long, unsigned long, unsigned int); +void arm1020e_coherent_kern_range(unsigned long, unsigned long); +int arm1020e_coherent_user_range(unsigned long, unsigned long); +void arm1020e_flush_kern_dcache_area(void *, size_t); +void arm1020e_dma_map_area(const void *, size_t, int); +void arm1020e_dma_unmap_area(const void *, size_t, int); +void arm1020e_dma_flush_range(const void *, const void *); + +struct cpu_cache_fns arm1020e_cache_fns __initconst = { + .flush_icache_all = arm1020e_flush_icache_all, + .flush_kern_all = arm1020e_flush_kern_cache_all, + .flush_kern_louis = arm1020e_flush_kern_cache_all, + .flush_user_all = arm1020e_flush_user_cache_all, + .flush_user_range = arm1020e_flush_user_cache_range, + .coherent_kern_range = arm1020e_coherent_kern_range, + .coherent_user_range = arm1020e_coherent_user_range, + .flush_kern_dcache_area = arm1020e_flush_kern_dcache_area, + .dma_map_area = arm1020e_dma_map_area, + .dma_unmap_area = arm1020e_dma_unmap_area, + .dma_flush_range = arm1020e_dma_flush_range, +}; +#endif + +#ifdef CONFIG_CPU_ARM1022 +void arm1022_flush_icache_all(void); +void arm1022_flush_kern_cache_all(void); +void arm1022_flush_user_cache_all(void); +void arm1022_flush_user_cache_range(unsigned long, unsigned long, unsigned int); +void arm1022_coherent_kern_range(unsigned long, unsigned long); +int arm1022_coherent_user_range(unsigned long, unsigned long); +void arm1022_flush_kern_dcache_area(void *, size_t); +void arm1022_dma_map_area(const void *, size_t, int); +void arm1022_dma_unmap_area(const void *, size_t, int); +void arm1022_dma_flush_range(const void *, const void *); + +struct cpu_cache_fns arm1022_cache_fns __initconst = { + .flush_icache_all = arm1022_flush_icache_all, + .flush_kern_all = arm1022_flush_kern_cache_all, + .flush_kern_louis = arm1022_flush_kern_cache_all, + .flush_user_all = arm1022_flush_user_cache_all, + .flush_user_range = arm1022_flush_user_cache_range, + .coherent_kern_range = arm1022_coherent_kern_range, + .coherent_user_range = arm1022_coherent_user_range, + .flush_kern_dcache_area = arm1022_flush_kern_dcache_area, + .dma_map_area = arm1022_dma_map_area, + .dma_unmap_area = arm1022_dma_unmap_area, + .dma_flush_range = arm1022_dma_flush_range, +}; +#endif + +#ifdef CONFIG_CPU_ARM1026 +void arm1026_flush_icache_all(void); +void arm1026_flush_kern_cache_all(void); +void arm1026_flush_user_cache_all(void); +void arm1026_flush_user_cache_range(unsigned long, unsigned long, unsigned int); +void arm1026_coherent_kern_range(unsigned long, unsigned long); +int arm1026_coherent_user_range(unsigned long, unsigned long); +void arm1026_flush_kern_dcache_area(void *, size_t); +void arm1026_dma_map_area(const void *, size_t, int); +void arm1026_dma_unmap_area(const void *, size_t, int); +void arm1026_dma_flush_range(const void *, const void *); + +struct cpu_cache_fns arm1026_cache_fns __initconst = { + .flush_icache_all = arm1026_flush_icache_all, + .flush_kern_all = arm1026_flush_kern_cache_all, + .flush_kern_louis = arm1026_flush_kern_cache_all, + .flush_user_all = arm1026_flush_user_cache_all, + .flush_user_range = arm1026_flush_user_cache_range, + .coherent_kern_range = arm1026_coherent_kern_range, + .coherent_user_range = arm1026_coherent_user_range, + .flush_kern_dcache_area = arm1026_flush_kern_dcache_area, + .dma_map_area = arm1026_dma_map_area, + .dma_unmap_area = arm1026_dma_unmap_area, + .dma_flush_range = arm1026_dma_flush_range, +}; +#endif + +#if defined(CONFIG_CPU_ARM920T) && !defined(CONFIG_CPU_DCACHE_WRITETHROUGH) +void arm920_flush_icache_all(void); +void arm920_flush_kern_cache_all(void); +void arm920_flush_user_cache_all(void); +void arm920_flush_user_cache_range(unsigned long, unsigned long, unsigned int); +void arm920_coherent_kern_range(unsigned long, unsigned long); +int arm920_coherent_user_range(unsigned long, unsigned long); +void arm920_flush_kern_dcache_area(void *, size_t); +void arm920_dma_map_area(const void *, size_t, int); +void arm920_dma_unmap_area(const void *, size_t, int); +void arm920_dma_flush_range(const void *, const void *); + +struct cpu_cache_fns arm920_cache_fns __initconst = { + .flush_icache_all = arm920_flush_icache_all, + .flush_kern_all = arm920_flush_kern_cache_all, + .flush_kern_louis = arm920_flush_kern_cache_all, + .flush_user_all = arm920_flush_user_cache_all, + .flush_user_range = arm920_flush_user_cache_range, + .coherent_kern_range = arm920_coherent_kern_range, + .coherent_user_range = arm920_coherent_user_range, + .flush_kern_dcache_area = arm920_flush_kern_dcache_area, + .dma_map_area = arm920_dma_map_area, + .dma_unmap_area = arm920_dma_unmap_area, + .dma_flush_range = arm920_dma_flush_range, +}; +#endif + +#if defined(CONFIG_CPU_ARM922T) && !defined(CONFIG_CPU_DCACHE_WRITETHROUGH) +void arm922_flush_icache_all(void); +void arm922_flush_kern_cache_all(void); +void arm922_flush_user_cache_all(void); +void arm922_flush_user_cache_range(unsigned long, unsigned long, unsigned int); +void arm922_coherent_kern_range(unsigned long, unsigned long); +int arm922_coherent_user_range(unsigned long, unsigned long); +void arm922_flush_kern_dcache_area(void *, size_t); +void arm922_dma_map_area(const void *, size_t, int); +void arm922_dma_unmap_area(const void *, size_t, int); +void arm922_dma_flush_range(const void *, const void *); + +struct cpu_cache_fns arm922_cache_fns __initconst = { + .flush_icache_all = arm922_flush_icache_all, + .flush_kern_all = arm922_flush_kern_cache_all, + .flush_kern_louis = arm922_flush_kern_cache_all, + .flush_user_all = arm922_flush_user_cache_all, + .flush_user_range = arm922_flush_user_cache_range, + .coherent_kern_range = arm922_coherent_kern_range, + .coherent_user_range = arm922_coherent_user_range, + .flush_kern_dcache_area = arm922_flush_kern_dcache_area, + .dma_map_area = arm922_dma_map_area, + .dma_unmap_area = arm922_dma_unmap_area, + .dma_flush_range = arm922_dma_flush_range, +}; +#endif + +#ifdef CONFIG_CPU_ARM925T +void arm925_flush_icache_all(void); +void arm925_flush_kern_cache_all(void); +void arm925_flush_user_cache_all(void); +void arm925_flush_user_cache_range(unsigned long, unsigned long, unsigned int); +void arm925_coherent_kern_range(unsigned long, unsigned long); +int arm925_coherent_user_range(unsigned long, unsigned long); +void arm925_flush_kern_dcache_area(void *, size_t); +void arm925_dma_map_area(const void *, size_t, int); +void arm925_dma_unmap_area(const void *, size_t, int); +void arm925_dma_flush_range(const void *, const void *); + +struct cpu_cache_fns arm925_cache_fns __initconst = { + .flush_icache_all = arm925_flush_icache_all, + .flush_kern_all = arm925_flush_kern_cache_all, + .flush_kern_louis = arm925_flush_kern_cache_all, + .flush_user_all = arm925_flush_user_cache_all, + .flush_user_range = arm925_flush_user_cache_range, + .coherent_kern_range = arm925_coherent_kern_range, + .coherent_user_range = arm925_coherent_user_range, + .flush_kern_dcache_area = arm925_flush_kern_dcache_area, + .dma_map_area = arm925_dma_map_area, + .dma_unmap_area = arm925_dma_unmap_area, + .dma_flush_range = arm925_dma_flush_range, +}; +#endif + +#ifdef CONFIG_CPU_ARM926T +void arm926_flush_icache_all(void); +void arm926_flush_kern_cache_all(void); +void arm926_flush_user_cache_all(void); +void arm926_flush_user_cache_range(unsigned long, unsigned long, unsigned int); +void arm926_coherent_kern_range(unsigned long, unsigned long); +int arm926_coherent_user_range(unsigned long, unsigned long); +void arm926_flush_kern_dcache_area(void *, size_t); +void arm926_dma_map_area(const void *, size_t, int); +void arm926_dma_unmap_area(const void *, size_t, int); +void arm926_dma_flush_range(const void *, const void *); + +struct cpu_cache_fns arm926_cache_fns __initconst = { + .flush_icache_all = arm926_flush_icache_all, + .flush_kern_all = arm926_flush_kern_cache_all, + .flush_kern_louis = arm926_flush_kern_cache_all, + .flush_user_all = arm926_flush_user_cache_all, + .flush_user_range = arm926_flush_user_cache_range, + .coherent_kern_range = arm926_coherent_kern_range, + .coherent_user_range = arm926_coherent_user_range, + .flush_kern_dcache_area = arm926_flush_kern_dcache_area, + .dma_map_area = arm926_dma_map_area, + .dma_unmap_area = arm926_dma_unmap_area, + .dma_flush_range = arm926_dma_flush_range, +}; +#endif + +#ifdef CONFIG_CPU_ARM940T +void arm940_flush_icache_all(void); +void arm940_flush_kern_cache_all(void); +void arm940_flush_user_cache_all(void); +void arm940_flush_user_cache_range(unsigned long, unsigned long, unsigned int); +void arm940_coherent_kern_range(unsigned long, unsigned long); +int arm940_coherent_user_range(unsigned long, unsigned long); +void arm940_flush_kern_dcache_area(void *, size_t); +void arm940_dma_map_area(const void *, size_t, int); +void arm940_dma_unmap_area(const void *, size_t, int); +void arm940_dma_flush_range(const void *, const void *); + +struct cpu_cache_fns arm940_cache_fns __initconst = { + .flush_icache_all = arm940_flush_icache_all, + .flush_kern_all = arm940_flush_kern_cache_all, + .flush_kern_louis = arm940_flush_kern_cache_all, + .flush_user_all = arm940_flush_user_cache_all, + .flush_user_range = arm940_flush_user_cache_range, + .coherent_kern_range = arm940_coherent_kern_range, + .coherent_user_range = arm940_coherent_user_range, + .flush_kern_dcache_area = arm940_flush_kern_dcache_area, + .dma_map_area = arm940_dma_map_area, + .dma_unmap_area = arm940_dma_unmap_area, + .dma_flush_range = arm940_dma_flush_range, +}; +#endif + +#ifdef CONFIG_CPU_ARM946E +void arm946_flush_icache_all(void); +void arm946_flush_kern_cache_all(void); +void arm946_flush_user_cache_all(void); +void arm946_flush_user_cache_range(unsigned long, unsigned long, unsigned int); +void arm946_coherent_kern_range(unsigned long, unsigned long); +int arm946_coherent_user_range(unsigned long, unsigned long); +void arm946_flush_kern_dcache_area(void *, size_t); +void arm946_dma_map_area(const void *, size_t, int); +void arm946_dma_unmap_area(const void *, size_t, int); +void arm946_dma_flush_range(const void *, const void *); + +struct cpu_cache_fns arm946_cache_fns __initconst = { + .flush_icache_all = arm946_flush_icache_all, + .flush_kern_all = arm946_flush_kern_cache_all, + .flush_kern_louis = arm946_flush_kern_cache_all, + .flush_user_all = arm946_flush_user_cache_all, + .flush_user_range = arm946_flush_user_cache_range, + .coherent_kern_range = arm946_coherent_kern_range, + .coherent_user_range = arm946_coherent_user_range, + .flush_kern_dcache_area = arm946_flush_kern_dcache_area, + .dma_map_area = arm946_dma_map_area, + .dma_unmap_area = arm946_dma_unmap_area, + .dma_flush_range = arm946_dma_flush_range, +}; +#endif + +#ifdef CONFIG_CPU_XSCALE +void xscale_flush_icache_all(void); +void xscale_flush_kern_cache_all(void); +void xscale_flush_user_cache_all(void); +void xscale_flush_user_cache_range(unsigned long, unsigned long, unsigned int); +void xscale_coherent_kern_range(unsigned long, unsigned long); +int xscale_coherent_user_range(unsigned long, unsigned long); +void xscale_flush_kern_dcache_area(void *, size_t); +void xscale_dma_map_area(const void *, size_t, int); +void xscale_dma_unmap_area(const void *, size_t, int); +void xscale_dma_flush_range(const void *, const void *); + +struct cpu_cache_fns xscale_cache_fns __initconst = { + .flush_icache_all = xscale_flush_icache_all, + .flush_kern_all = xscale_flush_kern_cache_all, + .flush_kern_louis = xscale_flush_kern_cache_all, + .flush_user_all = xscale_flush_user_cache_all, + .flush_user_range = xscale_flush_user_cache_range, + .coherent_kern_range = xscale_coherent_kern_range, + .coherent_user_range = xscale_coherent_user_range, + .flush_kern_dcache_area = xscale_flush_kern_dcache_area, + .dma_map_area = xscale_dma_map_area, + .dma_unmap_area = xscale_dma_unmap_area, + .dma_flush_range = xscale_dma_flush_range, +}; + +/* The 80200 A0 and A1 need a special quirk for dma_map_area() */ +void xscale_80200_A0_A1_dma_map_area(const void *, size_t, int); + +struct cpu_cache_fns xscale_80200_A0_A1_cache_fns __initconst = { + .flush_icache_all = xscale_flush_icache_all, + .flush_kern_all = xscale_flush_kern_cache_all, + .flush_kern_louis = xscale_flush_kern_cache_all, + .flush_user_all = xscale_flush_user_cache_all, + .flush_user_range = xscale_flush_user_cache_range, + .coherent_kern_range = xscale_coherent_kern_range, + .coherent_user_range = xscale_coherent_user_range, + .flush_kern_dcache_area = xscale_flush_kern_dcache_area, + .dma_map_area = xscale_80200_A0_A1_dma_map_area, + .dma_unmap_area = xscale_dma_unmap_area, + .dma_flush_range = xscale_dma_flush_range, +}; +#endif + +#ifdef CONFIG_CPU_XSC3 +void xsc3_flush_icache_all(void); +void xsc3_flush_kern_cache_all(void); +void xsc3_flush_user_cache_all(void); +void xsc3_flush_user_cache_range(unsigned long, unsigned long, unsigned int); +void xsc3_coherent_kern_range(unsigned long, unsigned long); +int xsc3_coherent_user_range(unsigned long, unsigned long); +void xsc3_flush_kern_dcache_area(void *, size_t); +void xsc3_dma_map_area(const void *, size_t, int); +void xsc3_dma_unmap_area(const void *, size_t, int); +void xsc3_dma_flush_range(const void *, const void *); + +struct cpu_cache_fns xsc3_cache_fns __initconst = { + .flush_icache_all = xsc3_flush_icache_all, + .flush_kern_all = xsc3_flush_kern_cache_all, + .flush_kern_louis = xsc3_flush_kern_cache_all, + .flush_user_all = xsc3_flush_user_cache_all, + .flush_user_range = xsc3_flush_user_cache_range, + .coherent_kern_range = xsc3_coherent_kern_range, + .coherent_user_range = xsc3_coherent_user_range, + .flush_kern_dcache_area = xsc3_flush_kern_dcache_area, + .dma_map_area = xsc3_dma_map_area, + .dma_unmap_area = xsc3_dma_unmap_area, + .dma_flush_range = xsc3_dma_flush_range, +}; +#endif + +#ifdef CONFIG_CPU_MOHAWK +void mohawk_flush_icache_all(void); +void mohawk_flush_kern_cache_all(void); +void mohawk_flush_user_cache_all(void); +void mohawk_flush_user_cache_range(unsigned long, unsigned long, unsigned int); +void mohawk_coherent_kern_range(unsigned long, unsigned long); +int mohawk_coherent_user_range(unsigned long, unsigned long); +void mohawk_flush_kern_dcache_area(void *, size_t); +void mohawk_dma_map_area(const void *, size_t, int); +void mohawk_dma_unmap_area(const void *, size_t, int); +void mohawk_dma_flush_range(const void *, const void *); + +struct cpu_cache_fns mohawk_cache_fns __initconst = { + .flush_icache_all = mohawk_flush_icache_all, + .flush_kern_all = mohawk_flush_kern_cache_all, + .flush_kern_louis = mohawk_flush_kern_cache_all, + .flush_user_all = mohawk_flush_user_cache_all, + .flush_user_range = mohawk_flush_user_cache_range, + .coherent_kern_range = mohawk_coherent_kern_range, + .coherent_user_range = mohawk_coherent_user_range, + .flush_kern_dcache_area = mohawk_flush_kern_dcache_area, + .dma_map_area = mohawk_dma_map_area, + .dma_unmap_area = mohawk_dma_unmap_area, + .dma_flush_range = mohawk_dma_flush_range, +}; +#endif + +#ifdef CONFIG_CPU_FEROCEON +void feroceon_flush_icache_all(void); +void feroceon_flush_kern_cache_all(void); +void feroceon_flush_user_cache_all(void); +void feroceon_flush_user_cache_range(unsigned long, unsigned long, unsigned int); +void feroceon_coherent_kern_range(unsigned long, unsigned long); +int feroceon_coherent_user_range(unsigned long, unsigned long); +void feroceon_flush_kern_dcache_area(void *, size_t); +void feroceon_dma_map_area(const void *, size_t, int); +void feroceon_dma_unmap_area(const void *, size_t, int); +void feroceon_dma_flush_range(const void *, const void *); + +struct cpu_cache_fns feroceon_cache_fns __initconst = { + .flush_icache_all = feroceon_flush_icache_all, + .flush_kern_all = feroceon_flush_kern_cache_all, + .flush_kern_louis = feroceon_flush_kern_cache_all, + .flush_user_all = feroceon_flush_user_cache_all, + .flush_user_range = feroceon_flush_user_cache_range, + .coherent_kern_range = feroceon_coherent_kern_range, + .coherent_user_range = feroceon_coherent_user_range, + .flush_kern_dcache_area = feroceon_flush_kern_dcache_area, + .dma_map_area = feroceon_dma_map_area, + .dma_unmap_area = feroceon_dma_unmap_area, + .dma_flush_range = feroceon_dma_flush_range, +}; + +void feroceon_range_flush_kern_dcache_area(void *, size_t); +void feroceon_range_dma_map_area(const void *, size_t, int); +void feroceon_range_dma_flush_range(const void *, const void *); + +struct cpu_cache_fns feroceon_range_cache_fns __initconst = { + .flush_icache_all = feroceon_flush_icache_all, + .flush_kern_all = feroceon_flush_kern_cache_all, + .flush_kern_louis = feroceon_flush_kern_cache_all, + .flush_user_all = feroceon_flush_user_cache_all, + .flush_user_range = feroceon_flush_user_cache_range, + .coherent_kern_range = feroceon_coherent_kern_range, + .coherent_user_range = feroceon_coherent_user_range, + .flush_kern_dcache_area = feroceon_range_flush_kern_dcache_area, + .dma_map_area = feroceon_range_dma_map_area, + .dma_unmap_area = feroceon_dma_unmap_area, + .dma_flush_range = feroceon_range_dma_flush_range, +}; +#endif diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 439dc6a26bb9..dfa9554ef331 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -242,6 +242,27 @@ static inline bool is_permission_fault(unsigned int fsr) return false; } +#ifdef CONFIG_CPU_TTBR0_PAN +static inline bool ttbr0_usermode_access_allowed(struct pt_regs *regs) +{ + struct svc_pt_regs *svcregs; + + /* If we are in user mode: permission granted */ + if (user_mode(regs)) + return true; + + /* uaccess state saved above pt_regs on SVC exception entry */ + svcregs = to_svc_pt_regs(regs); + + return !(svcregs->ttbcr & TTBCR_EPD0); +} +#else +static inline bool ttbr0_usermode_access_allowed(struct pt_regs *regs) +{ + return true; +} +#endif + static int __kprobes do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { @@ -285,6 +306,14 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); + /* + * Privileged access aborts with CONFIG_CPU_TTBR0_PAN enabled are + * routed via the translation fault mechanism. Check whether uaccess + * is disabled while in kernel mode. + */ + if (!ttbr0_usermode_access_allowed(regs)) + goto no_context; + if (!(flags & FAULT_FLAG_USER)) goto lock_mmap; diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index e8c6f4be0ce1..5345d218899a 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -22,6 +22,7 @@ #include <linux/sizes.h> #include <linux/stop_machine.h> #include <linux/swiotlb.h> +#include <linux/execmem.h> #include <asm/cp15.h> #include <asm/mach-types.h> @@ -486,3 +487,47 @@ void free_initrd_mem(unsigned long start, unsigned long end) free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif + +#ifdef CONFIG_EXECMEM + +#ifdef CONFIG_XIP_KERNEL +/* + * The XIP kernel text is mapped in the module area for modules and + * some other stuff to work without any indirect relocations. + * MODULES_VADDR is redefined here and not in asm/memory.h to avoid + * recompiling the whole kernel when CONFIG_XIP_KERNEL is turned on/off. + */ +#undef MODULES_VADDR +#define MODULES_VADDR (((unsigned long)_exiprom + ~PMD_MASK) & PMD_MASK) +#endif + +#ifdef CONFIG_MMU +static struct execmem_info execmem_info __ro_after_init; + +struct execmem_info __init *execmem_arch_setup(void) +{ + unsigned long fallback_start = 0, fallback_end = 0; + + if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS)) { + fallback_start = VMALLOC_START; + fallback_end = VMALLOC_END; + } + + execmem_info = (struct execmem_info){ + .ranges = { + [EXECMEM_DEFAULT] = { + .start = MODULES_VADDR, + .end = MODULES_END, + .pgprot = PAGE_KERNEL_EXEC, + .alignment = 1, + .fallback_start = fallback_start, + .fallback_end = fallback_end, + }, + }, + }; + + return &execmem_info; +} +#endif /* CONFIG_MMU */ + +#endif /* CONFIG_EXECMEM */ diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index c24e29c0b9a4..3f774856ca67 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1687,9 +1687,8 @@ static void __init early_paging_init(const struct machine_desc *mdesc) */ cr = get_cr(); set_cr(cr & ~(CR_I | CR_C)); - asm("mrc p15, 0, %0, c2, c0, 2" : "=r" (ttbcr)); - asm volatile("mcr p15, 0, %0, c2, c0, 2" - : : "r" (ttbcr & ~(3 << 8 | 3 << 10))); + ttbcr = cpu_get_ttbcr(); + cpu_set_ttbcr(ttbcr & ~(3 << 8 | 3 << 10)); flush_cache_all(); /* @@ -1701,7 +1700,7 @@ static void __init early_paging_init(const struct machine_desc *mdesc) lpae_pgtables_remap(offset, pa_pgd); /* Re-enable the caches and cacheable TLB walks */ - asm volatile("mcr p15, 0, %0, c2, c0, 2" : : "r" (ttbcr)); + cpu_set_ttbcr(ttbcr); set_cr(cr); } diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S index 6837cf7a4812..d0ce3414a13e 100644 --- a/arch/arm/mm/proc-arm1020.S +++ b/arch/arm/mm/proc-arm1020.S @@ -11,6 +11,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <linux/pgtable.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> @@ -56,18 +57,20 @@ /* * cpu_arm1020_proc_init() */ -ENTRY(cpu_arm1020_proc_init) +SYM_TYPED_FUNC_START(cpu_arm1020_proc_init) ret lr +SYM_FUNC_END(cpu_arm1020_proc_init) /* * cpu_arm1020_proc_fin() */ -ENTRY(cpu_arm1020_proc_fin) +SYM_TYPED_FUNC_START(cpu_arm1020_proc_fin) mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches ret lr +SYM_FUNC_END(cpu_arm1020_proc_fin) /* * cpu_arm1020_reset(loc) @@ -80,7 +83,7 @@ ENTRY(cpu_arm1020_proc_fin) */ .align 5 .pushsection .idmap.text, "ax" -ENTRY(cpu_arm1020_reset) +SYM_TYPED_FUNC_START(cpu_arm1020_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches mcr p15, 0, ip, c7, c10, 4 @ drain WB @@ -92,16 +95,17 @@ ENTRY(cpu_arm1020_reset) bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register ret r0 -ENDPROC(cpu_arm1020_reset) +SYM_FUNC_END(cpu_arm1020_reset) .popsection /* * cpu_arm1020_do_idle() */ .align 5 -ENTRY(cpu_arm1020_do_idle) +SYM_TYPED_FUNC_START(cpu_arm1020_do_idle) mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt ret lr +SYM_FUNC_END(cpu_arm1020_do_idle) /* ================================= CACHE ================================ */ @@ -112,13 +116,13 @@ ENTRY(cpu_arm1020_do_idle) * * Unconditionally clean and invalidate the entire icache. */ -ENTRY(arm1020_flush_icache_all) +SYM_TYPED_FUNC_START(arm1020_flush_icache_all) #ifndef CONFIG_CPU_ICACHE_DISABLE mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache #endif ret lr -ENDPROC(arm1020_flush_icache_all) +SYM_FUNC_END(arm1020_flush_icache_all) /* * flush_user_cache_all() @@ -126,14 +130,14 @@ ENDPROC(arm1020_flush_icache_all) * Invalidate all cache entries in a particular address * space. */ -ENTRY(arm1020_flush_user_cache_all) - /* FALLTHROUGH */ +SYM_FUNC_ALIAS(arm1020_flush_user_cache_all, arm1020_flush_kern_cache_all) + /* * flush_kern_cache_all() * * Clean and invalidate the entire cache. */ -ENTRY(arm1020_flush_kern_cache_all) +SYM_TYPED_FUNC_START(arm1020_flush_kern_cache_all) mov r2, #VM_EXEC mov ip, #0 __flush_whole_cache: @@ -154,6 +158,7 @@ __flush_whole_cache: #endif mcrne p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm1020_flush_kern_cache_all) /* * flush_user_cache_range(start, end, flags) @@ -165,7 +170,7 @@ __flush_whole_cache: * - end - end address (exclusive) * - flags - vm_flags for this space */ -ENTRY(arm1020_flush_user_cache_range) +SYM_TYPED_FUNC_START(arm1020_flush_user_cache_range) mov ip, #0 sub r3, r1, r0 @ calculate total size cmp r3, #CACHE_DLIMIT @@ -185,6 +190,7 @@ ENTRY(arm1020_flush_user_cache_range) #endif mcrne p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm1020_flush_user_cache_range) /* * coherent_kern_range(start, end) @@ -196,8 +202,11 @@ ENTRY(arm1020_flush_user_cache_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(arm1020_coherent_kern_range) - /* FALLTRHOUGH */ +SYM_TYPED_FUNC_START(arm1020_coherent_kern_range) +#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ + b arm1020_coherent_user_range +#endif +SYM_FUNC_END(arm1020_coherent_kern_range) /* * coherent_user_range(start, end) @@ -209,7 +218,7 @@ ENTRY(arm1020_coherent_kern_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(arm1020_coherent_user_range) +SYM_TYPED_FUNC_START(arm1020_coherent_user_range) mov ip, #0 bic r0, r0, #CACHE_DLINESIZE - 1 mcr p15, 0, ip, c7, c10, 4 @@ -227,6 +236,7 @@ ENTRY(arm1020_coherent_user_range) mcr p15, 0, ip, c7, c10, 4 @ drain WB mov r0, #0 ret lr +SYM_FUNC_END(arm1020_coherent_user_range) /* * flush_kern_dcache_area(void *addr, size_t size) @@ -237,7 +247,7 @@ ENTRY(arm1020_coherent_user_range) * - addr - kernel address * - size - region size */ -ENTRY(arm1020_flush_kern_dcache_area) +SYM_TYPED_FUNC_START(arm1020_flush_kern_dcache_area) mov ip, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE add r1, r0, r1 @@ -249,6 +259,7 @@ ENTRY(arm1020_flush_kern_dcache_area) #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm1020_flush_kern_dcache_area) /* * dma_inv_range(start, end) @@ -314,7 +325,7 @@ arm1020_dma_clean_range: * - start - virtual start address * - end - virtual end address */ -ENTRY(arm1020_dma_flush_range) +SYM_TYPED_FUNC_START(arm1020_dma_flush_range) mov ip, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE bic r0, r0, #CACHE_DLINESIZE - 1 @@ -327,6 +338,7 @@ ENTRY(arm1020_dma_flush_range) #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm1020_dma_flush_range) /* * dma_map_area(start, size, dir) @@ -334,13 +346,13 @@ ENTRY(arm1020_dma_flush_range) * - size - size of region * - dir - DMA direction */ -ENTRY(arm1020_dma_map_area) +SYM_TYPED_FUNC_START(arm1020_dma_map_area) add r1, r1, r0 cmp r2, #DMA_TO_DEVICE beq arm1020_dma_clean_range bcs arm1020_dma_inv_range b arm1020_dma_flush_range -ENDPROC(arm1020_dma_map_area) +SYM_FUNC_END(arm1020_dma_map_area) /* * dma_unmap_area(start, size, dir) @@ -348,18 +360,12 @@ ENDPROC(arm1020_dma_map_area) * - size - size of region * - dir - DMA direction */ -ENTRY(arm1020_dma_unmap_area) +SYM_TYPED_FUNC_START(arm1020_dma_unmap_area) ret lr -ENDPROC(arm1020_dma_unmap_area) - - .globl arm1020_flush_kern_cache_louis - .equ arm1020_flush_kern_cache_louis, arm1020_flush_kern_cache_all - - @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) - define_cache_functions arm1020 +SYM_FUNC_END(arm1020_dma_unmap_area) .align 5 -ENTRY(cpu_arm1020_dcache_clean_area) +SYM_TYPED_FUNC_START(cpu_arm1020_dcache_clean_area) #ifndef CONFIG_CPU_DCACHE_DISABLE mov ip, #0 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry @@ -369,6 +375,7 @@ ENTRY(cpu_arm1020_dcache_clean_area) bhi 1b #endif ret lr +SYM_FUNC_END(cpu_arm1020_dcache_clean_area) /* =============================== PageTable ============================== */ @@ -380,7 +387,7 @@ ENTRY(cpu_arm1020_dcache_clean_area) * pgd: new page tables */ .align 5 -ENTRY(cpu_arm1020_switch_mm) +SYM_TYPED_FUNC_START(cpu_arm1020_switch_mm) #ifdef CONFIG_MMU #ifndef CONFIG_CPU_DCACHE_DISABLE mcr p15, 0, r3, c7, c10, 4 @@ -408,14 +415,15 @@ ENTRY(cpu_arm1020_switch_mm) mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs #endif /* CONFIG_MMU */ ret lr - +SYM_FUNC_END(cpu_arm1020_switch_mm) + /* * cpu_arm1020_set_pte(ptep, pte) * * Set a PTE and flush it out */ .align 5 -ENTRY(cpu_arm1020_set_pte_ext) +SYM_TYPED_FUNC_START(cpu_arm1020_set_pte_ext) #ifdef CONFIG_MMU armv3_set_pte_ext mov r0, r0 @@ -426,6 +434,7 @@ ENTRY(cpu_arm1020_set_pte_ext) mcr p15, 0, r0, c7, c10, 4 @ drain WB #endif /* CONFIG_MMU */ ret lr +SYM_FUNC_END(cpu_arm1020_set_pte_ext) .type __arm1020_setup, #function __arm1020_setup: diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S index df49b10250b8..64f031bf6eff 100644 --- a/arch/arm/mm/proc-arm1020e.S +++ b/arch/arm/mm/proc-arm1020e.S @@ -11,6 +11,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <linux/pgtable.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> @@ -56,18 +57,20 @@ /* * cpu_arm1020e_proc_init() */ -ENTRY(cpu_arm1020e_proc_init) +SYM_TYPED_FUNC_START(cpu_arm1020e_proc_init) ret lr +SYM_FUNC_END(cpu_arm1020e_proc_init) /* * cpu_arm1020e_proc_fin() */ -ENTRY(cpu_arm1020e_proc_fin) +SYM_TYPED_FUNC_START(cpu_arm1020e_proc_fin) mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches ret lr +SYM_FUNC_END(cpu_arm1020e_proc_fin) /* * cpu_arm1020e_reset(loc) @@ -80,7 +83,7 @@ ENTRY(cpu_arm1020e_proc_fin) */ .align 5 .pushsection .idmap.text, "ax" -ENTRY(cpu_arm1020e_reset) +SYM_TYPED_FUNC_START(cpu_arm1020e_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches mcr p15, 0, ip, c7, c10, 4 @ drain WB @@ -92,16 +95,17 @@ ENTRY(cpu_arm1020e_reset) bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register ret r0 -ENDPROC(cpu_arm1020e_reset) +SYM_FUNC_END(cpu_arm1020e_reset) .popsection /* * cpu_arm1020e_do_idle() */ .align 5 -ENTRY(cpu_arm1020e_do_idle) +SYM_TYPED_FUNC_START(cpu_arm1020e_do_idle) mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt ret lr +SYM_FUNC_END(cpu_arm1020e_do_idle) /* ================================= CACHE ================================ */ @@ -112,13 +116,13 @@ ENTRY(cpu_arm1020e_do_idle) * * Unconditionally clean and invalidate the entire icache. */ -ENTRY(arm1020e_flush_icache_all) +SYM_TYPED_FUNC_START(arm1020e_flush_icache_all) #ifndef CONFIG_CPU_ICACHE_DISABLE mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache #endif ret lr -ENDPROC(arm1020e_flush_icache_all) +SYM_FUNC_END(arm1020e_flush_icache_all) /* * flush_user_cache_all() @@ -126,14 +130,14 @@ ENDPROC(arm1020e_flush_icache_all) * Invalidate all cache entries in a particular address * space. */ -ENTRY(arm1020e_flush_user_cache_all) - /* FALLTHROUGH */ +SYM_FUNC_ALIAS(arm1020e_flush_user_cache_all, arm1020e_flush_kern_cache_all) + /* * flush_kern_cache_all() * * Clean and invalidate the entire cache. */ -ENTRY(arm1020e_flush_kern_cache_all) +SYM_TYPED_FUNC_START(arm1020e_flush_kern_cache_all) mov r2, #VM_EXEC mov ip, #0 __flush_whole_cache: @@ -153,6 +157,7 @@ __flush_whole_cache: #endif mcrne p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm1020e_flush_kern_cache_all) /* * flush_user_cache_range(start, end, flags) @@ -164,7 +169,7 @@ __flush_whole_cache: * - end - end address (exclusive) * - flags - vm_flags for this space */ -ENTRY(arm1020e_flush_user_cache_range) +SYM_TYPED_FUNC_START(arm1020e_flush_user_cache_range) mov ip, #0 sub r3, r1, r0 @ calculate total size cmp r3, #CACHE_DLIMIT @@ -182,6 +187,7 @@ ENTRY(arm1020e_flush_user_cache_range) #endif mcrne p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm1020e_flush_user_cache_range) /* * coherent_kern_range(start, end) @@ -193,8 +199,12 @@ ENTRY(arm1020e_flush_user_cache_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(arm1020e_coherent_kern_range) - /* FALLTHROUGH */ +SYM_TYPED_FUNC_START(arm1020e_coherent_kern_range) +#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ + b arm1020e_coherent_user_range +#endif +SYM_FUNC_END(arm1020e_coherent_kern_range) + /* * coherent_user_range(start, end) * @@ -205,7 +215,7 @@ ENTRY(arm1020e_coherent_kern_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(arm1020e_coherent_user_range) +SYM_TYPED_FUNC_START(arm1020e_coherent_user_range) mov ip, #0 bic r0, r0, #CACHE_DLINESIZE - 1 1: @@ -221,6 +231,7 @@ ENTRY(arm1020e_coherent_user_range) mcr p15, 0, ip, c7, c10, 4 @ drain WB mov r0, #0 ret lr +SYM_FUNC_END(arm1020e_coherent_user_range) /* * flush_kern_dcache_area(void *addr, size_t size) @@ -231,7 +242,7 @@ ENTRY(arm1020e_coherent_user_range) * - addr - kernel address * - size - region size */ -ENTRY(arm1020e_flush_kern_dcache_area) +SYM_TYPED_FUNC_START(arm1020e_flush_kern_dcache_area) mov ip, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE add r1, r0, r1 @@ -242,6 +253,7 @@ ENTRY(arm1020e_flush_kern_dcache_area) #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm1020e_flush_kern_dcache_area) /* * dma_inv_range(start, end) @@ -302,7 +314,7 @@ arm1020e_dma_clean_range: * - start - virtual start address * - end - virtual end address */ -ENTRY(arm1020e_dma_flush_range) +SYM_TYPED_FUNC_START(arm1020e_dma_flush_range) mov ip, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE bic r0, r0, #CACHE_DLINESIZE - 1 @@ -313,6 +325,7 @@ ENTRY(arm1020e_dma_flush_range) #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm1020e_dma_flush_range) /* * dma_map_area(start, size, dir) @@ -320,13 +333,13 @@ ENTRY(arm1020e_dma_flush_range) * - size - size of region * - dir - DMA direction */ -ENTRY(arm1020e_dma_map_area) +SYM_TYPED_FUNC_START(arm1020e_dma_map_area) add r1, r1, r0 cmp r2, #DMA_TO_DEVICE beq arm1020e_dma_clean_range bcs arm1020e_dma_inv_range b arm1020e_dma_flush_range -ENDPROC(arm1020e_dma_map_area) +SYM_FUNC_END(arm1020e_dma_map_area) /* * dma_unmap_area(start, size, dir) @@ -334,18 +347,12 @@ ENDPROC(arm1020e_dma_map_area) * - size - size of region * - dir - DMA direction */ -ENTRY(arm1020e_dma_unmap_area) +SYM_TYPED_FUNC_START(arm1020e_dma_unmap_area) ret lr -ENDPROC(arm1020e_dma_unmap_area) - - .globl arm1020e_flush_kern_cache_louis - .equ arm1020e_flush_kern_cache_louis, arm1020e_flush_kern_cache_all - - @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) - define_cache_functions arm1020e +SYM_FUNC_END(arm1020e_dma_unmap_area) .align 5 -ENTRY(cpu_arm1020e_dcache_clean_area) +SYM_TYPED_FUNC_START(cpu_arm1020e_dcache_clean_area) #ifndef CONFIG_CPU_DCACHE_DISABLE mov ip, #0 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry @@ -354,6 +361,7 @@ ENTRY(cpu_arm1020e_dcache_clean_area) bhi 1b #endif ret lr +SYM_FUNC_END(cpu_arm1020e_dcache_clean_area) /* =============================== PageTable ============================== */ @@ -365,7 +373,7 @@ ENTRY(cpu_arm1020e_dcache_clean_area) * pgd: new page tables */ .align 5 -ENTRY(cpu_arm1020e_switch_mm) +SYM_TYPED_FUNC_START(cpu_arm1020e_switch_mm) #ifdef CONFIG_MMU #ifndef CONFIG_CPU_DCACHE_DISABLE mcr p15, 0, r3, c7, c10, 4 @@ -392,14 +400,15 @@ ENTRY(cpu_arm1020e_switch_mm) mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs #endif ret lr - +SYM_FUNC_END(cpu_arm1020e_switch_mm) + /* * cpu_arm1020e_set_pte(ptep, pte) * * Set a PTE and flush it out */ .align 5 -ENTRY(cpu_arm1020e_set_pte_ext) +SYM_TYPED_FUNC_START(cpu_arm1020e_set_pte_ext) #ifdef CONFIG_MMU armv3_set_pte_ext mov r0, r0 @@ -408,6 +417,7 @@ ENTRY(cpu_arm1020e_set_pte_ext) #endif #endif /* CONFIG_MMU */ ret lr +SYM_FUNC_END(cpu_arm1020e_set_pte_ext) .type __arm1020e_setup, #function __arm1020e_setup: diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S index e89ce467f672..42ed5ed07252 100644 --- a/arch/arm/mm/proc-arm1022.S +++ b/arch/arm/mm/proc-arm1022.S @@ -11,6 +11,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <linux/pgtable.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> @@ -56,18 +57,20 @@ /* * cpu_arm1022_proc_init() */ -ENTRY(cpu_arm1022_proc_init) +SYM_TYPED_FUNC_START(cpu_arm1022_proc_init) ret lr +SYM_FUNC_END(cpu_arm1022_proc_init) /* * cpu_arm1022_proc_fin() */ -ENTRY(cpu_arm1022_proc_fin) +SYM_TYPED_FUNC_START(cpu_arm1022_proc_fin) mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches ret lr +SYM_FUNC_END(cpu_arm1022_proc_fin) /* * cpu_arm1022_reset(loc) @@ -80,7 +83,7 @@ ENTRY(cpu_arm1022_proc_fin) */ .align 5 .pushsection .idmap.text, "ax" -ENTRY(cpu_arm1022_reset) +SYM_TYPED_FUNC_START(cpu_arm1022_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches mcr p15, 0, ip, c7, c10, 4 @ drain WB @@ -92,16 +95,17 @@ ENTRY(cpu_arm1022_reset) bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register ret r0 -ENDPROC(cpu_arm1022_reset) +SYM_FUNC_END(cpu_arm1022_reset) .popsection /* * cpu_arm1022_do_idle() */ .align 5 -ENTRY(cpu_arm1022_do_idle) +SYM_TYPED_FUNC_START(cpu_arm1022_do_idle) mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt ret lr +SYM_FUNC_END(cpu_arm1022_do_idle) /* ================================= CACHE ================================ */ @@ -112,13 +116,13 @@ ENTRY(cpu_arm1022_do_idle) * * Unconditionally clean and invalidate the entire icache. */ -ENTRY(arm1022_flush_icache_all) +SYM_TYPED_FUNC_START(arm1022_flush_icache_all) #ifndef CONFIG_CPU_ICACHE_DISABLE mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache #endif ret lr -ENDPROC(arm1022_flush_icache_all) +SYM_FUNC_END(arm1022_flush_icache_all) /* * flush_user_cache_all() @@ -126,14 +130,14 @@ ENDPROC(arm1022_flush_icache_all) * Invalidate all cache entries in a particular address * space. */ -ENTRY(arm1022_flush_user_cache_all) - /* FALLTHROUGH */ +SYM_FUNC_ALIAS(arm1022_flush_user_cache_all, arm1022_flush_kern_cache_all) + /* * flush_kern_cache_all() * * Clean and invalidate the entire cache. */ -ENTRY(arm1022_flush_kern_cache_all) +SYM_TYPED_FUNC_START(arm1022_flush_kern_cache_all) mov r2, #VM_EXEC mov ip, #0 __flush_whole_cache: @@ -152,6 +156,7 @@ __flush_whole_cache: #endif mcrne p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm1022_flush_kern_cache_all) /* * flush_user_cache_range(start, end, flags) @@ -163,7 +168,7 @@ __flush_whole_cache: * - end - end address (exclusive) * - flags - vm_flags for this space */ -ENTRY(arm1022_flush_user_cache_range) +SYM_TYPED_FUNC_START(arm1022_flush_user_cache_range) mov ip, #0 sub r3, r1, r0 @ calculate total size cmp r3, #CACHE_DLIMIT @@ -181,6 +186,7 @@ ENTRY(arm1022_flush_user_cache_range) #endif mcrne p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm1022_flush_user_cache_range) /* * coherent_kern_range(start, end) @@ -192,8 +198,11 @@ ENTRY(arm1022_flush_user_cache_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(arm1022_coherent_kern_range) - /* FALLTHROUGH */ +SYM_TYPED_FUNC_START(arm1022_coherent_kern_range) +#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ + b arm1022_coherent_user_range +#endif +SYM_FUNC_END(arm1022_coherent_kern_range) /* * coherent_user_range(start, end) @@ -205,7 +214,7 @@ ENTRY(arm1022_coherent_kern_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(arm1022_coherent_user_range) +SYM_TYPED_FUNC_START(arm1022_coherent_user_range) mov ip, #0 bic r0, r0, #CACHE_DLINESIZE - 1 1: @@ -221,6 +230,7 @@ ENTRY(arm1022_coherent_user_range) mcr p15, 0, ip, c7, c10, 4 @ drain WB mov r0, #0 ret lr +SYM_FUNC_END(arm1022_coherent_user_range) /* * flush_kern_dcache_area(void *addr, size_t size) @@ -231,7 +241,7 @@ ENTRY(arm1022_coherent_user_range) * - addr - kernel address * - size - region size */ -ENTRY(arm1022_flush_kern_dcache_area) +SYM_TYPED_FUNC_START(arm1022_flush_kern_dcache_area) mov ip, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE add r1, r0, r1 @@ -242,6 +252,7 @@ ENTRY(arm1022_flush_kern_dcache_area) #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm1022_flush_kern_dcache_area) /* * dma_inv_range(start, end) @@ -302,7 +313,7 @@ arm1022_dma_clean_range: * - start - virtual start address * - end - virtual end address */ -ENTRY(arm1022_dma_flush_range) +SYM_TYPED_FUNC_START(arm1022_dma_flush_range) mov ip, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE bic r0, r0, #CACHE_DLINESIZE - 1 @@ -313,6 +324,7 @@ ENTRY(arm1022_dma_flush_range) #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm1022_dma_flush_range) /* * dma_map_area(start, size, dir) @@ -320,13 +332,13 @@ ENTRY(arm1022_dma_flush_range) * - size - size of region * - dir - DMA direction */ -ENTRY(arm1022_dma_map_area) +SYM_TYPED_FUNC_START(arm1022_dma_map_area) add r1, r1, r0 cmp r2, #DMA_TO_DEVICE beq arm1022_dma_clean_range bcs arm1022_dma_inv_range b arm1022_dma_flush_range -ENDPROC(arm1022_dma_map_area) +SYM_FUNC_END(arm1022_dma_map_area) /* * dma_unmap_area(start, size, dir) @@ -334,18 +346,12 @@ ENDPROC(arm1022_dma_map_area) * - size - size of region * - dir - DMA direction */ -ENTRY(arm1022_dma_unmap_area) +SYM_TYPED_FUNC_START(arm1022_dma_unmap_area) ret lr -ENDPROC(arm1022_dma_unmap_area) - - .globl arm1022_flush_kern_cache_louis - .equ arm1022_flush_kern_cache_louis, arm1022_flush_kern_cache_all - - @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) - define_cache_functions arm1022 +SYM_FUNC_END(arm1022_dma_unmap_area) .align 5 -ENTRY(cpu_arm1022_dcache_clean_area) +SYM_TYPED_FUNC_START(cpu_arm1022_dcache_clean_area) #ifndef CONFIG_CPU_DCACHE_DISABLE mov ip, #0 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry @@ -354,6 +360,7 @@ ENTRY(cpu_arm1022_dcache_clean_area) bhi 1b #endif ret lr +SYM_FUNC_END(cpu_arm1022_dcache_clean_area) /* =============================== PageTable ============================== */ @@ -365,7 +372,7 @@ ENTRY(cpu_arm1022_dcache_clean_area) * pgd: new page tables */ .align 5 -ENTRY(cpu_arm1022_switch_mm) +SYM_TYPED_FUNC_START(cpu_arm1022_switch_mm) #ifdef CONFIG_MMU #ifndef CONFIG_CPU_DCACHE_DISABLE mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 16 segments @@ -385,14 +392,15 @@ ENTRY(cpu_arm1022_switch_mm) mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs #endif ret lr - +SYM_FUNC_END(cpu_arm1022_switch_mm) + /* * cpu_arm1022_set_pte_ext(ptep, pte, ext) * * Set a PTE and flush it out */ .align 5 -ENTRY(cpu_arm1022_set_pte_ext) +SYM_TYPED_FUNC_START(cpu_arm1022_set_pte_ext) #ifdef CONFIG_MMU armv3_set_pte_ext mov r0, r0 @@ -401,6 +409,7 @@ ENTRY(cpu_arm1022_set_pte_ext) #endif #endif /* CONFIG_MMU */ ret lr +SYM_FUNC_END(cpu_arm1022_set_pte_ext) .type __arm1022_setup, #function __arm1022_setup: diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S index 7fdd1a205e8e..b3ae62cd553a 100644 --- a/arch/arm/mm/proc-arm1026.S +++ b/arch/arm/mm/proc-arm1026.S @@ -11,6 +11,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <linux/pgtable.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> @@ -56,18 +57,20 @@ /* * cpu_arm1026_proc_init() */ -ENTRY(cpu_arm1026_proc_init) +SYM_TYPED_FUNC_START(cpu_arm1026_proc_init) ret lr +SYM_FUNC_END(cpu_arm1026_proc_init) /* * cpu_arm1026_proc_fin() */ -ENTRY(cpu_arm1026_proc_fin) +SYM_TYPED_FUNC_START(cpu_arm1026_proc_fin) mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches ret lr +SYM_FUNC_END(cpu_arm1026_proc_fin) /* * cpu_arm1026_reset(loc) @@ -80,7 +83,7 @@ ENTRY(cpu_arm1026_proc_fin) */ .align 5 .pushsection .idmap.text, "ax" -ENTRY(cpu_arm1026_reset) +SYM_TYPED_FUNC_START(cpu_arm1026_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches mcr p15, 0, ip, c7, c10, 4 @ drain WB @@ -92,16 +95,17 @@ ENTRY(cpu_arm1026_reset) bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register ret r0 -ENDPROC(cpu_arm1026_reset) +SYM_FUNC_END(cpu_arm1026_reset) .popsection /* * cpu_arm1026_do_idle() */ .align 5 -ENTRY(cpu_arm1026_do_idle) +SYM_TYPED_FUNC_START(cpu_arm1026_do_idle) mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt ret lr +SYM_FUNC_END(cpu_arm1026_do_idle) /* ================================= CACHE ================================ */ @@ -112,13 +116,13 @@ ENTRY(cpu_arm1026_do_idle) * * Unconditionally clean and invalidate the entire icache. */ -ENTRY(arm1026_flush_icache_all) +SYM_TYPED_FUNC_START(arm1026_flush_icache_all) #ifndef CONFIG_CPU_ICACHE_DISABLE mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache #endif ret lr -ENDPROC(arm1026_flush_icache_all) +SYM_FUNC_END(arm1026_flush_icache_all) /* * flush_user_cache_all() @@ -126,14 +130,14 @@ ENDPROC(arm1026_flush_icache_all) * Invalidate all cache entries in a particular address * space. */ -ENTRY(arm1026_flush_user_cache_all) - /* FALLTHROUGH */ +SYM_FUNC_ALIAS(arm1026_flush_user_cache_all, arm1026_flush_kern_cache_all) + /* * flush_kern_cache_all() * * Clean and invalidate the entire cache. */ -ENTRY(arm1026_flush_kern_cache_all) +SYM_TYPED_FUNC_START(arm1026_flush_kern_cache_all) mov r2, #VM_EXEC mov ip, #0 __flush_whole_cache: @@ -147,6 +151,7 @@ __flush_whole_cache: #endif mcrne p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm1026_flush_kern_cache_all) /* * flush_user_cache_range(start, end, flags) @@ -158,7 +163,7 @@ __flush_whole_cache: * - end - end address (exclusive) * - flags - vm_flags for this space */ -ENTRY(arm1026_flush_user_cache_range) +SYM_TYPED_FUNC_START(arm1026_flush_user_cache_range) mov ip, #0 sub r3, r1, r0 @ calculate total size cmp r3, #CACHE_DLIMIT @@ -176,6 +181,7 @@ ENTRY(arm1026_flush_user_cache_range) #endif mcrne p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm1026_flush_user_cache_range) /* * coherent_kern_range(start, end) @@ -187,8 +193,12 @@ ENTRY(arm1026_flush_user_cache_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(arm1026_coherent_kern_range) - /* FALLTHROUGH */ +SYM_TYPED_FUNC_START(arm1026_coherent_kern_range) +#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ + b arm1026_coherent_user_range +#endif +SYM_FUNC_END(arm1026_coherent_kern_range) + /* * coherent_user_range(start, end) * @@ -199,7 +209,7 @@ ENTRY(arm1026_coherent_kern_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(arm1026_coherent_user_range) +SYM_TYPED_FUNC_START(arm1026_coherent_user_range) mov ip, #0 bic r0, r0, #CACHE_DLINESIZE - 1 1: @@ -215,6 +225,7 @@ ENTRY(arm1026_coherent_user_range) mcr p15, 0, ip, c7, c10, 4 @ drain WB mov r0, #0 ret lr +SYM_FUNC_END(arm1026_coherent_user_range) /* * flush_kern_dcache_area(void *addr, size_t size) @@ -225,7 +236,7 @@ ENTRY(arm1026_coherent_user_range) * - addr - kernel address * - size - region size */ -ENTRY(arm1026_flush_kern_dcache_area) +SYM_TYPED_FUNC_START(arm1026_flush_kern_dcache_area) mov ip, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE add r1, r0, r1 @@ -236,6 +247,7 @@ ENTRY(arm1026_flush_kern_dcache_area) #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm1026_flush_kern_dcache_area) /* * dma_inv_range(start, end) @@ -296,7 +308,7 @@ arm1026_dma_clean_range: * - start - virtual start address * - end - virtual end address */ -ENTRY(arm1026_dma_flush_range) +SYM_TYPED_FUNC_START(arm1026_dma_flush_range) mov ip, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE bic r0, r0, #CACHE_DLINESIZE - 1 @@ -307,6 +319,7 @@ ENTRY(arm1026_dma_flush_range) #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm1026_dma_flush_range) /* * dma_map_area(start, size, dir) @@ -314,13 +327,13 @@ ENTRY(arm1026_dma_flush_range) * - size - size of region * - dir - DMA direction */ -ENTRY(arm1026_dma_map_area) +SYM_TYPED_FUNC_START(arm1026_dma_map_area) add r1, r1, r0 cmp r2, #DMA_TO_DEVICE beq arm1026_dma_clean_range bcs arm1026_dma_inv_range b arm1026_dma_flush_range -ENDPROC(arm1026_dma_map_area) +SYM_FUNC_END(arm1026_dma_map_area) /* * dma_unmap_area(start, size, dir) @@ -328,18 +341,12 @@ ENDPROC(arm1026_dma_map_area) * - size - size of region * - dir - DMA direction */ -ENTRY(arm1026_dma_unmap_area) +SYM_TYPED_FUNC_START(arm1026_dma_unmap_area) ret lr -ENDPROC(arm1026_dma_unmap_area) - - .globl arm1026_flush_kern_cache_louis - .equ arm1026_flush_kern_cache_louis, arm1026_flush_kern_cache_all - - @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) - define_cache_functions arm1026 +SYM_FUNC_END(arm1026_dma_unmap_area) .align 5 -ENTRY(cpu_arm1026_dcache_clean_area) +SYM_TYPED_FUNC_START(cpu_arm1026_dcache_clean_area) #ifndef CONFIG_CPU_DCACHE_DISABLE mov ip, #0 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry @@ -348,6 +355,7 @@ ENTRY(cpu_arm1026_dcache_clean_area) bhi 1b #endif ret lr +SYM_FUNC_END(cpu_arm1026_dcache_clean_area) /* =============================== PageTable ============================== */ @@ -359,7 +367,7 @@ ENTRY(cpu_arm1026_dcache_clean_area) * pgd: new page tables */ .align 5 -ENTRY(cpu_arm1026_switch_mm) +SYM_TYPED_FUNC_START(cpu_arm1026_switch_mm) #ifdef CONFIG_MMU mov r1, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE @@ -374,14 +382,15 @@ ENTRY(cpu_arm1026_switch_mm) mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs #endif ret lr - +SYM_FUNC_END(cpu_arm1026_switch_mm) + /* * cpu_arm1026_set_pte_ext(ptep, pte, ext) * * Set a PTE and flush it out */ .align 5 -ENTRY(cpu_arm1026_set_pte_ext) +SYM_TYPED_FUNC_START(cpu_arm1026_set_pte_ext) #ifdef CONFIG_MMU armv3_set_pte_ext mov r0, r0 @@ -390,6 +399,7 @@ ENTRY(cpu_arm1026_set_pte_ext) #endif #endif /* CONFIG_MMU */ ret lr +SYM_FUNC_END(cpu_arm1026_set_pte_ext) .type __arm1026_setup, #function __arm1026_setup: diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S index 3b687e6dd9fd..59732c334e1d 100644 --- a/arch/arm/mm/proc-arm720.S +++ b/arch/arm/mm/proc-arm720.S @@ -20,6 +20,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <linux/pgtable.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> @@ -35,24 +36,30 @@ * * Notes : This processor does not require these */ -ENTRY(cpu_arm720_dcache_clean_area) -ENTRY(cpu_arm720_proc_init) +SYM_TYPED_FUNC_START(cpu_arm720_dcache_clean_area) ret lr +SYM_FUNC_END(cpu_arm720_dcache_clean_area) -ENTRY(cpu_arm720_proc_fin) +SYM_TYPED_FUNC_START(cpu_arm720_proc_init) + ret lr +SYM_FUNC_END(cpu_arm720_proc_init) + +SYM_TYPED_FUNC_START(cpu_arm720_proc_fin) mrc p15, 0, r0, c1, c0, 0 bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches ret lr +SYM_FUNC_END(cpu_arm720_proc_fin) /* * Function: arm720_proc_do_idle(void) * Params : r0 = unused * Purpose : put the processor in proper idle mode */ -ENTRY(cpu_arm720_do_idle) +SYM_TYPED_FUNC_START(cpu_arm720_do_idle) ret lr +SYM_FUNC_END(cpu_arm720_do_idle) /* * Function: arm720_switch_mm(unsigned long pgd_phys) @@ -60,7 +67,7 @@ ENTRY(cpu_arm720_do_idle) * Purpose : Perform a task switch, saving the old process' state and restoring * the new. */ -ENTRY(cpu_arm720_switch_mm) +SYM_TYPED_FUNC_START(cpu_arm720_switch_mm) #ifdef CONFIG_MMU mov r1, #0 mcr p15, 0, r1, c7, c7, 0 @ invalidate cache @@ -68,6 +75,7 @@ ENTRY(cpu_arm720_switch_mm) mcr p15, 0, r1, c8, c7, 0 @ flush TLB (v4) #endif ret lr +SYM_FUNC_END(cpu_arm720_switch_mm) /* * Function: arm720_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext) @@ -76,11 +84,12 @@ ENTRY(cpu_arm720_switch_mm) * Purpose : Set a PTE and flush it out of any WB cache */ .align 5 -ENTRY(cpu_arm720_set_pte_ext) +SYM_TYPED_FUNC_START(cpu_arm720_set_pte_ext) #ifdef CONFIG_MMU armv3_set_pte_ext wc_disable=0 #endif ret lr +SYM_FUNC_END(cpu_arm720_set_pte_ext) /* * Function: arm720_reset @@ -88,7 +97,7 @@ ENTRY(cpu_arm720_set_pte_ext) * Notes : This sets up everything for a reset */ .pushsection .idmap.text, "ax" -ENTRY(cpu_arm720_reset) +SYM_TYPED_FUNC_START(cpu_arm720_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate cache #ifdef CONFIG_MMU @@ -99,7 +108,7 @@ ENTRY(cpu_arm720_reset) bic ip, ip, #0x2100 @ ..v....s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register ret r0 -ENDPROC(cpu_arm720_reset) +SYM_FUNC_END(cpu_arm720_reset) .popsection .type __arm710_setup, #function diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S index f2ec3bc60874..78854df63964 100644 --- a/arch/arm/mm/proc-arm740.S +++ b/arch/arm/mm/proc-arm740.S @@ -6,6 +6,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <linux/pgtable.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> @@ -24,21 +25,32 @@ * * These are not required. */ -ENTRY(cpu_arm740_proc_init) -ENTRY(cpu_arm740_do_idle) -ENTRY(cpu_arm740_dcache_clean_area) -ENTRY(cpu_arm740_switch_mm) +SYM_TYPED_FUNC_START(cpu_arm740_proc_init) ret lr +SYM_FUNC_END(cpu_arm740_proc_init) + +SYM_TYPED_FUNC_START(cpu_arm740_do_idle) + ret lr +SYM_FUNC_END(cpu_arm740_do_idle) + +SYM_TYPED_FUNC_START(cpu_arm740_dcache_clean_area) + ret lr +SYM_FUNC_END(cpu_arm740_dcache_clean_area) + +SYM_TYPED_FUNC_START(cpu_arm740_switch_mm) + ret lr +SYM_FUNC_END(cpu_arm740_switch_mm) /* * cpu_arm740_proc_fin() */ -ENTRY(cpu_arm740_proc_fin) +SYM_TYPED_FUNC_START(cpu_arm740_proc_fin) mrc p15, 0, r0, c1, c0, 0 bic r0, r0, #0x3f000000 @ bank/f/lock/s bic r0, r0, #0x0000000c @ w-buffer/cache mcr p15, 0, r0, c1, c0, 0 @ disable caches ret lr +SYM_FUNC_END(cpu_arm740_proc_fin) /* * cpu_arm740_reset(loc) @@ -46,14 +58,14 @@ ENTRY(cpu_arm740_proc_fin) * Notes : This sets up everything for a reset */ .pushsection .idmap.text, "ax" -ENTRY(cpu_arm740_reset) +SYM_TYPED_FUNC_START(cpu_arm740_reset) mov ip, #0 mcr p15, 0, ip, c7, c0, 0 @ invalidate cache mrc p15, 0, ip, c1, c0, 0 @ get ctrl register bic ip, ip, #0x0000000c @ ............wc.. mcr p15, 0, ip, c1, c0, 0 @ ctrl register ret r0 -ENDPROC(cpu_arm740_reset) +SYM_FUNC_END(cpu_arm740_reset) .popsection .type __arm740_setup, #function diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S index 01bbe7576c1c..baa3d4472147 100644 --- a/arch/arm/mm/proc-arm7tdmi.S +++ b/arch/arm/mm/proc-arm7tdmi.S @@ -6,6 +6,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <linux/pgtable.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> @@ -23,18 +24,29 @@ * cpu_arm7tdmi_switch_mm() * * These are not required. - */ -ENTRY(cpu_arm7tdmi_proc_init) -ENTRY(cpu_arm7tdmi_do_idle) -ENTRY(cpu_arm7tdmi_dcache_clean_area) -ENTRY(cpu_arm7tdmi_switch_mm) - ret lr +*/ +SYM_TYPED_FUNC_START(cpu_arm7tdmi_proc_init) + ret lr +SYM_FUNC_END(cpu_arm7tdmi_proc_init) + +SYM_TYPED_FUNC_START(cpu_arm7tdmi_do_idle) + ret lr +SYM_FUNC_END(cpu_arm7tdmi_do_idle) + +SYM_TYPED_FUNC_START(cpu_arm7tdmi_dcache_clean_area) + ret lr +SYM_FUNC_END(cpu_arm7tdmi_dcache_clean_area) + +SYM_TYPED_FUNC_START(cpu_arm7tdmi_switch_mm) + ret lr +SYM_FUNC_END(cpu_arm7tdmi_switch_mm) /* * cpu_arm7tdmi_proc_fin() - */ -ENTRY(cpu_arm7tdmi_proc_fin) - ret lr +*/ +SYM_TYPED_FUNC_START(cpu_arm7tdmi_proc_fin) + ret lr +SYM_FUNC_END(cpu_arm7tdmi_proc_fin) /* * Function: cpu_arm7tdmi_reset(loc) @@ -42,9 +54,9 @@ ENTRY(cpu_arm7tdmi_proc_fin) * Purpose : Sets up everything for a reset and jump to the location for soft reset. */ .pushsection .idmap.text, "ax" -ENTRY(cpu_arm7tdmi_reset) +SYM_TYPED_FUNC_START(cpu_arm7tdmi_reset) ret r0 -ENDPROC(cpu_arm7tdmi_reset) +SYM_FUNC_END(cpu_arm7tdmi_reset) .popsection .type __arm7tdmi_setup, #function diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S index a234cd8ba5e6..a30df54ad5fa 100644 --- a/arch/arm/mm/proc-arm920.S +++ b/arch/arm/mm/proc-arm920.S @@ -13,6 +13,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <linux/pgtable.h> #include <asm/assembler.h> #include <asm/hwcap.h> @@ -48,18 +49,20 @@ /* * cpu_arm920_proc_init() */ -ENTRY(cpu_arm920_proc_init) +SYM_TYPED_FUNC_START(cpu_arm920_proc_init) ret lr +SYM_FUNC_END(cpu_arm920_proc_init) /* * cpu_arm920_proc_fin() */ -ENTRY(cpu_arm920_proc_fin) +SYM_TYPED_FUNC_START(cpu_arm920_proc_fin) mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches ret lr +SYM_FUNC_END(cpu_arm920_proc_fin) /* * cpu_arm920_reset(loc) @@ -72,7 +75,7 @@ ENTRY(cpu_arm920_proc_fin) */ .align 5 .pushsection .idmap.text, "ax" -ENTRY(cpu_arm920_reset) +SYM_TYPED_FUNC_START(cpu_arm920_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches mcr p15, 0, ip, c7, c10, 4 @ drain WB @@ -84,17 +87,17 @@ ENTRY(cpu_arm920_reset) bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register ret r0 -ENDPROC(cpu_arm920_reset) +SYM_FUNC_END(cpu_arm920_reset) .popsection /* * cpu_arm920_do_idle() */ .align 5 -ENTRY(cpu_arm920_do_idle) +SYM_TYPED_FUNC_START(cpu_arm920_do_idle) mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt ret lr - +SYM_FUNC_END(cpu_arm920_do_idle) #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH @@ -103,11 +106,11 @@ ENTRY(cpu_arm920_do_idle) * * Unconditionally clean and invalidate the entire icache. */ -ENTRY(arm920_flush_icache_all) +SYM_TYPED_FUNC_START(arm920_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache ret lr -ENDPROC(arm920_flush_icache_all) +SYM_FUNC_END(arm920_flush_icache_all) /* * flush_user_cache_all() @@ -115,15 +118,14 @@ ENDPROC(arm920_flush_icache_all) * Invalidate all cache entries in a particular address * space. */ -ENTRY(arm920_flush_user_cache_all) - /* FALLTHROUGH */ +SYM_FUNC_ALIAS(arm920_flush_user_cache_all, arm920_flush_kern_cache_all) /* * flush_kern_cache_all() * * Clean and invalidate the entire cache. */ -ENTRY(arm920_flush_kern_cache_all) +SYM_TYPED_FUNC_START(arm920_flush_kern_cache_all) mov r2, #VM_EXEC mov ip, #0 __flush_whole_cache: @@ -138,6 +140,7 @@ __flush_whole_cache: mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache mcrne p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm920_flush_kern_cache_all) /* * flush_user_cache_range(start, end, flags) @@ -149,7 +152,7 @@ __flush_whole_cache: * - end - end address (exclusive) * - flags - vm_flags for address space */ -ENTRY(arm920_flush_user_cache_range) +SYM_TYPED_FUNC_START(arm920_flush_user_cache_range) mov ip, #0 sub r3, r1, r0 @ calculate total size cmp r3, #CACHE_DLIMIT @@ -164,6 +167,7 @@ ENTRY(arm920_flush_user_cache_range) tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm920_flush_user_cache_range) /* * coherent_kern_range(start, end) @@ -175,8 +179,11 @@ ENTRY(arm920_flush_user_cache_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(arm920_coherent_kern_range) - /* FALLTHROUGH */ +SYM_TYPED_FUNC_START(arm920_coherent_kern_range) +#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ + b arm920_coherent_user_range +#endif +SYM_FUNC_END(arm920_coherent_kern_range) /* * coherent_user_range(start, end) @@ -188,7 +195,7 @@ ENTRY(arm920_coherent_kern_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(arm920_coherent_user_range) +SYM_TYPED_FUNC_START(arm920_coherent_user_range) bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry @@ -198,6 +205,7 @@ ENTRY(arm920_coherent_user_range) mcr p15, 0, r0, c7, c10, 4 @ drain WB mov r0, #0 ret lr +SYM_FUNC_END(arm920_coherent_user_range) /* * flush_kern_dcache_area(void *addr, size_t size) @@ -208,7 +216,7 @@ ENTRY(arm920_coherent_user_range) * - addr - kernel address * - size - region size */ -ENTRY(arm920_flush_kern_dcache_area) +SYM_TYPED_FUNC_START(arm920_flush_kern_dcache_area) add r1, r0, r1 1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry add r0, r0, #CACHE_DLINESIZE @@ -218,6 +226,7 @@ ENTRY(arm920_flush_kern_dcache_area) mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm920_flush_kern_dcache_area) /* * dma_inv_range(start, end) @@ -272,7 +281,7 @@ arm920_dma_clean_range: * - start - virtual start address * - end - virtual end address */ -ENTRY(arm920_dma_flush_range) +SYM_TYPED_FUNC_START(arm920_dma_flush_range) bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry add r0, r0, #CACHE_DLINESIZE @@ -280,6 +289,7 @@ ENTRY(arm920_dma_flush_range) blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm920_dma_flush_range) /* * dma_map_area(start, size, dir) @@ -287,13 +297,13 @@ ENTRY(arm920_dma_flush_range) * - size - size of region * - dir - DMA direction */ -ENTRY(arm920_dma_map_area) +SYM_TYPED_FUNC_START(arm920_dma_map_area) add r1, r1, r0 cmp r2, #DMA_TO_DEVICE beq arm920_dma_clean_range bcs arm920_dma_inv_range b arm920_dma_flush_range -ENDPROC(arm920_dma_map_area) +SYM_FUNC_END(arm920_dma_map_area) /* * dma_unmap_area(start, size, dir) @@ -301,24 +311,20 @@ ENDPROC(arm920_dma_map_area) * - size - size of region * - dir - DMA direction */ -ENTRY(arm920_dma_unmap_area) +SYM_TYPED_FUNC_START(arm920_dma_unmap_area) ret lr -ENDPROC(arm920_dma_unmap_area) +SYM_FUNC_END(arm920_dma_unmap_area) - .globl arm920_flush_kern_cache_louis - .equ arm920_flush_kern_cache_louis, arm920_flush_kern_cache_all - - @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) - define_cache_functions arm920 -#endif +#endif /* !CONFIG_CPU_DCACHE_WRITETHROUGH */ -ENTRY(cpu_arm920_dcache_clean_area) +SYM_TYPED_FUNC_START(cpu_arm920_dcache_clean_area) 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, #CACHE_DLINESIZE subs r1, r1, #CACHE_DLINESIZE bhi 1b ret lr +SYM_FUNC_END(cpu_arm920_dcache_clean_area) /* =============================== PageTable ============================== */ @@ -330,7 +336,7 @@ ENTRY(cpu_arm920_dcache_clean_area) * pgd: new page tables */ .align 5 -ENTRY(cpu_arm920_switch_mm) +SYM_TYPED_FUNC_START(cpu_arm920_switch_mm) #ifdef CONFIG_MMU mov ip, #0 #ifdef CONFIG_CPU_DCACHE_WRITETHROUGH @@ -354,6 +360,7 @@ ENTRY(cpu_arm920_switch_mm) mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs #endif ret lr +SYM_FUNC_END(cpu_arm920_switch_mm) /* * cpu_arm920_set_pte(ptep, pte, ext) @@ -361,7 +368,7 @@ ENTRY(cpu_arm920_switch_mm) * Set a PTE and flush it out */ .align 5 -ENTRY(cpu_arm920_set_pte_ext) +SYM_TYPED_FUNC_START(cpu_arm920_set_pte_ext) #ifdef CONFIG_MMU armv3_set_pte_ext mov r0, r0 @@ -369,21 +376,22 @@ ENTRY(cpu_arm920_set_pte_ext) mcr p15, 0, r0, c7, c10, 4 @ drain WB #endif ret lr +SYM_FUNC_END(cpu_arm920_set_pte_ext) /* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */ .globl cpu_arm920_suspend_size .equ cpu_arm920_suspend_size, 4 * 3 #ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_arm920_do_suspend) +SYM_TYPED_FUNC_START(cpu_arm920_do_suspend) stmfd sp!, {r4 - r6, lr} mrc p15, 0, r4, c13, c0, 0 @ PID mrc p15, 0, r5, c3, c0, 0 @ Domain ID mrc p15, 0, r6, c1, c0, 0 @ Control register stmia r0, {r4 - r6} ldmfd sp!, {r4 - r6, pc} -ENDPROC(cpu_arm920_do_suspend) +SYM_FUNC_END(cpu_arm920_do_suspend) -ENTRY(cpu_arm920_do_resume) +SYM_TYPED_FUNC_START(cpu_arm920_do_resume) mov ip, #0 mcr p15, 0, ip, c8, c7, 0 @ invalidate I+D TLBs mcr p15, 0, ip, c7, c7, 0 @ invalidate I+D caches @@ -393,7 +401,7 @@ ENTRY(cpu_arm920_do_resume) mcr p15, 0, r1, c2, c0, 0 @ TTB address mov r0, r6 @ control register b cpu_resume_mmu -ENDPROC(cpu_arm920_do_resume) +SYM_FUNC_END(cpu_arm920_do_resume) #endif .type __arm920_setup, #function diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S index 53c029dcfd83..aac4e048100d 100644 --- a/arch/arm/mm/proc-arm922.S +++ b/arch/arm/mm/proc-arm922.S @@ -14,6 +14,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <linux/pgtable.h> #include <asm/assembler.h> #include <asm/hwcap.h> @@ -50,18 +51,20 @@ /* * cpu_arm922_proc_init() */ -ENTRY(cpu_arm922_proc_init) +SYM_TYPED_FUNC_START(cpu_arm922_proc_init) ret lr +SYM_FUNC_END(cpu_arm922_proc_init) /* * cpu_arm922_proc_fin() */ -ENTRY(cpu_arm922_proc_fin) +SYM_TYPED_FUNC_START(cpu_arm922_proc_fin) mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches ret lr +SYM_FUNC_END(cpu_arm922_proc_fin) /* * cpu_arm922_reset(loc) @@ -74,7 +77,7 @@ ENTRY(cpu_arm922_proc_fin) */ .align 5 .pushsection .idmap.text, "ax" -ENTRY(cpu_arm922_reset) +SYM_TYPED_FUNC_START(cpu_arm922_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches mcr p15, 0, ip, c7, c10, 4 @ drain WB @@ -86,17 +89,17 @@ ENTRY(cpu_arm922_reset) bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register ret r0 -ENDPROC(cpu_arm922_reset) +SYM_FUNC_END(cpu_arm922_reset) .popsection /* * cpu_arm922_do_idle() */ .align 5 -ENTRY(cpu_arm922_do_idle) +SYM_TYPED_FUNC_START(cpu_arm922_do_idle) mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt ret lr - +SYM_FUNC_END(cpu_arm922_do_idle) #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH @@ -105,11 +108,11 @@ ENTRY(cpu_arm922_do_idle) * * Unconditionally clean and invalidate the entire icache. */ -ENTRY(arm922_flush_icache_all) +SYM_TYPED_FUNC_START(arm922_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache ret lr -ENDPROC(arm922_flush_icache_all) +SYM_FUNC_END(arm922_flush_icache_all) /* * flush_user_cache_all() @@ -117,15 +120,14 @@ ENDPROC(arm922_flush_icache_all) * Clean and invalidate all cache entries in a particular * address space. */ -ENTRY(arm922_flush_user_cache_all) - /* FALLTHROUGH */ +SYM_FUNC_ALIAS(arm922_flush_user_cache_all, arm922_flush_kern_cache_all) /* * flush_kern_cache_all() * * Clean and invalidate the entire cache. */ -ENTRY(arm922_flush_kern_cache_all) +SYM_TYPED_FUNC_START(arm922_flush_kern_cache_all) mov r2, #VM_EXEC mov ip, #0 __flush_whole_cache: @@ -140,6 +142,7 @@ __flush_whole_cache: mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache mcrne p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm922_flush_kern_cache_all) /* * flush_user_cache_range(start, end, flags) @@ -151,7 +154,7 @@ __flush_whole_cache: * - end - end address (exclusive) * - flags - vm_flags describing address space */ -ENTRY(arm922_flush_user_cache_range) +SYM_TYPED_FUNC_START(arm922_flush_user_cache_range) mov ip, #0 sub r3, r1, r0 @ calculate total size cmp r3, #CACHE_DLIMIT @@ -166,6 +169,7 @@ ENTRY(arm922_flush_user_cache_range) tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm922_flush_user_cache_range) /* * coherent_kern_range(start, end) @@ -177,8 +181,11 @@ ENTRY(arm922_flush_user_cache_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(arm922_coherent_kern_range) - /* FALLTHROUGH */ +SYM_TYPED_FUNC_START(arm922_coherent_kern_range) +#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ + b arm922_coherent_user_range +#endif +SYM_FUNC_END(arm922_coherent_kern_range) /* * coherent_user_range(start, end) @@ -190,7 +197,7 @@ ENTRY(arm922_coherent_kern_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(arm922_coherent_user_range) +SYM_TYPED_FUNC_START(arm922_coherent_user_range) bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry @@ -200,6 +207,7 @@ ENTRY(arm922_coherent_user_range) mcr p15, 0, r0, c7, c10, 4 @ drain WB mov r0, #0 ret lr +SYM_FUNC_END(arm922_coherent_user_range) /* * flush_kern_dcache_area(void *addr, size_t size) @@ -210,7 +218,7 @@ ENTRY(arm922_coherent_user_range) * - addr - kernel address * - size - region size */ -ENTRY(arm922_flush_kern_dcache_area) +SYM_TYPED_FUNC_START(arm922_flush_kern_dcache_area) add r1, r0, r1 1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry add r0, r0, #CACHE_DLINESIZE @@ -220,6 +228,7 @@ ENTRY(arm922_flush_kern_dcache_area) mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm922_flush_kern_dcache_area) /* * dma_inv_range(start, end) @@ -274,7 +283,7 @@ arm922_dma_clean_range: * - start - virtual start address * - end - virtual end address */ -ENTRY(arm922_dma_flush_range) +SYM_TYPED_FUNC_START(arm922_dma_flush_range) bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry add r0, r0, #CACHE_DLINESIZE @@ -282,6 +291,7 @@ ENTRY(arm922_dma_flush_range) blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm922_dma_flush_range) /* * dma_map_area(start, size, dir) @@ -289,13 +299,13 @@ ENTRY(arm922_dma_flush_range) * - size - size of region * - dir - DMA direction */ -ENTRY(arm922_dma_map_area) +SYM_TYPED_FUNC_START(arm922_dma_map_area) add r1, r1, r0 cmp r2, #DMA_TO_DEVICE beq arm922_dma_clean_range bcs arm922_dma_inv_range b arm922_dma_flush_range -ENDPROC(arm922_dma_map_area) +SYM_FUNC_END(arm922_dma_map_area) /* * dma_unmap_area(start, size, dir) @@ -303,19 +313,13 @@ ENDPROC(arm922_dma_map_area) * - size - size of region * - dir - DMA direction */ -ENTRY(arm922_dma_unmap_area) +SYM_TYPED_FUNC_START(arm922_dma_unmap_area) ret lr -ENDPROC(arm922_dma_unmap_area) - - .globl arm922_flush_kern_cache_louis - .equ arm922_flush_kern_cache_louis, arm922_flush_kern_cache_all - - @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) - define_cache_functions arm922 -#endif +SYM_FUNC_END(arm922_dma_unmap_area) +#endif /* !CONFIG_CPU_DCACHE_WRITETHROUGH */ -ENTRY(cpu_arm922_dcache_clean_area) +SYM_TYPED_FUNC_START(cpu_arm922_dcache_clean_area) #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, #CACHE_DLINESIZE @@ -323,6 +327,7 @@ ENTRY(cpu_arm922_dcache_clean_area) bhi 1b #endif ret lr +SYM_FUNC_END(cpu_arm922_dcache_clean_area) /* =============================== PageTable ============================== */ @@ -334,7 +339,7 @@ ENTRY(cpu_arm922_dcache_clean_area) * pgd: new page tables */ .align 5 -ENTRY(cpu_arm922_switch_mm) +SYM_TYPED_FUNC_START(cpu_arm922_switch_mm) #ifdef CONFIG_MMU mov ip, #0 #ifdef CONFIG_CPU_DCACHE_WRITETHROUGH @@ -358,6 +363,7 @@ ENTRY(cpu_arm922_switch_mm) mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs #endif ret lr +SYM_FUNC_END(cpu_arm922_switch_mm) /* * cpu_arm922_set_pte_ext(ptep, pte, ext) @@ -365,7 +371,7 @@ ENTRY(cpu_arm922_switch_mm) * Set a PTE and flush it out */ .align 5 -ENTRY(cpu_arm922_set_pte_ext) +SYM_TYPED_FUNC_START(cpu_arm922_set_pte_ext) #ifdef CONFIG_MMU armv3_set_pte_ext mov r0, r0 @@ -373,6 +379,7 @@ ENTRY(cpu_arm922_set_pte_ext) mcr p15, 0, r0, c7, c10, 4 @ drain WB #endif /* CONFIG_MMU */ ret lr +SYM_FUNC_END(cpu_arm922_set_pte_ext) .type __arm922_setup, #function __arm922_setup: diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S index 0bfad62ea858..035941faeb2e 100644 --- a/arch/arm/mm/proc-arm925.S +++ b/arch/arm/mm/proc-arm925.S @@ -37,6 +37,7 @@ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <linux/pgtable.h> #include <asm/assembler.h> #include <asm/hwcap.h> @@ -71,18 +72,20 @@ /* * cpu_arm925_proc_init() */ -ENTRY(cpu_arm925_proc_init) +SYM_TYPED_FUNC_START(cpu_arm925_proc_init) ret lr +SYM_FUNC_END(cpu_arm925_proc_init) /* * cpu_arm925_proc_fin() */ -ENTRY(cpu_arm925_proc_fin) +SYM_TYPED_FUNC_START(cpu_arm925_proc_fin) mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches ret lr +SYM_FUNC_END(cpu_arm925_proc_fin) /* * cpu_arm925_reset(loc) @@ -95,14 +98,14 @@ ENTRY(cpu_arm925_proc_fin) */ .align 5 .pushsection .idmap.text, "ax" -ENTRY(cpu_arm925_reset) +SYM_TYPED_FUNC_START(cpu_arm925_reset) /* Send software reset to MPU and DSP */ mov ip, #0xff000000 orr ip, ip, #0x00fe0000 orr ip, ip, #0x0000ce00 mov r4, #1 strh r4, [ip, #0x10] -ENDPROC(cpu_arm925_reset) +SYM_FUNC_END(cpu_arm925_reset) .popsection mov ip, #0 @@ -123,7 +126,7 @@ ENDPROC(cpu_arm925_reset) * Called with IRQs disabled */ .align 10 -ENTRY(cpu_arm925_do_idle) +SYM_TYPED_FUNC_START(cpu_arm925_do_idle) mov r0, #0 mrc p15, 0, r1, c1, c0, 0 @ Read control register mcr p15, 0, r0, c7, c10, 4 @ Drain write buffer @@ -132,17 +135,18 @@ ENTRY(cpu_arm925_do_idle) mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt mcr p15, 0, r1, c1, c0, 0 @ Restore ICache enable ret lr +SYM_FUNC_END(cpu_arm925_do_idle) /* * flush_icache_all() * * Unconditionally clean and invalidate the entire icache. */ -ENTRY(arm925_flush_icache_all) +SYM_TYPED_FUNC_START(arm925_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache ret lr -ENDPROC(arm925_flush_icache_all) +SYM_FUNC_END(arm925_flush_icache_all) /* * flush_user_cache_all() @@ -150,15 +154,14 @@ ENDPROC(arm925_flush_icache_all) * Clean and invalidate all cache entries in a particular * address space. */ -ENTRY(arm925_flush_user_cache_all) - /* FALLTHROUGH */ +SYM_FUNC_ALIAS(arm925_flush_user_cache_all, arm925_flush_kern_cache_all) /* * flush_kern_cache_all() * * Clean and invalidate the entire cache. */ -ENTRY(arm925_flush_kern_cache_all) +SYM_TYPED_FUNC_START(arm925_flush_kern_cache_all) mov r2, #VM_EXEC mov ip, #0 __flush_whole_cache: @@ -175,6 +178,7 @@ __flush_whole_cache: mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache mcrne p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm925_flush_kern_cache_all) /* * flush_user_cache_range(start, end, flags) @@ -186,7 +190,7 @@ __flush_whole_cache: * - end - end address (exclusive) * - flags - vm_flags describing address space */ -ENTRY(arm925_flush_user_cache_range) +SYM_TYPED_FUNC_START(arm925_flush_user_cache_range) mov ip, #0 sub r3, r1, r0 @ calculate total size cmp r3, #CACHE_DLIMIT @@ -212,6 +216,7 @@ ENTRY(arm925_flush_user_cache_range) tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm925_flush_user_cache_range) /* * coherent_kern_range(start, end) @@ -223,8 +228,11 @@ ENTRY(arm925_flush_user_cache_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(arm925_coherent_kern_range) - /* FALLTHROUGH */ +SYM_TYPED_FUNC_START(arm925_coherent_kern_range) +#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ + b arm925_coherent_user_range +#endif +SYM_FUNC_END(arm925_coherent_kern_range) /* * coherent_user_range(start, end) @@ -236,7 +244,7 @@ ENTRY(arm925_coherent_kern_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(arm925_coherent_user_range) +SYM_TYPED_FUNC_START(arm925_coherent_user_range) bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry @@ -246,6 +254,7 @@ ENTRY(arm925_coherent_user_range) mcr p15, 0, r0, c7, c10, 4 @ drain WB mov r0, #0 ret lr +SYM_FUNC_END(arm925_coherent_user_range) /* * flush_kern_dcache_area(void *addr, size_t size) @@ -256,7 +265,7 @@ ENTRY(arm925_coherent_user_range) * - addr - kernel address * - size - region size */ -ENTRY(arm925_flush_kern_dcache_area) +SYM_TYPED_FUNC_START(arm925_flush_kern_dcache_area) add r1, r0, r1 1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry add r0, r0, #CACHE_DLINESIZE @@ -266,6 +275,7 @@ ENTRY(arm925_flush_kern_dcache_area) mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm925_flush_kern_dcache_area) /* * dma_inv_range(start, end) @@ -324,7 +334,7 @@ arm925_dma_clean_range: * - start - virtual start address * - end - virtual end address */ -ENTRY(arm925_dma_flush_range) +SYM_TYPED_FUNC_START(arm925_dma_flush_range) bic r0, r0, #CACHE_DLINESIZE - 1 1: #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH @@ -337,6 +347,7 @@ ENTRY(arm925_dma_flush_range) blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm925_dma_flush_range) /* * dma_map_area(start, size, dir) @@ -344,13 +355,13 @@ ENTRY(arm925_dma_flush_range) * - size - size of region * - dir - DMA direction */ -ENTRY(arm925_dma_map_area) +SYM_TYPED_FUNC_START(arm925_dma_map_area) add r1, r1, r0 cmp r2, #DMA_TO_DEVICE beq arm925_dma_clean_range bcs arm925_dma_inv_range b arm925_dma_flush_range -ENDPROC(arm925_dma_map_area) +SYM_FUNC_END(arm925_dma_map_area) /* * dma_unmap_area(start, size, dir) @@ -358,17 +369,11 @@ ENDPROC(arm925_dma_map_area) * - size - size of region * - dir - DMA direction */ -ENTRY(arm925_dma_unmap_area) +SYM_TYPED_FUNC_START(arm925_dma_unmap_area) ret lr -ENDPROC(arm925_dma_unmap_area) - - .globl arm925_flush_kern_cache_louis - .equ arm925_flush_kern_cache_louis, arm925_flush_kern_cache_all - - @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) - define_cache_functions arm925 +SYM_FUNC_END(arm925_dma_unmap_area) -ENTRY(cpu_arm925_dcache_clean_area) +SYM_TYPED_FUNC_START(cpu_arm925_dcache_clean_area) #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, #CACHE_DLINESIZE @@ -377,6 +382,7 @@ ENTRY(cpu_arm925_dcache_clean_area) #endif mcr p15, 0, r0, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(cpu_arm925_dcache_clean_area) /* =============================== PageTable ============================== */ @@ -388,7 +394,7 @@ ENTRY(cpu_arm925_dcache_clean_area) * pgd: new page tables */ .align 5 -ENTRY(cpu_arm925_switch_mm) +SYM_TYPED_FUNC_START(cpu_arm925_switch_mm) #ifdef CONFIG_MMU mov ip, #0 #ifdef CONFIG_CPU_DCACHE_WRITETHROUGH @@ -406,6 +412,7 @@ ENTRY(cpu_arm925_switch_mm) mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs #endif ret lr +SYM_FUNC_END(cpu_arm925_switch_mm) /* * cpu_arm925_set_pte_ext(ptep, pte, ext) @@ -413,7 +420,7 @@ ENTRY(cpu_arm925_switch_mm) * Set a PTE and flush it out */ .align 5 -ENTRY(cpu_arm925_set_pte_ext) +SYM_TYPED_FUNC_START(cpu_arm925_set_pte_ext) #ifdef CONFIG_MMU armv3_set_pte_ext mov r0, r0 @@ -423,6 +430,7 @@ ENTRY(cpu_arm925_set_pte_ext) mcr p15, 0, r0, c7, c10, 4 @ drain WB #endif /* CONFIG_MMU */ ret lr +SYM_FUNC_END(cpu_arm925_set_pte_ext) .type __arm925_setup, #function __arm925_setup: diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S index 0487a2c3439b..6f43d6af2d9a 100644 --- a/arch/arm/mm/proc-arm926.S +++ b/arch/arm/mm/proc-arm926.S @@ -13,6 +13,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <linux/pgtable.h> #include <asm/assembler.h> #include <asm/hwcap.h> @@ -40,18 +41,20 @@ /* * cpu_arm926_proc_init() */ -ENTRY(cpu_arm926_proc_init) +SYM_TYPED_FUNC_START(cpu_arm926_proc_init) ret lr +SYM_FUNC_END(cpu_arm926_proc_init) /* * cpu_arm926_proc_fin() */ -ENTRY(cpu_arm926_proc_fin) +SYM_TYPED_FUNC_START(cpu_arm926_proc_fin) mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches ret lr +SYM_FUNC_END(cpu_arm926_proc_fin) /* * cpu_arm926_reset(loc) @@ -64,7 +67,7 @@ ENTRY(cpu_arm926_proc_fin) */ .align 5 .pushsection .idmap.text, "ax" -ENTRY(cpu_arm926_reset) +SYM_TYPED_FUNC_START(cpu_arm926_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches mcr p15, 0, ip, c7, c10, 4 @ drain WB @@ -76,7 +79,7 @@ ENTRY(cpu_arm926_reset) bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register ret r0 -ENDPROC(cpu_arm926_reset) +SYM_FUNC_END(cpu_arm926_reset) .popsection /* @@ -85,7 +88,7 @@ ENDPROC(cpu_arm926_reset) * Called with IRQs disabled */ .align 10 -ENTRY(cpu_arm926_do_idle) +SYM_TYPED_FUNC_START(cpu_arm926_do_idle) mov r0, #0 mrc p15, 0, r1, c1, c0, 0 @ Read control register mcr p15, 0, r0, c7, c10, 4 @ Drain write buffer @@ -98,17 +101,18 @@ ENTRY(cpu_arm926_do_idle) mcr p15, 0, r1, c1, c0, 0 @ Restore ICache enable msr cpsr_c, r3 @ Restore FIQ state ret lr +SYM_FUNC_END(cpu_arm926_do_idle) /* * flush_icache_all() * * Unconditionally clean and invalidate the entire icache. */ -ENTRY(arm926_flush_icache_all) +SYM_TYPED_FUNC_START(arm926_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache ret lr -ENDPROC(arm926_flush_icache_all) +SYM_FUNC_END(arm926_flush_icache_all) /* * flush_user_cache_all() @@ -116,15 +120,14 @@ ENDPROC(arm926_flush_icache_all) * Clean and invalidate all cache entries in a particular * address space. */ -ENTRY(arm926_flush_user_cache_all) - /* FALLTHROUGH */ +SYM_FUNC_ALIAS(arm926_flush_user_cache_all, arm926_flush_kern_cache_all) /* * flush_kern_cache_all() * * Clean and invalidate the entire cache. */ -ENTRY(arm926_flush_kern_cache_all) +SYM_TYPED_FUNC_START(arm926_flush_kern_cache_all) mov r2, #VM_EXEC mov ip, #0 __flush_whole_cache: @@ -138,6 +141,7 @@ __flush_whole_cache: mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache mcrne p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm926_flush_kern_cache_all) /* * flush_user_cache_range(start, end, flags) @@ -149,7 +153,7 @@ __flush_whole_cache: * - end - end address (exclusive) * - flags - vm_flags describing address space */ -ENTRY(arm926_flush_user_cache_range) +SYM_TYPED_FUNC_START(arm926_flush_user_cache_range) mov ip, #0 sub r3, r1, r0 @ calculate total size cmp r3, #CACHE_DLIMIT @@ -175,6 +179,7 @@ ENTRY(arm926_flush_user_cache_range) tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm926_flush_user_cache_range) /* * coherent_kern_range(start, end) @@ -186,8 +191,11 @@ ENTRY(arm926_flush_user_cache_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(arm926_coherent_kern_range) - /* FALLTHROUGH */ +SYM_TYPED_FUNC_START(arm926_coherent_kern_range) +#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ + b arm926_coherent_user_range +#endif +SYM_FUNC_END(arm926_coherent_kern_range) /* * coherent_user_range(start, end) @@ -199,7 +207,7 @@ ENTRY(arm926_coherent_kern_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(arm926_coherent_user_range) +SYM_TYPED_FUNC_START(arm926_coherent_user_range) bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry @@ -209,6 +217,7 @@ ENTRY(arm926_coherent_user_range) mcr p15, 0, r0, c7, c10, 4 @ drain WB mov r0, #0 ret lr +SYM_FUNC_END(arm926_coherent_user_range) /* * flush_kern_dcache_area(void *addr, size_t size) @@ -219,7 +228,7 @@ ENTRY(arm926_coherent_user_range) * - addr - kernel address * - size - region size */ -ENTRY(arm926_flush_kern_dcache_area) +SYM_TYPED_FUNC_START(arm926_flush_kern_dcache_area) add r1, r0, r1 1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry add r0, r0, #CACHE_DLINESIZE @@ -229,6 +238,7 @@ ENTRY(arm926_flush_kern_dcache_area) mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm926_flush_kern_dcache_area) /* * dma_inv_range(start, end) @@ -287,7 +297,7 @@ arm926_dma_clean_range: * - start - virtual start address * - end - virtual end address */ -ENTRY(arm926_dma_flush_range) +SYM_TYPED_FUNC_START(arm926_dma_flush_range) bic r0, r0, #CACHE_DLINESIZE - 1 1: #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH @@ -300,6 +310,7 @@ ENTRY(arm926_dma_flush_range) blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm926_dma_flush_range) /* * dma_map_area(start, size, dir) @@ -307,13 +318,13 @@ ENTRY(arm926_dma_flush_range) * - size - size of region * - dir - DMA direction */ -ENTRY(arm926_dma_map_area) +SYM_TYPED_FUNC_START(arm926_dma_map_area) add r1, r1, r0 cmp r2, #DMA_TO_DEVICE beq arm926_dma_clean_range bcs arm926_dma_inv_range b arm926_dma_flush_range -ENDPROC(arm926_dma_map_area) +SYM_FUNC_END(arm926_dma_map_area) /* * dma_unmap_area(start, size, dir) @@ -321,17 +332,11 @@ ENDPROC(arm926_dma_map_area) * - size - size of region * - dir - DMA direction */ -ENTRY(arm926_dma_unmap_area) +SYM_TYPED_FUNC_START(arm926_dma_unmap_area) ret lr -ENDPROC(arm926_dma_unmap_area) - - .globl arm926_flush_kern_cache_louis - .equ arm926_flush_kern_cache_louis, arm926_flush_kern_cache_all +SYM_FUNC_END(arm926_dma_unmap_area) - @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) - define_cache_functions arm926 - -ENTRY(cpu_arm926_dcache_clean_area) +SYM_TYPED_FUNC_START(cpu_arm926_dcache_clean_area) #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, #CACHE_DLINESIZE @@ -340,6 +345,7 @@ ENTRY(cpu_arm926_dcache_clean_area) #endif mcr p15, 0, r0, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(cpu_arm926_dcache_clean_area) /* =============================== PageTable ============================== */ @@ -351,7 +357,8 @@ ENTRY(cpu_arm926_dcache_clean_area) * pgd: new page tables */ .align 5 -ENTRY(cpu_arm926_switch_mm) + +SYM_TYPED_FUNC_START(cpu_arm926_switch_mm) #ifdef CONFIG_MMU mov ip, #0 #ifdef CONFIG_CPU_DCACHE_WRITETHROUGH @@ -367,6 +374,7 @@ ENTRY(cpu_arm926_switch_mm) mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs #endif ret lr +SYM_FUNC_END(cpu_arm926_switch_mm) /* * cpu_arm926_set_pte_ext(ptep, pte, ext) @@ -374,7 +382,7 @@ ENTRY(cpu_arm926_switch_mm) * Set a PTE and flush it out */ .align 5 -ENTRY(cpu_arm926_set_pte_ext) +SYM_TYPED_FUNC_START(cpu_arm926_set_pte_ext) #ifdef CONFIG_MMU armv3_set_pte_ext mov r0, r0 @@ -384,21 +392,22 @@ ENTRY(cpu_arm926_set_pte_ext) mcr p15, 0, r0, c7, c10, 4 @ drain WB #endif ret lr +SYM_FUNC_END(cpu_arm926_set_pte_ext) /* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */ .globl cpu_arm926_suspend_size .equ cpu_arm926_suspend_size, 4 * 3 #ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_arm926_do_suspend) +SYM_TYPED_FUNC_START(cpu_arm926_do_suspend) stmfd sp!, {r4 - r6, lr} mrc p15, 0, r4, c13, c0, 0 @ PID mrc p15, 0, r5, c3, c0, 0 @ Domain ID mrc p15, 0, r6, c1, c0, 0 @ Control register stmia r0, {r4 - r6} ldmfd sp!, {r4 - r6, pc} -ENDPROC(cpu_arm926_do_suspend) +SYM_FUNC_END(cpu_arm926_do_suspend) -ENTRY(cpu_arm926_do_resume) +SYM_TYPED_FUNC_START(cpu_arm926_do_resume) mov ip, #0 mcr p15, 0, ip, c8, c7, 0 @ invalidate I+D TLBs mcr p15, 0, ip, c7, c7, 0 @ invalidate I+D caches @@ -408,7 +417,7 @@ ENTRY(cpu_arm926_do_resume) mcr p15, 0, r1, c2, c0, 0 @ TTB address mov r0, r6 @ control register b cpu_resume_mmu -ENDPROC(cpu_arm926_do_resume) +SYM_FUNC_END(cpu_arm926_do_resume) #endif .type __arm926_setup, #function diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S index cf9bfcc825ca..0d30bb25c42b 100644 --- a/arch/arm/mm/proc-arm940.S +++ b/arch/arm/mm/proc-arm940.S @@ -6,6 +6,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <linux/pgtable.h> #include <asm/assembler.h> #include <asm/hwcap.h> @@ -25,19 +26,24 @@ * * These are not required. */ -ENTRY(cpu_arm940_proc_init) -ENTRY(cpu_arm940_switch_mm) +SYM_TYPED_FUNC_START(cpu_arm940_proc_init) ret lr +SYM_FUNC_END(cpu_arm940_proc_init) + +SYM_TYPED_FUNC_START(cpu_arm940_switch_mm) + ret lr +SYM_FUNC_END(cpu_arm940_switch_mm) /* * cpu_arm940_proc_fin() */ -ENTRY(cpu_arm940_proc_fin) +SYM_TYPED_FUNC_START(cpu_arm940_proc_fin) mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x00001000 @ i-cache bic r0, r0, #0x00000004 @ d-cache mcr p15, 0, r0, c1, c0, 0 @ disable caches ret lr +SYM_FUNC_END(cpu_arm940_proc_fin) /* * cpu_arm940_reset(loc) @@ -45,7 +51,7 @@ ENTRY(cpu_arm940_proc_fin) * Notes : This sets up everything for a reset */ .pushsection .idmap.text, "ax" -ENTRY(cpu_arm940_reset) +SYM_TYPED_FUNC_START(cpu_arm940_reset) mov ip, #0 mcr p15, 0, ip, c7, c5, 0 @ flush I cache mcr p15, 0, ip, c7, c6, 0 @ flush D cache @@ -55,42 +61,43 @@ ENTRY(cpu_arm940_reset) bic ip, ip, #0x00001000 @ i-cache mcr p15, 0, ip, c1, c0, 0 @ ctrl register ret r0 -ENDPROC(cpu_arm940_reset) +SYM_FUNC_END(cpu_arm940_reset) .popsection /* * cpu_arm940_do_idle() */ .align 5 -ENTRY(cpu_arm940_do_idle) +SYM_TYPED_FUNC_START(cpu_arm940_do_idle) mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt ret lr +SYM_FUNC_END(cpu_arm940_do_idle) /* * flush_icache_all() * * Unconditionally clean and invalidate the entire icache. */ -ENTRY(arm940_flush_icache_all) +SYM_TYPED_FUNC_START(arm940_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache ret lr -ENDPROC(arm940_flush_icache_all) +SYM_FUNC_END(arm940_flush_icache_all) /* * flush_user_cache_all() */ -ENTRY(arm940_flush_user_cache_all) - /* FALLTHROUGH */ +SYM_FUNC_ALIAS(arm940_flush_user_cache_all, arm940_flush_kern_cache_all) /* * flush_kern_cache_all() * * Clean and invalidate the entire cache. */ -ENTRY(arm940_flush_kern_cache_all) +SYM_TYPED_FUNC_START(arm940_flush_kern_cache_all) mov r2, #VM_EXEC - /* FALLTHROUGH */ + b arm940_flush_user_cache_range +SYM_FUNC_END(arm940_flush_kern_cache_all) /* * flush_user_cache_range(start, end, flags) @@ -102,7 +109,7 @@ ENTRY(arm940_flush_kern_cache_all) * - end - end address (exclusive) * - flags - vm_flags describing address space */ -ENTRY(arm940_flush_user_cache_range) +SYM_TYPED_FUNC_START(arm940_flush_user_cache_range) mov ip, #0 #ifdef CONFIG_CPU_DCACHE_WRITETHROUGH mcr p15, 0, ip, c7, c6, 0 @ flush D cache @@ -119,6 +126,7 @@ ENTRY(arm940_flush_user_cache_range) mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache mcrne p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm940_flush_user_cache_range) /* * coherent_kern_range(start, end) @@ -130,8 +138,9 @@ ENTRY(arm940_flush_user_cache_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(arm940_coherent_kern_range) - /* FALLTHROUGH */ +SYM_TYPED_FUNC_START(arm940_coherent_kern_range) + b arm940_flush_kern_dcache_area +SYM_FUNC_END(arm940_coherent_kern_range) /* * coherent_user_range(start, end) @@ -143,8 +152,11 @@ ENTRY(arm940_coherent_kern_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(arm940_coherent_user_range) - /* FALLTHROUGH */ +SYM_TYPED_FUNC_START(arm940_coherent_user_range) +#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ + b arm940_flush_kern_dcache_area +#endif +SYM_FUNC_END(arm940_coherent_user_range) /* * flush_kern_dcache_area(void *addr, size_t size) @@ -155,7 +167,7 @@ ENTRY(arm940_coherent_user_range) * - addr - kernel address * - size - region size */ -ENTRY(arm940_flush_kern_dcache_area) +SYM_TYPED_FUNC_START(arm940_flush_kern_dcache_area) mov r0, #0 mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments 1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries @@ -167,6 +179,7 @@ ENTRY(arm940_flush_kern_dcache_area) mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm940_flush_kern_dcache_area) /* * dma_inv_range(start, end) @@ -199,7 +212,7 @@ arm940_dma_inv_range: * - end - virtual end address */ arm940_dma_clean_range: -ENTRY(cpu_arm940_dcache_clean_area) +SYM_TYPED_FUNC_START(cpu_arm940_dcache_clean_area) mov ip, #0 #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments @@ -212,6 +225,7 @@ ENTRY(cpu_arm940_dcache_clean_area) #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(cpu_arm940_dcache_clean_area) /* * dma_flush_range(start, end) @@ -222,7 +236,7 @@ ENTRY(cpu_arm940_dcache_clean_area) * - start - virtual start address * - end - virtual end address */ -ENTRY(arm940_dma_flush_range) +SYM_TYPED_FUNC_START(arm940_dma_flush_range) mov ip, #0 mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments 1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries @@ -238,6 +252,7 @@ ENTRY(arm940_dma_flush_range) bcs 1b @ segments 7 to 0 mcr p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm940_dma_flush_range) /* * dma_map_area(start, size, dir) @@ -245,13 +260,13 @@ ENTRY(arm940_dma_flush_range) * - size - size of region * - dir - DMA direction */ -ENTRY(arm940_dma_map_area) +SYM_TYPED_FUNC_START(arm940_dma_map_area) add r1, r1, r0 cmp r2, #DMA_TO_DEVICE beq arm940_dma_clean_range bcs arm940_dma_inv_range b arm940_dma_flush_range -ENDPROC(arm940_dma_map_area) +SYM_FUNC_END(arm940_dma_map_area) /* * dma_unmap_area(start, size, dir) @@ -259,15 +274,9 @@ ENDPROC(arm940_dma_map_area) * - size - size of region * - dir - DMA direction */ -ENTRY(arm940_dma_unmap_area) +SYM_TYPED_FUNC_START(arm940_dma_unmap_area) ret lr -ENDPROC(arm940_dma_unmap_area) - - .globl arm940_flush_kern_cache_louis - .equ arm940_flush_kern_cache_louis, arm940_flush_kern_cache_all - - @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) - define_cache_functions arm940 +SYM_FUNC_END(arm940_dma_unmap_area) .type __arm940_setup, #function __arm940_setup: diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S index 6fb3898ad1cd..27750ace2ced 100644 --- a/arch/arm/mm/proc-arm946.S +++ b/arch/arm/mm/proc-arm946.S @@ -8,6 +8,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <linux/pgtable.h> #include <asm/assembler.h> #include <asm/hwcap.h> @@ -32,19 +33,24 @@ * * These are not required. */ -ENTRY(cpu_arm946_proc_init) -ENTRY(cpu_arm946_switch_mm) +SYM_TYPED_FUNC_START(cpu_arm946_proc_init) ret lr +SYM_FUNC_END(cpu_arm946_proc_init) + +SYM_TYPED_FUNC_START(cpu_arm946_switch_mm) + ret lr +SYM_FUNC_END(cpu_arm946_switch_mm) /* * cpu_arm946_proc_fin() */ -ENTRY(cpu_arm946_proc_fin) +SYM_TYPED_FUNC_START(cpu_arm946_proc_fin) mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x00001000 @ i-cache bic r0, r0, #0x00000004 @ d-cache mcr p15, 0, r0, c1, c0, 0 @ disable caches ret lr +SYM_FUNC_END(cpu_arm946_proc_fin) /* * cpu_arm946_reset(loc) @@ -52,7 +58,7 @@ ENTRY(cpu_arm946_proc_fin) * Notes : This sets up everything for a reset */ .pushsection .idmap.text, "ax" -ENTRY(cpu_arm946_reset) +SYM_TYPED_FUNC_START(cpu_arm946_reset) mov ip, #0 mcr p15, 0, ip, c7, c5, 0 @ flush I cache mcr p15, 0, ip, c7, c6, 0 @ flush D cache @@ -62,40 +68,40 @@ ENTRY(cpu_arm946_reset) bic ip, ip, #0x00001000 @ i-cache mcr p15, 0, ip, c1, c0, 0 @ ctrl register ret r0 -ENDPROC(cpu_arm946_reset) +SYM_FUNC_END(cpu_arm946_reset) .popsection /* * cpu_arm946_do_idle() */ .align 5 -ENTRY(cpu_arm946_do_idle) +SYM_TYPED_FUNC_START(cpu_arm946_do_idle) mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt ret lr +SYM_FUNC_END(cpu_arm946_do_idle) /* * flush_icache_all() * * Unconditionally clean and invalidate the entire icache. */ -ENTRY(arm946_flush_icache_all) +SYM_TYPED_FUNC_START(arm946_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache ret lr -ENDPROC(arm946_flush_icache_all) +SYM_FUNC_END(arm946_flush_icache_all) /* * flush_user_cache_all() */ -ENTRY(arm946_flush_user_cache_all) - /* FALLTHROUGH */ +SYM_FUNC_ALIAS(arm946_flush_user_cache_all, arm946_flush_kern_cache_all) /* * flush_kern_cache_all() * * Clean and invalidate the entire cache. */ -ENTRY(arm946_flush_kern_cache_all) +SYM_TYPED_FUNC_START(arm946_flush_kern_cache_all) mov r2, #VM_EXEC mov ip, #0 __flush_whole_cache: @@ -114,6 +120,7 @@ __flush_whole_cache: mcrne p15, 0, ip, c7, c5, 0 @ flush I cache mcrne p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm946_flush_kern_cache_all) /* * flush_user_cache_range(start, end, flags) @@ -126,7 +133,7 @@ __flush_whole_cache: * - flags - vm_flags describing address space * (same as arm926) */ -ENTRY(arm946_flush_user_cache_range) +SYM_TYPED_FUNC_START(arm946_flush_user_cache_range) mov ip, #0 sub r3, r1, r0 @ calculate total size cmp r3, #CACHE_DLIMIT @@ -153,6 +160,7 @@ ENTRY(arm946_flush_user_cache_range) tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm946_flush_user_cache_range) /* * coherent_kern_range(start, end) @@ -164,8 +172,11 @@ ENTRY(arm946_flush_user_cache_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(arm946_coherent_kern_range) - /* FALLTHROUGH */ +SYM_TYPED_FUNC_START(arm946_coherent_kern_range) +#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ + b arm946_coherent_user_range +#endif +SYM_FUNC_END(arm946_coherent_kern_range) /* * coherent_user_range(start, end) @@ -178,7 +189,7 @@ ENTRY(arm946_coherent_kern_range) * - end - virtual end address * (same as arm926) */ -ENTRY(arm946_coherent_user_range) +SYM_TYPED_FUNC_START(arm946_coherent_user_range) bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry @@ -188,6 +199,7 @@ ENTRY(arm946_coherent_user_range) mcr p15, 0, r0, c7, c10, 4 @ drain WB mov r0, #0 ret lr +SYM_FUNC_END(arm946_coherent_user_range) /* * flush_kern_dcache_area(void *addr, size_t size) @@ -199,7 +211,7 @@ ENTRY(arm946_coherent_user_range) * - size - region size * (same as arm926) */ -ENTRY(arm946_flush_kern_dcache_area) +SYM_TYPED_FUNC_START(arm946_flush_kern_dcache_area) add r1, r0, r1 1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry add r0, r0, #CACHE_DLINESIZE @@ -209,6 +221,7 @@ ENTRY(arm946_flush_kern_dcache_area) mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm946_flush_kern_dcache_area) /* * dma_inv_range(start, end) @@ -268,7 +281,7 @@ arm946_dma_clean_range: * * (same as arm926) */ -ENTRY(arm946_dma_flush_range) +SYM_TYPED_FUNC_START(arm946_dma_flush_range) bic r0, r0, #CACHE_DLINESIZE - 1 1: #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH @@ -281,6 +294,7 @@ ENTRY(arm946_dma_flush_range) blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(arm946_dma_flush_range) /* * dma_map_area(start, size, dir) @@ -288,13 +302,13 @@ ENTRY(arm946_dma_flush_range) * - size - size of region * - dir - DMA direction */ -ENTRY(arm946_dma_map_area) +SYM_TYPED_FUNC_START(arm946_dma_map_area) add r1, r1, r0 cmp r2, #DMA_TO_DEVICE beq arm946_dma_clean_range bcs arm946_dma_inv_range b arm946_dma_flush_range -ENDPROC(arm946_dma_map_area) +SYM_FUNC_END(arm946_dma_map_area) /* * dma_unmap_area(start, size, dir) @@ -302,17 +316,11 @@ ENDPROC(arm946_dma_map_area) * - size - size of region * - dir - DMA direction */ -ENTRY(arm946_dma_unmap_area) +SYM_TYPED_FUNC_START(arm946_dma_unmap_area) ret lr -ENDPROC(arm946_dma_unmap_area) - - .globl arm946_flush_kern_cache_louis - .equ arm946_flush_kern_cache_louis, arm946_flush_kern_cache_all - - @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) - define_cache_functions arm946 +SYM_FUNC_END(arm946_dma_unmap_area) -ENTRY(cpu_arm946_dcache_clean_area) +SYM_TYPED_FUNC_START(cpu_arm946_dcache_clean_area) #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, #CACHE_DLINESIZE @@ -321,6 +329,7 @@ ENTRY(cpu_arm946_dcache_clean_area) #endif mcr p15, 0, r0, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(cpu_arm946_dcache_clean_area) .type __arm946_setup, #function __arm946_setup: diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S index a054c0e9c034..c480a8400eff 100644 --- a/arch/arm/mm/proc-arm9tdmi.S +++ b/arch/arm/mm/proc-arm9tdmi.S @@ -6,6 +6,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <linux/pgtable.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> @@ -24,17 +25,28 @@ * * These are not required. */ -ENTRY(cpu_arm9tdmi_proc_init) -ENTRY(cpu_arm9tdmi_do_idle) -ENTRY(cpu_arm9tdmi_dcache_clean_area) -ENTRY(cpu_arm9tdmi_switch_mm) +SYM_TYPED_FUNC_START(cpu_arm9tdmi_proc_init) ret lr +SYM_FUNC_END(cpu_arm9tdmi_proc_init) + +SYM_TYPED_FUNC_START(cpu_arm9tdmi_do_idle) + ret lr +SYM_FUNC_END(cpu_arm9tdmi_do_idle) + +SYM_TYPED_FUNC_START(cpu_arm9tdmi_dcache_clean_area) + ret lr +SYM_FUNC_END(cpu_arm9tdmi_dcache_clean_area) + +SYM_TYPED_FUNC_START(cpu_arm9tdmi_switch_mm) + ret lr +SYM_FUNC_END(cpu_arm9tdmi_switch_mm) /* * cpu_arm9tdmi_proc_fin() */ -ENTRY(cpu_arm9tdmi_proc_fin) +SYM_TYPED_FUNC_START(cpu_arm9tdmi_proc_fin) ret lr +SYM_FUNC_END(cpu_arm9tdmi_proc_fin) /* * Function: cpu_arm9tdmi_reset(loc) @@ -42,9 +54,9 @@ ENTRY(cpu_arm9tdmi_proc_fin) * Purpose : Sets up everything for a reset and jump to the location for soft reset. */ .pushsection .idmap.text, "ax" -ENTRY(cpu_arm9tdmi_reset) +SYM_TYPED_FUNC_START(cpu_arm9tdmi_reset) ret r0 -ENDPROC(cpu_arm9tdmi_reset) +SYM_FUNC_END(cpu_arm9tdmi_reset) .popsection .type __arm9tdmi_setup, #function diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S index 2c73e0d47d08..7c16ccac8a05 100644 --- a/arch/arm/mm/proc-fa526.S +++ b/arch/arm/mm/proc-fa526.S @@ -11,6 +11,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <linux/pgtable.h> #include <asm/assembler.h> #include <asm/hwcap.h> @@ -26,13 +27,14 @@ /* * cpu_fa526_proc_init() */ -ENTRY(cpu_fa526_proc_init) +SYM_TYPED_FUNC_START(cpu_fa526_proc_init) ret lr +SYM_FUNC_END(cpu_fa526_proc_init) /* * cpu_fa526_proc_fin() */ -ENTRY(cpu_fa526_proc_fin) +SYM_TYPED_FUNC_START(cpu_fa526_proc_fin) mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. @@ -40,6 +42,7 @@ ENTRY(cpu_fa526_proc_fin) nop nop ret lr +SYM_FUNC_END(cpu_fa526_proc_fin) /* * cpu_fa526_reset(loc) @@ -52,7 +55,7 @@ ENTRY(cpu_fa526_proc_fin) */ .align 4 .pushsection .idmap.text, "ax" -ENTRY(cpu_fa526_reset) +SYM_TYPED_FUNC_START(cpu_fa526_reset) /* TODO: Use CP8 if possible... */ mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches @@ -68,24 +71,25 @@ ENTRY(cpu_fa526_reset) nop nop ret r0 -ENDPROC(cpu_fa526_reset) +SYM_FUNC_END(cpu_fa526_reset) .popsection /* * cpu_fa526_do_idle() */ .align 4 -ENTRY(cpu_fa526_do_idle) +SYM_TYPED_FUNC_START(cpu_fa526_do_idle) ret lr +SYM_FUNC_END(cpu_fa526_do_idle) - -ENTRY(cpu_fa526_dcache_clean_area) +SYM_TYPED_FUNC_START(cpu_fa526_dcache_clean_area) 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, #CACHE_DLINESIZE subs r1, r1, #CACHE_DLINESIZE bhi 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(cpu_fa526_dcache_clean_area) /* =============================== PageTable ============================== */ @@ -97,7 +101,7 @@ ENTRY(cpu_fa526_dcache_clean_area) * pgd: new page tables */ .align 4 -ENTRY(cpu_fa526_switch_mm) +SYM_TYPED_FUNC_START(cpu_fa526_switch_mm) #ifdef CONFIG_MMU mov ip, #0 #ifdef CONFIG_CPU_DCACHE_WRITETHROUGH @@ -113,6 +117,7 @@ ENTRY(cpu_fa526_switch_mm) mcr p15, 0, ip, c8, c7, 0 @ invalidate UTLB #endif ret lr +SYM_FUNC_END(cpu_fa526_switch_mm) /* * cpu_fa526_set_pte_ext(ptep, pte, ext) @@ -120,7 +125,7 @@ ENTRY(cpu_fa526_switch_mm) * Set a PTE and flush it out */ .align 4 -ENTRY(cpu_fa526_set_pte_ext) +SYM_TYPED_FUNC_START(cpu_fa526_set_pte_ext) #ifdef CONFIG_MMU armv3_set_pte_ext mov r0, r0 @@ -129,6 +134,7 @@ ENTRY(cpu_fa526_set_pte_ext) mcr p15, 0, r0, c7, c10, 4 @ drain WB #endif ret lr +SYM_FUNC_END(cpu_fa526_set_pte_ext) .type __fa526_setup, #function __fa526_setup: diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S index 072ff9b451f8..f67b2ffac854 100644 --- a/arch/arm/mm/proc-feroceon.S +++ b/arch/arm/mm/proc-feroceon.S @@ -8,6 +8,7 @@ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <linux/pgtable.h> #include <asm/assembler.h> #include <asm/hwcap.h> @@ -43,7 +44,7 @@ __cache_params: /* * cpu_feroceon_proc_init() */ -ENTRY(cpu_feroceon_proc_init) +SYM_TYPED_FUNC_START(cpu_feroceon_proc_init) mrc p15, 0, r0, c0, c0, 1 @ read cache type register ldr r1, __cache_params mov r2, #(16 << 5) @@ -61,11 +62,12 @@ ENTRY(cpu_feroceon_proc_init) str_l r1, VFP_arch_feroceon, r2 #endif ret lr +SYM_FUNC_END(cpu_feroceon_proc_init) /* * cpu_feroceon_proc_fin() */ -ENTRY(cpu_feroceon_proc_fin) +SYM_TYPED_FUNC_START(cpu_feroceon_proc_fin) #if defined(CONFIG_CACHE_FEROCEON_L2) && \ !defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) mov r0, #0 @@ -78,6 +80,7 @@ ENTRY(cpu_feroceon_proc_fin) bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches ret lr +SYM_FUNC_END(cpu_feroceon_proc_fin) /* * cpu_feroceon_reset(loc) @@ -90,7 +93,7 @@ ENTRY(cpu_feroceon_proc_fin) */ .align 5 .pushsection .idmap.text, "ax" -ENTRY(cpu_feroceon_reset) +SYM_TYPED_FUNC_START(cpu_feroceon_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches mcr p15, 0, ip, c7, c10, 4 @ drain WB @@ -102,7 +105,7 @@ ENTRY(cpu_feroceon_reset) bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register ret r0 -ENDPROC(cpu_feroceon_reset) +SYM_FUNC_END(cpu_feroceon_reset) .popsection /* @@ -111,22 +114,23 @@ ENDPROC(cpu_feroceon_reset) * Called with IRQs disabled */ .align 5 -ENTRY(cpu_feroceon_do_idle) +SYM_TYPED_FUNC_START(cpu_feroceon_do_idle) mov r0, #0 mcr p15, 0, r0, c7, c10, 4 @ Drain write buffer mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt ret lr +SYM_FUNC_END(cpu_feroceon_do_idle) /* * flush_icache_all() * * Unconditionally clean and invalidate the entire icache. */ -ENTRY(feroceon_flush_icache_all) +SYM_TYPED_FUNC_START(feroceon_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache ret lr -ENDPROC(feroceon_flush_icache_all) +SYM_FUNC_END(feroceon_flush_icache_all) /* * flush_user_cache_all() @@ -135,15 +139,14 @@ ENDPROC(feroceon_flush_icache_all) * address space. */ .align 5 -ENTRY(feroceon_flush_user_cache_all) - /* FALLTHROUGH */ +SYM_FUNC_ALIAS(feroceon_flush_user_cache_all, feroceon_flush_kern_cache_all) /* * flush_kern_cache_all() * * Clean and invalidate the entire cache. */ -ENTRY(feroceon_flush_kern_cache_all) +SYM_TYPED_FUNC_START(feroceon_flush_kern_cache_all) mov r2, #VM_EXEC __flush_whole_cache: @@ -161,6 +164,7 @@ __flush_whole_cache: mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache mcrne p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(feroceon_flush_kern_cache_all) /* * flush_user_cache_range(start, end, flags) @@ -173,7 +177,7 @@ __flush_whole_cache: * - flags - vm_flags describing address space */ .align 5 -ENTRY(feroceon_flush_user_cache_range) +SYM_TYPED_FUNC_START(feroceon_flush_user_cache_range) sub r3, r1, r0 @ calculate total size cmp r3, #CACHE_DLIMIT bgt __flush_whole_cache @@ -190,6 +194,7 @@ ENTRY(feroceon_flush_user_cache_range) mov ip, #0 mcrne p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(feroceon_flush_user_cache_range) /* * coherent_kern_range(start, end) @@ -202,8 +207,11 @@ ENTRY(feroceon_flush_user_cache_range) * - end - virtual end address */ .align 5 -ENTRY(feroceon_coherent_kern_range) - /* FALLTHROUGH */ +SYM_TYPED_FUNC_START(feroceon_coherent_kern_range) +#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ + b feroceon_coherent_user_range +#endif +SYM_FUNC_END(feroceon_coherent_kern_range) /* * coherent_user_range(start, end) @@ -215,7 +223,7 @@ ENTRY(feroceon_coherent_kern_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(feroceon_coherent_user_range) +SYM_TYPED_FUNC_START(feroceon_coherent_user_range) bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry @@ -225,6 +233,7 @@ ENTRY(feroceon_coherent_user_range) mcr p15, 0, r0, c7, c10, 4 @ drain WB mov r0, #0 ret lr +SYM_FUNC_END(feroceon_coherent_user_range) /* * flush_kern_dcache_area(void *addr, size_t size) @@ -236,7 +245,7 @@ ENTRY(feroceon_coherent_user_range) * - size - region size */ .align 5 -ENTRY(feroceon_flush_kern_dcache_area) +SYM_TYPED_FUNC_START(feroceon_flush_kern_dcache_area) add r1, r0, r1 1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry add r0, r0, #CACHE_DLINESIZE @@ -246,9 +255,10 @@ ENTRY(feroceon_flush_kern_dcache_area) mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(feroceon_flush_kern_dcache_area) .align 5 -ENTRY(feroceon_range_flush_kern_dcache_area) +SYM_TYPED_FUNC_START(feroceon_range_flush_kern_dcache_area) mrs r2, cpsr add r1, r0, #PAGE_SZ - CACHE_DLINESIZE @ top addr is inclusive orr r3, r2, #PSR_I_BIT @@ -260,6 +270,7 @@ ENTRY(feroceon_range_flush_kern_dcache_area) mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(feroceon_range_flush_kern_dcache_area) /* * dma_inv_range(start, end) @@ -346,7 +357,7 @@ feroceon_range_dma_clean_range: * - end - virtual end address */ .align 5 -ENTRY(feroceon_dma_flush_range) +SYM_TYPED_FUNC_START(feroceon_dma_flush_range) bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry add r0, r0, #CACHE_DLINESIZE @@ -354,9 +365,10 @@ ENTRY(feroceon_dma_flush_range) blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(feroceon_dma_flush_range) .align 5 -ENTRY(feroceon_range_dma_flush_range) +SYM_TYPED_FUNC_START(feroceon_range_dma_flush_range) mrs r2, cpsr cmp r1, r0 subne r1, r1, #1 @ top address is inclusive @@ -367,6 +379,7 @@ ENTRY(feroceon_range_dma_flush_range) msr cpsr_c, r2 @ restore interrupts mcr p15, 0, r0, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(feroceon_range_dma_flush_range) /* * dma_map_area(start, size, dir) @@ -374,13 +387,13 @@ ENTRY(feroceon_range_dma_flush_range) * - size - size of region * - dir - DMA direction */ -ENTRY(feroceon_dma_map_area) +SYM_TYPED_FUNC_START(feroceon_dma_map_area) add r1, r1, r0 cmp r2, #DMA_TO_DEVICE beq feroceon_dma_clean_range bcs feroceon_dma_inv_range b feroceon_dma_flush_range -ENDPROC(feroceon_dma_map_area) +SYM_FUNC_END(feroceon_dma_map_area) /* * dma_map_area(start, size, dir) @@ -388,13 +401,13 @@ ENDPROC(feroceon_dma_map_area) * - size - size of region * - dir - DMA direction */ -ENTRY(feroceon_range_dma_map_area) +SYM_TYPED_FUNC_START(feroceon_range_dma_map_area) add r1, r1, r0 cmp r2, #DMA_TO_DEVICE beq feroceon_range_dma_clean_range bcs feroceon_range_dma_inv_range b feroceon_range_dma_flush_range -ENDPROC(feroceon_range_dma_map_area) +SYM_FUNC_END(feroceon_range_dma_map_area) /* * dma_unmap_area(start, size, dir) @@ -402,39 +415,12 @@ ENDPROC(feroceon_range_dma_map_area) * - size - size of region * - dir - DMA direction */ -ENTRY(feroceon_dma_unmap_area) +SYM_TYPED_FUNC_START(feroceon_dma_unmap_area) ret lr -ENDPROC(feroceon_dma_unmap_area) - - .globl feroceon_flush_kern_cache_louis - .equ feroceon_flush_kern_cache_louis, feroceon_flush_kern_cache_all - - @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) - define_cache_functions feroceon - -.macro range_alias basename - .globl feroceon_range_\basename - .type feroceon_range_\basename , %function - .equ feroceon_range_\basename , feroceon_\basename -.endm - -/* - * Most of the cache functions are unchanged for this case. - * Export suitable alias symbols for the unchanged functions: - */ - range_alias flush_icache_all - range_alias flush_user_cache_all - range_alias flush_kern_cache_all - range_alias flush_kern_cache_louis - range_alias flush_user_cache_range - range_alias coherent_kern_range - range_alias coherent_user_range - range_alias dma_unmap_area - - define_cache_functions feroceon_range +SYM_FUNC_END(feroceon_dma_unmap_area) .align 5 -ENTRY(cpu_feroceon_dcache_clean_area) +SYM_TYPED_FUNC_START(cpu_feroceon_dcache_clean_area) #if defined(CONFIG_CACHE_FEROCEON_L2) && \ !defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) mov r2, r0 @@ -453,6 +439,7 @@ ENTRY(cpu_feroceon_dcache_clean_area) #endif mcr p15, 0, r0, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(cpu_feroceon_dcache_clean_area) /* =============================== PageTable ============================== */ @@ -464,7 +451,7 @@ ENTRY(cpu_feroceon_dcache_clean_area) * pgd: new page tables */ .align 5 -ENTRY(cpu_feroceon_switch_mm) +SYM_TYPED_FUNC_START(cpu_feroceon_switch_mm) #ifdef CONFIG_MMU /* * Note: we wish to call __flush_whole_cache but we need to preserve @@ -485,6 +472,7 @@ ENTRY(cpu_feroceon_switch_mm) #else ret lr #endif +SYM_FUNC_END(cpu_feroceon_switch_mm) /* * cpu_feroceon_set_pte_ext(ptep, pte, ext) @@ -492,7 +480,7 @@ ENTRY(cpu_feroceon_switch_mm) * Set a PTE and flush it out */ .align 5 -ENTRY(cpu_feroceon_set_pte_ext) +SYM_TYPED_FUNC_START(cpu_feroceon_set_pte_ext) #ifdef CONFIG_MMU armv3_set_pte_ext wc_disable=0 mov r0, r0 @@ -504,21 +492,22 @@ ENTRY(cpu_feroceon_set_pte_ext) mcr p15, 0, r0, c7, c10, 4 @ drain WB #endif ret lr +SYM_FUNC_END(cpu_feroceon_set_pte_ext) /* Suspend/resume support: taken from arch/arm/mm/proc-arm926.S */ .globl cpu_feroceon_suspend_size .equ cpu_feroceon_suspend_size, 4 * 3 #ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_feroceon_do_suspend) +SYM_TYPED_FUNC_START(cpu_feroceon_do_suspend) stmfd sp!, {r4 - r6, lr} mrc p15, 0, r4, c13, c0, 0 @ PID mrc p15, 0, r5, c3, c0, 0 @ Domain ID mrc p15, 0, r6, c1, c0, 0 @ Control register stmia r0, {r4 - r6} ldmfd sp!, {r4 - r6, pc} -ENDPROC(cpu_feroceon_do_suspend) +SYM_FUNC_END(cpu_feroceon_do_suspend) -ENTRY(cpu_feroceon_do_resume) +SYM_TYPED_FUNC_START(cpu_feroceon_do_resume) mov ip, #0 mcr p15, 0, ip, c8, c7, 0 @ invalidate I+D TLBs mcr p15, 0, ip, c7, c7, 0 @ invalidate I+D caches @@ -528,7 +517,7 @@ ENTRY(cpu_feroceon_do_resume) mcr p15, 0, r1, c2, c0, 0 @ TTB address mov r0, r6 @ control register b cpu_resume_mmu -ENDPROC(cpu_feroceon_do_resume) +SYM_FUNC_END(cpu_feroceon_do_resume) #endif .type __feroceon_setup, #function diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index e43f6d716b4b..e388c4cc0c44 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -320,39 +320,6 @@ ENTRY(\name\()_processor_functions) #endif .endm -.macro define_cache_functions name:req - .align 2 - .type \name\()_cache_fns, #object -ENTRY(\name\()_cache_fns) - .long \name\()_flush_icache_all - .long \name\()_flush_kern_cache_all - .long \name\()_flush_kern_cache_louis - .long \name\()_flush_user_cache_all - .long \name\()_flush_user_cache_range - .long \name\()_coherent_kern_range - .long \name\()_coherent_user_range - .long \name\()_flush_kern_dcache_area - .long \name\()_dma_map_area - .long \name\()_dma_unmap_area - .long \name\()_dma_flush_range - .size \name\()_cache_fns, . - \name\()_cache_fns -.endm - -.macro define_tlb_functions name:req, flags_up:req, flags_smp - .type \name\()_tlb_fns, #object - .align 2 -ENTRY(\name\()_tlb_fns) - .long \name\()_flush_user_tlb_range - .long \name\()_flush_kern_tlb_range - .ifnb \flags_smp - ALT_SMP(.long \flags_smp ) - ALT_UP(.long \flags_up ) - .else - .long \flags_up - .endif - .size \name\()_tlb_fns, . - \name\()_tlb_fns -.endm - .macro globl_equ x, y .globl \x .equ \x, \y diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S index 1645ccaffe96..8e9f38da863a 100644 --- a/arch/arm/mm/proc-mohawk.S +++ b/arch/arm/mm/proc-mohawk.S @@ -9,6 +9,7 @@ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <linux/pgtable.h> #include <asm/assembler.h> #include <asm/hwcap.h> @@ -31,18 +32,20 @@ /* * cpu_mohawk_proc_init() */ -ENTRY(cpu_mohawk_proc_init) +SYM_TYPED_FUNC_START(cpu_mohawk_proc_init) ret lr +SYM_FUNC_END(cpu_mohawk_proc_init) /* * cpu_mohawk_proc_fin() */ -ENTRY(cpu_mohawk_proc_fin) +SYM_TYPED_FUNC_START(cpu_mohawk_proc_fin) mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x1800 @ ...iz........... bic r0, r0, #0x0006 @ .............ca. mcr p15, 0, r0, c1, c0, 0 @ disable caches ret lr +SYM_FUNC_END(cpu_mohawk_proc_fin) /* * cpu_mohawk_reset(loc) @@ -57,7 +60,7 @@ ENTRY(cpu_mohawk_proc_fin) */ .align 5 .pushsection .idmap.text, "ax" -ENTRY(cpu_mohawk_reset) +SYM_TYPED_FUNC_START(cpu_mohawk_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches mcr p15, 0, ip, c7, c10, 4 @ drain WB @@ -67,7 +70,7 @@ ENTRY(cpu_mohawk_reset) bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register ret r0 -ENDPROC(cpu_mohawk_reset) +SYM_FUNC_END(cpu_mohawk_reset) .popsection /* @@ -76,22 +79,23 @@ ENDPROC(cpu_mohawk_reset) * Called with IRQs disabled */ .align 5 -ENTRY(cpu_mohawk_do_idle) +SYM_TYPED_FUNC_START(cpu_mohawk_do_idle) mov r0, #0 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer mcr p15, 0, r0, c7, c0, 4 @ wait for interrupt ret lr +SYM_FUNC_END(cpu_mohawk_do_idle) /* * flush_icache_all() * * Unconditionally clean and invalidate the entire icache. */ -ENTRY(mohawk_flush_icache_all) +SYM_TYPED_FUNC_START(mohawk_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache ret lr -ENDPROC(mohawk_flush_icache_all) +SYM_FUNC_END(mohawk_flush_icache_all) /* * flush_user_cache_all() @@ -99,15 +103,14 @@ ENDPROC(mohawk_flush_icache_all) * Clean and invalidate all cache entries in a particular * address space. */ -ENTRY(mohawk_flush_user_cache_all) - /* FALLTHROUGH */ +SYM_FUNC_ALIAS(mohawk_flush_user_cache_all, mohawk_flush_kern_cache_all) /* * flush_kern_cache_all() * * Clean and invalidate the entire cache. */ -ENTRY(mohawk_flush_kern_cache_all) +SYM_TYPED_FUNC_START(mohawk_flush_kern_cache_all) mov r2, #VM_EXEC mov ip, #0 __flush_whole_cache: @@ -116,6 +119,7 @@ __flush_whole_cache: mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache mcrne p15, 0, ip, c7, c10, 0 @ drain write buffer ret lr +SYM_FUNC_END(mohawk_flush_kern_cache_all) /* * flush_user_cache_range(start, end, flags) @@ -129,7 +133,7 @@ __flush_whole_cache: * * (same as arm926) */ -ENTRY(mohawk_flush_user_cache_range) +SYM_TYPED_FUNC_START(mohawk_flush_user_cache_range) mov ip, #0 sub r3, r1, r0 @ calculate total size cmp r3, #CACHE_DLIMIT @@ -146,6 +150,7 @@ ENTRY(mohawk_flush_user_cache_range) tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(mohawk_flush_user_cache_range) /* * coherent_kern_range(start, end) @@ -157,8 +162,11 @@ ENTRY(mohawk_flush_user_cache_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(mohawk_coherent_kern_range) - /* FALLTHROUGH */ +SYM_TYPED_FUNC_START(mohawk_coherent_kern_range) +#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ + b mohawk_coherent_user_range +#endif +SYM_FUNC_END(mohawk_coherent_kern_range) /* * coherent_user_range(start, end) @@ -172,7 +180,7 @@ ENTRY(mohawk_coherent_kern_range) * * (same as arm926) */ -ENTRY(mohawk_coherent_user_range) +SYM_TYPED_FUNC_START(mohawk_coherent_user_range) bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry @@ -182,6 +190,7 @@ ENTRY(mohawk_coherent_user_range) mcr p15, 0, r0, c7, c10, 4 @ drain WB mov r0, #0 ret lr +SYM_FUNC_END(mohawk_coherent_user_range) /* * flush_kern_dcache_area(void *addr, size_t size) @@ -192,7 +201,7 @@ ENTRY(mohawk_coherent_user_range) * - addr - kernel address * - size - region size */ -ENTRY(mohawk_flush_kern_dcache_area) +SYM_TYPED_FUNC_START(mohawk_flush_kern_dcache_area) add r1, r0, r1 1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry add r0, r0, #CACHE_DLINESIZE @@ -202,6 +211,7 @@ ENTRY(mohawk_flush_kern_dcache_area) mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(mohawk_flush_kern_dcache_area) /* * dma_inv_range(start, end) @@ -256,7 +266,7 @@ mohawk_dma_clean_range: * - start - virtual start address * - end - virtual end address */ -ENTRY(mohawk_dma_flush_range) +SYM_TYPED_FUNC_START(mohawk_dma_flush_range) bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry @@ -265,6 +275,7 @@ ENTRY(mohawk_dma_flush_range) blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(mohawk_dma_flush_range) /* * dma_map_area(start, size, dir) @@ -272,13 +283,13 @@ ENTRY(mohawk_dma_flush_range) * - size - size of region * - dir - DMA direction */ -ENTRY(mohawk_dma_map_area) +SYM_TYPED_FUNC_START(mohawk_dma_map_area) add r1, r1, r0 cmp r2, #DMA_TO_DEVICE beq mohawk_dma_clean_range bcs mohawk_dma_inv_range b mohawk_dma_flush_range -ENDPROC(mohawk_dma_map_area) +SYM_FUNC_END(mohawk_dma_map_area) /* * dma_unmap_area(start, size, dir) @@ -286,23 +297,18 @@ ENDPROC(mohawk_dma_map_area) * - size - size of region * - dir - DMA direction */ -ENTRY(mohawk_dma_unmap_area) +SYM_TYPED_FUNC_START(mohawk_dma_unmap_area) ret lr -ENDPROC(mohawk_dma_unmap_area) - - .globl mohawk_flush_kern_cache_louis - .equ mohawk_flush_kern_cache_louis, mohawk_flush_kern_cache_all - - @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) - define_cache_functions mohawk +SYM_FUNC_END(mohawk_dma_unmap_area) -ENTRY(cpu_mohawk_dcache_clean_area) +SYM_TYPED_FUNC_START(cpu_mohawk_dcache_clean_area) 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, #CACHE_DLINESIZE subs r1, r1, #CACHE_DLINESIZE bhi 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB ret lr +SYM_FUNC_END(cpu_mohawk_dcache_clean_area) /* * cpu_mohawk_switch_mm(pgd) @@ -312,7 +318,7 @@ ENTRY(cpu_mohawk_dcache_clean_area) * pgd: new page tables */ .align 5 -ENTRY(cpu_mohawk_switch_mm) +SYM_TYPED_FUNC_START(cpu_mohawk_switch_mm) mov ip, #0 mcr p15, 0, ip, c7, c14, 0 @ clean & invalidate all D cache mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache @@ -321,6 +327,7 @@ ENTRY(cpu_mohawk_switch_mm) mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs ret lr +SYM_FUNC_END(cpu_mohawk_switch_mm) /* * cpu_mohawk_set_pte_ext(ptep, pte, ext) @@ -328,7 +335,7 @@ ENTRY(cpu_mohawk_switch_mm) * Set a PTE and flush it out */ .align 5 -ENTRY(cpu_mohawk_set_pte_ext) +SYM_TYPED_FUNC_START(cpu_mohawk_set_pte_ext) #ifdef CONFIG_MMU armv3_set_pte_ext mov r0, r0 @@ -336,11 +343,12 @@ ENTRY(cpu_mohawk_set_pte_ext) mcr p15, 0, r0, c7, c10, 4 @ drain WB ret lr #endif +SYM_FUNC_END(cpu_mohawk_set_pte_ext) .globl cpu_mohawk_suspend_size .equ cpu_mohawk_suspend_size, 4 * 6 #ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_mohawk_do_suspend) +SYM_TYPED_FUNC_START(cpu_mohawk_do_suspend) stmfd sp!, {r4 - r9, lr} mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode mrc p15, 0, r5, c15, c1, 0 @ CP access reg @@ -351,9 +359,9 @@ ENTRY(cpu_mohawk_do_suspend) bic r4, r4, #2 @ clear frequency change bit stmia r0, {r4 - r9} @ store cp regs ldmia sp!, {r4 - r9, pc} -ENDPROC(cpu_mohawk_do_suspend) +SYM_FUNC_END(cpu_mohawk_do_suspend) -ENTRY(cpu_mohawk_do_resume) +SYM_TYPED_FUNC_START(cpu_mohawk_do_resume) ldmia r0, {r4 - r9} @ load cp regs mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I & D caches, BTB @@ -369,7 +377,7 @@ ENTRY(cpu_mohawk_do_resume) mcr p15, 0, r8, c1, c0, 1 @ auxiliary control reg mov r0, r9 @ control register b cpu_resume_mmu -ENDPROC(cpu_mohawk_do_resume) +SYM_FUNC_END(cpu_mohawk_do_resume) #endif .type __mohawk_setup, #function diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S index 4071f7a61cb6..3da76fab8ac3 100644 --- a/arch/arm/mm/proc-sa110.S +++ b/arch/arm/mm/proc-sa110.S @@ -12,6 +12,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <linux/pgtable.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> @@ -32,15 +33,16 @@ /* * cpu_sa110_proc_init() */ -ENTRY(cpu_sa110_proc_init) +SYM_TYPED_FUNC_START(cpu_sa110_proc_init) mov r0, #0 mcr p15, 0, r0, c15, c1, 2 @ Enable clock switching ret lr +SYM_FUNC_END(cpu_sa110_proc_init) /* * cpu_sa110_proc_fin() */ -ENTRY(cpu_sa110_proc_fin) +SYM_TYPED_FUNC_START(cpu_sa110_proc_fin) mov r0, #0 mcr p15, 0, r0, c15, c2, 2 @ Disable clock switching mrc p15, 0, r0, c1, c0, 0 @ ctrl register @@ -48,6 +50,7 @@ ENTRY(cpu_sa110_proc_fin) bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches ret lr +SYM_FUNC_END(cpu_sa110_proc_fin) /* * cpu_sa110_reset(loc) @@ -60,7 +63,7 @@ ENTRY(cpu_sa110_proc_fin) */ .align 5 .pushsection .idmap.text, "ax" -ENTRY(cpu_sa110_reset) +SYM_TYPED_FUNC_START(cpu_sa110_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches mcr p15, 0, ip, c7, c10, 4 @ drain WB @@ -72,7 +75,7 @@ ENTRY(cpu_sa110_reset) bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register ret r0 -ENDPROC(cpu_sa110_reset) +SYM_FUNC_END(cpu_sa110_reset) .popsection /* @@ -88,7 +91,7 @@ ENDPROC(cpu_sa110_reset) */ .align 5 -ENTRY(cpu_sa110_do_idle) +SYM_TYPED_FUNC_START(cpu_sa110_do_idle) mcr p15, 0, ip, c15, c2, 2 @ disable clock switching ldr r1, =UNCACHEABLE_ADDR @ load from uncacheable loc ldr r1, [r1, #0] @ force switch to MCLK @@ -101,6 +104,7 @@ ENTRY(cpu_sa110_do_idle) mov r0, r0 @ safety mcr p15, 0, r0, c15, c1, 2 @ enable clock switching ret lr +SYM_FUNC_END(cpu_sa110_do_idle) /* ================================= CACHE ================================ */ @@ -113,12 +117,13 @@ ENTRY(cpu_sa110_do_idle) * addr: cache-unaligned virtual address */ .align 5 -ENTRY(cpu_sa110_dcache_clean_area) +SYM_TYPED_FUNC_START(cpu_sa110_dcache_clean_area) 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, #DCACHELINESIZE subs r1, r1, #DCACHELINESIZE bhi 1b ret lr +SYM_FUNC_END(cpu_sa110_dcache_clean_area) /* =============================== PageTable ============================== */ @@ -130,7 +135,7 @@ ENTRY(cpu_sa110_dcache_clean_area) * pgd: new page tables */ .align 5 -ENTRY(cpu_sa110_switch_mm) +SYM_TYPED_FUNC_START(cpu_sa110_switch_mm) #ifdef CONFIG_MMU str lr, [sp, #-4]! bl v4wb_flush_kern_cache_all @ clears IP @@ -140,6 +145,7 @@ ENTRY(cpu_sa110_switch_mm) #else ret lr #endif +SYM_FUNC_END(cpu_sa110_switch_mm) /* * cpu_sa110_set_pte_ext(ptep, pte, ext) @@ -147,7 +153,7 @@ ENTRY(cpu_sa110_switch_mm) * Set a PTE and flush it out */ .align 5 -ENTRY(cpu_sa110_set_pte_ext) +SYM_TYPED_FUNC_START(cpu_sa110_set_pte_ext) #ifdef CONFIG_MMU armv3_set_pte_ext wc_disable=0 mov r0, r0 @@ -155,6 +161,7 @@ ENTRY(cpu_sa110_set_pte_ext) mcr p15, 0, r0, c7, c10, 4 @ drain WB #endif ret lr +SYM_FUNC_END(cpu_sa110_set_pte_ext) .type __sa110_setup, #function __sa110_setup: diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S index e723bd4119d3..7c496195e440 100644 --- a/arch/arm/mm/proc-sa1100.S +++ b/arch/arm/mm/proc-sa1100.S @@ -17,6 +17,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <linux/pgtable.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> @@ -36,11 +37,12 @@ /* * cpu_sa1100_proc_init() */ -ENTRY(cpu_sa1100_proc_init) +SYM_TYPED_FUNC_START(cpu_sa1100_proc_init) mov r0, #0 mcr p15, 0, r0, c15, c1, 2 @ Enable clock switching mcr p15, 0, r0, c9, c0, 5 @ Allow read-buffer operations from userland ret lr +SYM_FUNC_END(cpu_sa1100_proc_init) /* * cpu_sa1100_proc_fin() @@ -49,13 +51,14 @@ ENTRY(cpu_sa1100_proc_init) * - Disable interrupts * - Clean and turn off caches. */ -ENTRY(cpu_sa1100_proc_fin) +SYM_TYPED_FUNC_START(cpu_sa1100_proc_fin) mcr p15, 0, ip, c15, c2, 2 @ Disable clock switching mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches ret lr +SYM_FUNC_END(cpu_sa1100_proc_fin) /* * cpu_sa1100_reset(loc) @@ -68,7 +71,7 @@ ENTRY(cpu_sa1100_proc_fin) */ .align 5 .pushsection .idmap.text, "ax" -ENTRY(cpu_sa1100_reset) +SYM_TYPED_FUNC_START(cpu_sa1100_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches mcr p15, 0, ip, c7, c10, 4 @ drain WB @@ -80,7 +83,7 @@ ENTRY(cpu_sa1100_reset) bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register ret r0 -ENDPROC(cpu_sa1100_reset) +SYM_FUNC_END(cpu_sa1100_reset) .popsection /* @@ -95,7 +98,7 @@ ENDPROC(cpu_sa1100_reset) * 3 = switch to fast processor clock */ .align 5 -ENTRY(cpu_sa1100_do_idle) +SYM_TYPED_FUNC_START(cpu_sa1100_do_idle) mov r0, r0 @ 4 nop padding mov r0, r0 mov r0, r0 @@ -111,6 +114,7 @@ ENTRY(cpu_sa1100_do_idle) mov r0, r0 @ safety mcr p15, 0, r0, c15, c1, 2 @ enable clock switching ret lr +SYM_FUNC_END(cpu_sa1100_do_idle) /* ================================= CACHE ================================ */ @@ -123,12 +127,13 @@ ENTRY(cpu_sa1100_do_idle) * addr: cache-unaligned virtual address */ .align 5 -ENTRY(cpu_sa1100_dcache_clean_area) +SYM_TYPED_FUNC_START(cpu_sa1100_dcache_clean_area) 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, #DCACHELINESIZE subs r1, r1, #DCACHELINESIZE bhi 1b ret lr +SYM_FUNC_END(cpu_sa1100_dcache_clean_area) /* =============================== PageTable ============================== */ @@ -140,7 +145,7 @@ ENTRY(cpu_sa1100_dcache_clean_area) * pgd: new page tables */ .align 5 -ENTRY(cpu_sa1100_switch_mm) +SYM_TYPED_FUNC_START(cpu_sa1100_switch_mm) #ifdef CONFIG_MMU str lr, [sp, #-4]! bl v4wb_flush_kern_cache_all @ clears IP @@ -151,6 +156,7 @@ ENTRY(cpu_sa1100_switch_mm) #else ret lr #endif +SYM_FUNC_END(cpu_sa1100_switch_mm) /* * cpu_sa1100_set_pte_ext(ptep, pte, ext) @@ -158,7 +164,7 @@ ENTRY(cpu_sa1100_switch_mm) * Set a PTE and flush it out */ .align 5 -ENTRY(cpu_sa1100_set_pte_ext) +SYM_TYPED_FUNC_START(cpu_sa1100_set_pte_ext) #ifdef CONFIG_MMU armv3_set_pte_ext wc_disable=0 mov r0, r0 @@ -166,20 +172,21 @@ ENTRY(cpu_sa1100_set_pte_ext) mcr p15, 0, r0, c7, c10, 4 @ drain WB #endif ret lr +SYM_FUNC_END(cpu_sa1100_set_pte_ext) .globl cpu_sa1100_suspend_size .equ cpu_sa1100_suspend_size, 4 * 3 #ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_sa1100_do_suspend) +SYM_TYPED_FUNC_START(cpu_sa1100_do_suspend) stmfd sp!, {r4 - r6, lr} mrc p15, 0, r4, c3, c0, 0 @ domain ID mrc p15, 0, r5, c13, c0, 0 @ PID mrc p15, 0, r6, c1, c0, 0 @ control reg stmia r0, {r4 - r6} @ store cp regs ldmfd sp!, {r4 - r6, pc} -ENDPROC(cpu_sa1100_do_suspend) +SYM_FUNC_END(cpu_sa1100_do_suspend) -ENTRY(cpu_sa1100_do_resume) +SYM_TYPED_FUNC_START(cpu_sa1100_do_resume) ldmia r0, {r4 - r6} @ load cp regs mov ip, #0 mcr p15, 0, ip, c8, c7, 0 @ flush I+D TLBs @@ -192,7 +199,7 @@ ENTRY(cpu_sa1100_do_resume) mcr p15, 0, r5, c13, c0, 0 @ PID mov r0, r6 @ control register b cpu_resume_mmu -ENDPROC(cpu_sa1100_do_resume) +SYM_FUNC_END(cpu_sa1100_do_resume) #endif .type __sa1100_setup, #function diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index 203dff89ab1a..90a01f5950b9 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -8,6 +8,7 @@ * This is the "shell" of the ARMv6 processor support. */ #include <linux/init.h> +#include <linux/cfi_types.h> #include <linux/linkage.h> #include <linux/pgtable.h> #include <asm/assembler.h> @@ -34,15 +35,17 @@ .arch armv6 -ENTRY(cpu_v6_proc_init) +SYM_TYPED_FUNC_START(cpu_v6_proc_init) ret lr +SYM_FUNC_END(cpu_v6_proc_init) -ENTRY(cpu_v6_proc_fin) +SYM_TYPED_FUNC_START(cpu_v6_proc_fin) mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x0006 @ .............ca. mcr p15, 0, r0, c1, c0, 0 @ disable caches ret lr +SYM_FUNC_END(cpu_v6_proc_fin) /* * cpu_v6_reset(loc) @@ -55,14 +58,14 @@ ENTRY(cpu_v6_proc_fin) */ .align 5 .pushsection .idmap.text, "ax" -ENTRY(cpu_v6_reset) +SYM_TYPED_FUNC_START(cpu_v6_reset) mrc p15, 0, r1, c1, c0, 0 @ ctrl register bic r1, r1, #0x1 @ ...............m mcr p15, 0, r1, c1, c0, 0 @ disable MMU mov r1, #0 mcr p15, 0, r1, c7, c5, 4 @ ISB ret r0 -ENDPROC(cpu_v6_reset) +SYM_FUNC_END(cpu_v6_reset) .popsection /* @@ -72,18 +75,20 @@ ENDPROC(cpu_v6_reset) * * IRQs are already disabled. */ -ENTRY(cpu_v6_do_idle) +SYM_TYPED_FUNC_START(cpu_v6_do_idle) mov r1, #0 mcr p15, 0, r1, c7, c10, 4 @ DWB - WFI may enter a low-power mode mcr p15, 0, r1, c7, c0, 4 @ wait for interrupt ret lr +SYM_FUNC_END(cpu_v6_do_idle) -ENTRY(cpu_v6_dcache_clean_area) +SYM_TYPED_FUNC_START(cpu_v6_dcache_clean_area) 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, #D_CACHE_LINE_SIZE subs r1, r1, #D_CACHE_LINE_SIZE bhi 1b ret lr +SYM_FUNC_END(cpu_v6_dcache_clean_area) /* * cpu_v6_switch_mm(pgd_phys, tsk) @@ -95,7 +100,7 @@ ENTRY(cpu_v6_dcache_clean_area) * It is assumed that: * - we are not using split page tables */ -ENTRY(cpu_v6_switch_mm) +SYM_TYPED_FUNC_START(cpu_v6_switch_mm) #ifdef CONFIG_MMU mov r2, #0 mmid r1, r1 @ get mm->context.id @@ -113,6 +118,7 @@ ENTRY(cpu_v6_switch_mm) mcr p15, 0, r1, c13, c0, 1 @ set context ID #endif ret lr +SYM_FUNC_END(cpu_v6_switch_mm) /* * cpu_v6_set_pte_ext(ptep, pte, ext) @@ -126,17 +132,18 @@ ENTRY(cpu_v6_switch_mm) */ armv6_mt_table cpu_v6 -ENTRY(cpu_v6_set_pte_ext) +SYM_TYPED_FUNC_START(cpu_v6_set_pte_ext) #ifdef CONFIG_MMU armv6_set_pte_ext cpu_v6 #endif ret lr +SYM_FUNC_END(cpu_v6_set_pte_ext) /* Suspend/resume support: taken from arch/arm/mach-s3c64xx/sleep.S */ .globl cpu_v6_suspend_size .equ cpu_v6_suspend_size, 4 * 6 #ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_v6_do_suspend) +SYM_TYPED_FUNC_START(cpu_v6_do_suspend) stmfd sp!, {r4 - r9, lr} mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID #ifdef CONFIG_MMU @@ -148,9 +155,9 @@ ENTRY(cpu_v6_do_suspend) mrc p15, 0, r9, c1, c0, 0 @ control register stmia r0, {r4 - r9} ldmfd sp!, {r4- r9, pc} -ENDPROC(cpu_v6_do_suspend) +SYM_FUNC_END(cpu_v6_do_suspend) -ENTRY(cpu_v6_do_resume) +SYM_TYPED_FUNC_START(cpu_v6_do_resume) mov ip, #0 mcr p15, 0, ip, c7, c14, 0 @ clean+invalidate D cache mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache @@ -172,7 +179,7 @@ ENTRY(cpu_v6_do_resume) mcr p15, 0, ip, c7, c5, 4 @ ISB mov r0, r9 @ control register b cpu_resume_mmu -ENDPROC(cpu_v6_do_resume) +SYM_FUNC_END(cpu_v6_do_resume) #endif string cpu_v6_name, "ARMv6-compatible processor" diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S index 0a3083ad19c2..1007702fcaf3 100644 --- a/arch/arm/mm/proc-v7-2level.S +++ b/arch/arm/mm/proc-v7-2level.S @@ -40,7 +40,7 @@ * even on Cortex-A8 revisions not affected by 430973. * If IBE is not set, the flush BTAC/BTB won't do anything. */ -ENTRY(cpu_v7_switch_mm) +SYM_TYPED_FUNC_START(cpu_v7_switch_mm) #ifdef CONFIG_MMU mmid r1, r1 @ get mm->context.id ALT_SMP(orr r0, r0, #TTB_FLAGS_SMP) @@ -59,7 +59,7 @@ ENTRY(cpu_v7_switch_mm) isb #endif bx lr -ENDPROC(cpu_v7_switch_mm) +SYM_FUNC_END(cpu_v7_switch_mm) /* * cpu_v7_set_pte_ext(ptep, pte) @@ -71,7 +71,7 @@ ENDPROC(cpu_v7_switch_mm) * - pte - PTE value to store * - ext - value for extended PTE bits */ -ENTRY(cpu_v7_set_pte_ext) +SYM_TYPED_FUNC_START(cpu_v7_set_pte_ext) #ifdef CONFIG_MMU str r1, [r0] @ linux version @@ -106,7 +106,7 @@ ENTRY(cpu_v7_set_pte_ext) ALT_UP (mcr p15, 0, r0, c7, c10, 1) @ flush_pte #endif bx lr -ENDPROC(cpu_v7_set_pte_ext) +SYM_FUNC_END(cpu_v7_set_pte_ext) /* * Memory region attributes with SCTLR.TRE=1 diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S index 131984462d0d..bdabc15cde56 100644 --- a/arch/arm/mm/proc-v7-3level.S +++ b/arch/arm/mm/proc-v7-3level.S @@ -42,7 +42,7 @@ * Set the translation table base pointer to be pgd_phys (physical address of * the new TTB). */ -ENTRY(cpu_v7_switch_mm) +SYM_TYPED_FUNC_START(cpu_v7_switch_mm) #ifdef CONFIG_MMU mmid r2, r2 asid r2, r2 @@ -51,7 +51,7 @@ ENTRY(cpu_v7_switch_mm) isb #endif ret lr -ENDPROC(cpu_v7_switch_mm) +SYM_FUNC_END(cpu_v7_switch_mm) #ifdef __ARMEB__ #define rl r3 @@ -68,7 +68,7 @@ ENDPROC(cpu_v7_switch_mm) * - ptep - pointer to level 3 translation table entry * - pte - PTE value to store (64-bit in r2 and r3) */ -ENTRY(cpu_v7_set_pte_ext) +SYM_TYPED_FUNC_START(cpu_v7_set_pte_ext) #ifdef CONFIG_MMU tst rl, #L_PTE_VALID beq 1f @@ -87,7 +87,7 @@ ENTRY(cpu_v7_set_pte_ext) ALT_UP (mcr p15, 0, r0, c7, c10, 1) @ flush_pte #endif ret lr -ENDPROC(cpu_v7_set_pte_ext) +SYM_FUNC_END(cpu_v7_set_pte_ext) /* * Memory region attributes for LPAE (defined in pgtable-3level.h): diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 193c7aeb6703..5fb9a6aecb00 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -7,6 +7,7 @@ * This is the "shell" of the ARMv7 processor support. */ #include <linux/arm-smccc.h> +#include <linux/cfi_types.h> #include <linux/init.h> #include <linux/linkage.h> #include <linux/pgtable.h> @@ -26,17 +27,17 @@ .arch armv7-a -ENTRY(cpu_v7_proc_init) +SYM_TYPED_FUNC_START(cpu_v7_proc_init) ret lr -ENDPROC(cpu_v7_proc_init) +SYM_FUNC_END(cpu_v7_proc_init) -ENTRY(cpu_v7_proc_fin) +SYM_TYPED_FUNC_START(cpu_v7_proc_fin) mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x0006 @ .............ca. mcr p15, 0, r0, c1, c0, 0 @ disable caches ret lr -ENDPROC(cpu_v7_proc_fin) +SYM_FUNC_END(cpu_v7_proc_fin) /* * cpu_v7_reset(loc, hyp) @@ -53,7 +54,7 @@ ENDPROC(cpu_v7_proc_fin) */ .align 5 .pushsection .idmap.text, "ax" -ENTRY(cpu_v7_reset) +SYM_TYPED_FUNC_START(cpu_v7_reset) mrc p15, 0, r2, c1, c0, 0 @ ctrl register bic r2, r2, #0x1 @ ...............m THUMB( bic r2, r2, #1 << 30 ) @ SCTLR.TE (Thumb exceptions) @@ -64,7 +65,7 @@ ENTRY(cpu_v7_reset) bne __hyp_soft_restart #endif bx r0 -ENDPROC(cpu_v7_reset) +SYM_FUNC_END(cpu_v7_reset) .popsection /* @@ -74,13 +75,13 @@ ENDPROC(cpu_v7_reset) * * IRQs are already disabled. */ -ENTRY(cpu_v7_do_idle) +SYM_TYPED_FUNC_START(cpu_v7_do_idle) dsb @ WFI may enter a low-power mode wfi ret lr -ENDPROC(cpu_v7_do_idle) +SYM_FUNC_END(cpu_v7_do_idle) -ENTRY(cpu_v7_dcache_clean_area) +SYM_TYPED_FUNC_START(cpu_v7_dcache_clean_area) ALT_SMP(W(nop)) @ MP extensions imply L1 PTW ALT_UP_B(1f) ret lr @@ -91,38 +92,39 @@ ENTRY(cpu_v7_dcache_clean_area) bhi 2b dsb ishst ret lr -ENDPROC(cpu_v7_dcache_clean_area) +SYM_FUNC_END(cpu_v7_dcache_clean_area) #ifdef CONFIG_ARM_PSCI .arch_extension sec -ENTRY(cpu_v7_smc_switch_mm) +SYM_TYPED_FUNC_START(cpu_v7_smc_switch_mm) stmfd sp!, {r0 - r3} movw r0, #:lower16:ARM_SMCCC_ARCH_WORKAROUND_1 movt r0, #:upper16:ARM_SMCCC_ARCH_WORKAROUND_1 smc #0 ldmfd sp!, {r0 - r3} b cpu_v7_switch_mm -ENDPROC(cpu_v7_smc_switch_mm) +SYM_FUNC_END(cpu_v7_smc_switch_mm) .arch_extension virt -ENTRY(cpu_v7_hvc_switch_mm) +SYM_TYPED_FUNC_START(cpu_v7_hvc_switch_mm) stmfd sp!, {r0 - r3} movw r0, #:lower16:ARM_SMCCC_ARCH_WORKAROUND_1 movt r0, #:upper16:ARM_SMCCC_ARCH_WORKAROUND_1 hvc #0 ldmfd sp!, {r0 - r3} b cpu_v7_switch_mm -ENDPROC(cpu_v7_hvc_switch_mm) +SYM_FUNC_END(cpu_v7_hvc_switch_mm) #endif -ENTRY(cpu_v7_iciallu_switch_mm) + +SYM_TYPED_FUNC_START(cpu_v7_iciallu_switch_mm) mov r3, #0 mcr p15, 0, r3, c7, c5, 0 @ ICIALLU b cpu_v7_switch_mm -ENDPROC(cpu_v7_iciallu_switch_mm) -ENTRY(cpu_v7_bpiall_switch_mm) +SYM_FUNC_END(cpu_v7_iciallu_switch_mm) +SYM_TYPED_FUNC_START(cpu_v7_bpiall_switch_mm) mov r3, #0 mcr p15, 0, r3, c7, c5, 6 @ flush BTAC/BTB b cpu_v7_switch_mm -ENDPROC(cpu_v7_bpiall_switch_mm) +SYM_FUNC_END(cpu_v7_bpiall_switch_mm) string cpu_v7_name, "ARMv7 Processor" .align @@ -131,7 +133,7 @@ ENDPROC(cpu_v7_bpiall_switch_mm) .globl cpu_v7_suspend_size .equ cpu_v7_suspend_size, 4 * 9 #ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_v7_do_suspend) +SYM_TYPED_FUNC_START(cpu_v7_do_suspend) stmfd sp!, {r4 - r11, lr} mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID mrc p15, 0, r5, c13, c0, 3 @ User r/o thread ID @@ -150,9 +152,9 @@ ENTRY(cpu_v7_do_suspend) mrc p15, 0, r10, c1, c0, 2 @ Co-processor access control stmia r0, {r5 - r11} ldmfd sp!, {r4 - r11, pc} -ENDPROC(cpu_v7_do_suspend) +SYM_FUNC_END(cpu_v7_do_suspend) -ENTRY(cpu_v7_do_resume) +SYM_TYPED_FUNC_START(cpu_v7_do_resume) mov ip, #0 mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache mcr p15, 0, ip, c13, c0, 1 @ set reserved context ID @@ -186,22 +188,22 @@ ENTRY(cpu_v7_do_resume) dsb mov r0, r8 @ control register b cpu_resume_mmu -ENDPROC(cpu_v7_do_resume) +SYM_FUNC_END(cpu_v7_do_resume) #endif .globl cpu_ca9mp_suspend_size .equ cpu_ca9mp_suspend_size, cpu_v7_suspend_size + 4 * 2 #ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_ca9mp_do_suspend) +SYM_TYPED_FUNC_START(cpu_ca9mp_do_suspend) stmfd sp!, {r4 - r5} mrc p15, 0, r4, c15, c0, 1 @ Diagnostic register mrc p15, 0, r5, c15, c0, 0 @ Power register stmia r0!, {r4 - r5} ldmfd sp!, {r4 - r5} b cpu_v7_do_suspend -ENDPROC(cpu_ca9mp_do_suspend) +SYM_FUNC_END(cpu_ca9mp_do_suspend) -ENTRY(cpu_ca9mp_do_resume) +SYM_TYPED_FUNC_START(cpu_ca9mp_do_resume) ldmia r0!, {r4 - r5} mrc p15, 0, r10, c15, c0, 1 @ Read Diagnostic register teq r4, r10 @ Already restored? @@ -210,7 +212,7 @@ ENTRY(cpu_ca9mp_do_resume) teq r5, r10 @ Already restored? mcrne p15, 0, r5, c15, c0, 0 @ No, so restore it b cpu_v7_do_resume -ENDPROC(cpu_ca9mp_do_resume) +SYM_FUNC_END(cpu_ca9mp_do_resume) #endif #ifdef CONFIG_CPU_PJ4B @@ -220,18 +222,18 @@ ENDPROC(cpu_ca9mp_do_resume) globl_equ cpu_pj4b_proc_fin, cpu_v7_proc_fin globl_equ cpu_pj4b_reset, cpu_v7_reset #ifdef CONFIG_PJ4B_ERRATA_4742 -ENTRY(cpu_pj4b_do_idle) +SYM_TYPED_FUNC_START(cpu_pj4b_do_idle) dsb @ WFI may enter a low-power mode wfi dsb @barrier ret lr -ENDPROC(cpu_pj4b_do_idle) +SYM_FUNC_END(cpu_pj4b_do_idle) #else globl_equ cpu_pj4b_do_idle, cpu_v7_do_idle #endif globl_equ cpu_pj4b_dcache_clean_area, cpu_v7_dcache_clean_area #ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_pj4b_do_suspend) +SYM_TYPED_FUNC_START(cpu_pj4b_do_suspend) stmfd sp!, {r6 - r10} mrc p15, 1, r6, c15, c1, 0 @ save CP15 - extra features mrc p15, 1, r7, c15, c2, 0 @ save CP15 - Aux Func Modes Ctrl 0 @@ -241,9 +243,9 @@ ENTRY(cpu_pj4b_do_suspend) stmia r0!, {r6 - r10} ldmfd sp!, {r6 - r10} b cpu_v7_do_suspend -ENDPROC(cpu_pj4b_do_suspend) +SYM_FUNC_END(cpu_pj4b_do_suspend) -ENTRY(cpu_pj4b_do_resume) +SYM_TYPED_FUNC_START(cpu_pj4b_do_resume) ldmia r0!, {r6 - r10} mcr p15, 1, r6, c15, c1, 0 @ restore CP15 - extra features mcr p15, 1, r7, c15, c2, 0 @ restore CP15 - Aux Func Modes Ctrl 0 @@ -251,7 +253,7 @@ ENTRY(cpu_pj4b_do_resume) mcr p15, 1, r9, c15, c1, 1 @ restore CP15 - Aux Debug Modes Ctrl 1 mcr p15, 0, r10, c9, c14, 0 @ restore CP15 - PMC b cpu_v7_do_resume -ENDPROC(cpu_pj4b_do_resume) +SYM_FUNC_END(cpu_pj4b_do_resume) #endif .globl cpu_pj4b_suspend_size .equ cpu_pj4b_suspend_size, cpu_v7_suspend_size + 4 * 5 diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S index d65a12f851a9..d4675603593b 100644 --- a/arch/arm/mm/proc-v7m.S +++ b/arch/arm/mm/proc-v7m.S @@ -8,18 +8,19 @@ * This is the "shell" of the ARMv7-M processor support. */ #include <linux/linkage.h> +#include <linux/cfi_types.h> #include <asm/assembler.h> #include <asm/page.h> #include <asm/v7m.h> #include "proc-macros.S" -ENTRY(cpu_v7m_proc_init) +SYM_TYPED_FUNC_START(cpu_v7m_proc_init) ret lr -ENDPROC(cpu_v7m_proc_init) +SYM_FUNC_END(cpu_v7m_proc_init) -ENTRY(cpu_v7m_proc_fin) +SYM_TYPED_FUNC_START(cpu_v7m_proc_fin) ret lr -ENDPROC(cpu_v7m_proc_fin) +SYM_FUNC_END(cpu_v7m_proc_fin) /* * cpu_v7m_reset(loc) @@ -31,9 +32,9 @@ ENDPROC(cpu_v7m_proc_fin) * - loc - location to jump to for soft reset */ .align 5 -ENTRY(cpu_v7m_reset) +SYM_TYPED_FUNC_START(cpu_v7m_reset) ret r0 -ENDPROC(cpu_v7m_reset) +SYM_FUNC_END(cpu_v7m_reset) /* * cpu_v7m_do_idle() @@ -42,36 +43,36 @@ ENDPROC(cpu_v7m_reset) * * IRQs are already disabled. */ -ENTRY(cpu_v7m_do_idle) +SYM_TYPED_FUNC_START(cpu_v7m_do_idle) wfi ret lr -ENDPROC(cpu_v7m_do_idle) +SYM_FUNC_END(cpu_v7m_do_idle) -ENTRY(cpu_v7m_dcache_clean_area) +SYM_TYPED_FUNC_START(cpu_v7m_dcache_clean_area) ret lr -ENDPROC(cpu_v7m_dcache_clean_area) +SYM_FUNC_END(cpu_v7m_dcache_clean_area) /* * There is no MMU, so here is nothing to do. */ -ENTRY(cpu_v7m_switch_mm) +SYM_TYPED_FUNC_START(cpu_v7m_switch_mm) ret lr -ENDPROC(cpu_v7m_switch_mm) +SYM_FUNC_END(cpu_v7m_switch_mm) .globl cpu_v7m_suspend_size .equ cpu_v7m_suspend_size, 0 #ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_v7m_do_suspend) +SYM_TYPED_FUNC_START(cpu_v7m_do_suspend) ret lr -ENDPROC(cpu_v7m_do_suspend) +SYM_FUNC_END(cpu_v7m_do_suspend) -ENTRY(cpu_v7m_do_resume) +SYM_TYPED_FUNC_START(cpu_v7m_do_resume) ret lr -ENDPROC(cpu_v7m_do_resume) +SYM_FUNC_END(cpu_v7m_do_resume) #endif -ENTRY(cpu_cm7_dcache_clean_area) +SYM_TYPED_FUNC_START(cpu_cm7_dcache_clean_area) dcache_line_size r2, r3 movw r3, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_DCCMVAC movt r3, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_DCCMVAC @@ -82,16 +83,16 @@ ENTRY(cpu_cm7_dcache_clean_area) bhi 1b dsb ret lr -ENDPROC(cpu_cm7_dcache_clean_area) +SYM_FUNC_END(cpu_cm7_dcache_clean_area) -ENTRY(cpu_cm7_proc_fin) +SYM_TYPED_FUNC_START(cpu_cm7_proc_fin) movw r2, #:lower16:(BASEADDR_V7M_SCB + V7M_SCB_CCR) movt r2, #:upper16:(BASEADDR_V7M_SCB + V7M_SCB_CCR) ldr r0, [r2] bic r0, r0, #(V7M_SCB_CCR_DC | V7M_SCB_CCR_IC) str r0, [r2] ret lr -ENDPROC(cpu_cm7_proc_fin) +SYM_FUNC_END(cpu_cm7_proc_fin) .section ".init.text", "ax" diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S index a17afe7e195a..14927b380452 100644 --- a/arch/arm/mm/proc-xsc3.S +++ b/arch/arm/mm/proc-xsc3.S @@ -23,6 +23,7 @@ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <linux/pgtable.h> #include <asm/assembler.h> #include <asm/hwcap.h> @@ -79,18 +80,20 @@ * * Nothing too exciting at the moment */ -ENTRY(cpu_xsc3_proc_init) +SYM_TYPED_FUNC_START(cpu_xsc3_proc_init) ret lr +SYM_FUNC_END(cpu_xsc3_proc_init) /* * cpu_xsc3_proc_fin() */ -ENTRY(cpu_xsc3_proc_fin) +SYM_TYPED_FUNC_START(cpu_xsc3_proc_fin) mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x1800 @ ...IZ........... bic r0, r0, #0x0006 @ .............CA. mcr p15, 0, r0, c1, c0, 0 @ disable caches ret lr +SYM_FUNC_END(cpu_xsc3_proc_fin) /* * cpu_xsc3_reset(loc) @@ -103,7 +106,7 @@ ENTRY(cpu_xsc3_proc_fin) */ .align 5 .pushsection .idmap.text, "ax" -ENTRY(cpu_xsc3_reset) +SYM_TYPED_FUNC_START(cpu_xsc3_reset) mov r1, #PSR_F_BIT|PSR_I_BIT|SVC_MODE msr cpsr_c, r1 @ reset CPSR mrc p15, 0, r1, c1, c0, 0 @ ctrl register @@ -117,7 +120,7 @@ ENTRY(cpu_xsc3_reset) @ already containing those two last instructions to survive. mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs ret r0 -ENDPROC(cpu_xsc3_reset) +SYM_FUNC_END(cpu_xsc3_reset) .popsection /* @@ -132,10 +135,11 @@ ENDPROC(cpu_xsc3_reset) */ .align 5 -ENTRY(cpu_xsc3_do_idle) +SYM_TYPED_FUNC_START(cpu_xsc3_do_idle) mov r0, #1 mcr p14, 0, r0, c7, c0, 0 @ go to idle ret lr +SYM_FUNC_END(cpu_xsc3_do_idle) /* ================================= CACHE ================================ */ @@ -144,11 +148,11 @@ ENTRY(cpu_xsc3_do_idle) * * Unconditionally clean and invalidate the entire icache. */ -ENTRY(xsc3_flush_icache_all) +SYM_TYPED_FUNC_START(xsc3_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache ret lr -ENDPROC(xsc3_flush_icache_all) +SYM_FUNC_END(xsc3_flush_icache_all) /* * flush_user_cache_all() @@ -156,15 +160,14 @@ ENDPROC(xsc3_flush_icache_all) * Invalidate all cache entries in a particular address * space. */ -ENTRY(xsc3_flush_user_cache_all) - /* FALLTHROUGH */ +SYM_FUNC_ALIAS(xsc3_flush_user_cache_all, xsc3_flush_kern_cache_all) /* * flush_kern_cache_all() * * Clean and invalidate the entire cache. */ -ENTRY(xsc3_flush_kern_cache_all) +SYM_TYPED_FUNC_START(xsc3_flush_kern_cache_all) mov r2, #VM_EXEC mov ip, #0 __flush_whole_cache: @@ -174,6 +177,7 @@ __flush_whole_cache: mcrne p15, 0, ip, c7, c10, 4 @ data write barrier mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush ret lr +SYM_FUNC_END(xsc3_flush_kern_cache_all) /* * flush_user_cache_range(start, end, vm_flags) @@ -186,7 +190,7 @@ __flush_whole_cache: * - vma - vma_area_struct describing address space */ .align 5 -ENTRY(xsc3_flush_user_cache_range) +SYM_TYPED_FUNC_START(xsc3_flush_user_cache_range) mov ip, #0 sub r3, r1, r0 @ calculate total size cmp r3, #MAX_AREA_SIZE @@ -203,6 +207,7 @@ ENTRY(xsc3_flush_user_cache_range) mcrne p15, 0, ip, c7, c10, 4 @ data write barrier mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush ret lr +SYM_FUNC_END(xsc3_flush_user_cache_range) /* * coherent_kern_range(start, end) @@ -217,9 +222,13 @@ ENTRY(xsc3_flush_user_cache_range) * Note: single I-cache line invalidation isn't used here since * it also trashes the mini I-cache used by JTAG debuggers. */ -ENTRY(xsc3_coherent_kern_range) -/* FALLTHROUGH */ -ENTRY(xsc3_coherent_user_range) +SYM_TYPED_FUNC_START(xsc3_coherent_kern_range) +#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ + b xsc3_coherent_user_range +#endif +SYM_FUNC_END(xsc3_coherent_kern_range) + +SYM_TYPED_FUNC_START(xsc3_coherent_user_range) bic r0, r0, #CACHELINESIZE - 1 1: mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line add r0, r0, #CACHELINESIZE @@ -230,6 +239,7 @@ ENTRY(xsc3_coherent_user_range) mcr p15, 0, r0, c7, c10, 4 @ data write barrier mcr p15, 0, r0, c7, c5, 4 @ prefetch flush ret lr +SYM_FUNC_END(xsc3_coherent_user_range) /* * flush_kern_dcache_area(void *addr, size_t size) @@ -240,7 +250,7 @@ ENTRY(xsc3_coherent_user_range) * - addr - kernel address * - size - region size */ -ENTRY(xsc3_flush_kern_dcache_area) +SYM_TYPED_FUNC_START(xsc3_flush_kern_dcache_area) add r1, r0, r1 1: mcr p15, 0, r0, c7, c14, 1 @ clean/invalidate L1 D line add r0, r0, #CACHELINESIZE @@ -251,6 +261,7 @@ ENTRY(xsc3_flush_kern_dcache_area) mcr p15, 0, r0, c7, c10, 4 @ data write barrier mcr p15, 0, r0, c7, c5, 4 @ prefetch flush ret lr +SYM_FUNC_END(xsc3_flush_kern_dcache_area) /* * dma_inv_range(start, end) @@ -301,7 +312,7 @@ xsc3_dma_clean_range: * - start - virtual start address * - end - virtual end address */ -ENTRY(xsc3_dma_flush_range) +SYM_TYPED_FUNC_START(xsc3_dma_flush_range) bic r0, r0, #CACHELINESIZE - 1 1: mcr p15, 0, r0, c7, c14, 1 @ clean/invalidate L1 D line add r0, r0, #CACHELINESIZE @@ -309,6 +320,7 @@ ENTRY(xsc3_dma_flush_range) blo 1b mcr p15, 0, r0, c7, c10, 4 @ data write barrier ret lr +SYM_FUNC_END(xsc3_dma_flush_range) /* * dma_map_area(start, size, dir) @@ -316,13 +328,13 @@ ENTRY(xsc3_dma_flush_range) * - size - size of region * - dir - DMA direction */ -ENTRY(xsc3_dma_map_area) +SYM_TYPED_FUNC_START(xsc3_dma_map_area) add r1, r1, r0 cmp r2, #DMA_TO_DEVICE beq xsc3_dma_clean_range bcs xsc3_dma_inv_range b xsc3_dma_flush_range -ENDPROC(xsc3_dma_map_area) +SYM_FUNC_END(xsc3_dma_map_area) /* * dma_unmap_area(start, size, dir) @@ -330,22 +342,17 @@ ENDPROC(xsc3_dma_map_area) * - size - size of region * - dir - DMA direction */ -ENTRY(xsc3_dma_unmap_area) +SYM_TYPED_FUNC_START(xsc3_dma_unmap_area) ret lr -ENDPROC(xsc3_dma_unmap_area) - - .globl xsc3_flush_kern_cache_louis - .equ xsc3_flush_kern_cache_louis, xsc3_flush_kern_cache_all - - @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) - define_cache_functions xsc3 +SYM_FUNC_END(xsc3_dma_unmap_area) -ENTRY(cpu_xsc3_dcache_clean_area) +SYM_TYPED_FUNC_START(cpu_xsc3_dcache_clean_area) 1: mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line add r0, r0, #CACHELINESIZE subs r1, r1, #CACHELINESIZE bhi 1b ret lr +SYM_FUNC_END(cpu_xsc3_dcache_clean_area) /* =============================== PageTable ============================== */ @@ -357,7 +364,7 @@ ENTRY(cpu_xsc3_dcache_clean_area) * pgd: new page tables */ .align 5 -ENTRY(cpu_xsc3_switch_mm) +SYM_TYPED_FUNC_START(cpu_xsc3_switch_mm) clean_d_cache r1, r2 mcr p15, 0, ip, c7, c5, 0 @ invalidate L1 I cache and BTB mcr p15, 0, ip, c7, c10, 4 @ data write barrier @@ -366,6 +373,7 @@ ENTRY(cpu_xsc3_switch_mm) mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs cpwait_ret lr, ip +SYM_FUNC_END(cpu_xsc3_switch_mm) /* * cpu_xsc3_set_pte_ext(ptep, pte, ext) @@ -391,7 +399,7 @@ cpu_xsc3_mt_table: .long 0x00 @ unused .align 5 -ENTRY(cpu_xsc3_set_pte_ext) +SYM_TYPED_FUNC_START(cpu_xsc3_set_pte_ext) xscale_set_pte_ext_prologue tst r1, #L_PTE_SHARED @ shared? @@ -404,6 +412,7 @@ ENTRY(cpu_xsc3_set_pte_ext) xscale_set_pte_ext_epilogue ret lr +SYM_FUNC_END(cpu_xsc3_set_pte_ext) .ltorg .align @@ -411,7 +420,7 @@ ENTRY(cpu_xsc3_set_pte_ext) .globl cpu_xsc3_suspend_size .equ cpu_xsc3_suspend_size, 4 * 6 #ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_xsc3_do_suspend) +SYM_TYPED_FUNC_START(cpu_xsc3_do_suspend) stmfd sp!, {r4 - r9, lr} mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode mrc p15, 0, r5, c15, c1, 0 @ CP access reg @@ -422,9 +431,9 @@ ENTRY(cpu_xsc3_do_suspend) bic r4, r4, #2 @ clear frequency change bit stmia r0, {r4 - r9} @ store cp regs ldmia sp!, {r4 - r9, pc} -ENDPROC(cpu_xsc3_do_suspend) +SYM_FUNC_END(cpu_xsc3_do_suspend) -ENTRY(cpu_xsc3_do_resume) +SYM_TYPED_FUNC_START(cpu_xsc3_do_resume) ldmia r0, {r4 - r9} @ load cp regs mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I & D caches, BTB @@ -440,7 +449,7 @@ ENTRY(cpu_xsc3_do_resume) mcr p15, 0, r8, c1, c0, 1 @ auxiliary control reg mov r0, r9 @ control register b cpu_resume_mmu -ENDPROC(cpu_xsc3_do_resume) +SYM_FUNC_END(cpu_xsc3_do_resume) #endif .type __xsc3_setup, #function diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S index d82590aa71c0..d8462df8020b 100644 --- a/arch/arm/mm/proc-xscale.S +++ b/arch/arm/mm/proc-xscale.S @@ -19,6 +19,7 @@ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <linux/pgtable.h> #include <asm/assembler.h> #include <asm/hwcap.h> @@ -111,22 +112,24 @@ clean_addr: .word CLEAN_ADDR * * Nothing too exciting at the moment */ -ENTRY(cpu_xscale_proc_init) +SYM_TYPED_FUNC_START(cpu_xscale_proc_init) @ enable write buffer coalescing. Some bootloader disable it mrc p15, 0, r1, c1, c0, 1 bic r1, r1, #1 mcr p15, 0, r1, c1, c0, 1 ret lr +SYM_FUNC_END(cpu_xscale_proc_init) /* * cpu_xscale_proc_fin() */ -ENTRY(cpu_xscale_proc_fin) +SYM_TYPED_FUNC_START(cpu_xscale_proc_fin) mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x1800 @ ...IZ........... bic r0, r0, #0x0006 @ .............CA. mcr p15, 0, r0, c1, c0, 0 @ disable caches ret lr +SYM_FUNC_END(cpu_xscale_proc_fin) /* * cpu_xscale_reset(loc) @@ -141,7 +144,7 @@ ENTRY(cpu_xscale_proc_fin) */ .align 5 .pushsection .idmap.text, "ax" -ENTRY(cpu_xscale_reset) +SYM_TYPED_FUNC_START(cpu_xscale_reset) mov r1, #PSR_F_BIT|PSR_I_BIT|SVC_MODE msr cpsr_c, r1 @ reset CPSR mcr p15, 0, r1, c10, c4, 1 @ unlock I-TLB @@ -159,7 +162,7 @@ ENTRY(cpu_xscale_reset) @ already containing those two last instructions to survive. mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs ret r0 -ENDPROC(cpu_xscale_reset) +SYM_FUNC_END(cpu_xscale_reset) .popsection /* @@ -174,10 +177,11 @@ ENDPROC(cpu_xscale_reset) */ .align 5 -ENTRY(cpu_xscale_do_idle) +SYM_TYPED_FUNC_START(cpu_xscale_do_idle) mov r0, #1 mcr p14, 0, r0, c7, c0, 0 @ Go to IDLE ret lr +SYM_FUNC_END(cpu_xscale_do_idle) /* ================================= CACHE ================================ */ @@ -186,11 +190,11 @@ ENTRY(cpu_xscale_do_idle) * * Unconditionally clean and invalidate the entire icache. */ -ENTRY(xscale_flush_icache_all) +SYM_TYPED_FUNC_START(xscale_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache ret lr -ENDPROC(xscale_flush_icache_all) +SYM_FUNC_END(xscale_flush_icache_all) /* * flush_user_cache_all() @@ -198,15 +202,14 @@ ENDPROC(xscale_flush_icache_all) * Invalidate all cache entries in a particular address * space. */ -ENTRY(xscale_flush_user_cache_all) - /* FALLTHROUGH */ +SYM_FUNC_ALIAS(xscale_flush_user_cache_all, xscale_flush_kern_cache_all) /* * flush_kern_cache_all() * * Clean and invalidate the entire cache. */ -ENTRY(xscale_flush_kern_cache_all) +SYM_TYPED_FUNC_START(xscale_flush_kern_cache_all) mov r2, #VM_EXEC mov ip, #0 __flush_whole_cache: @@ -215,6 +218,7 @@ __flush_whole_cache: mcrne p15, 0, ip, c7, c5, 0 @ Invalidate I cache & BTB mcrne p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer ret lr +SYM_FUNC_END(xscale_flush_kern_cache_all) /* * flush_user_cache_range(start, end, vm_flags) @@ -227,7 +231,7 @@ __flush_whole_cache: * - vma - vma_area_struct describing address space */ .align 5 -ENTRY(xscale_flush_user_cache_range) +SYM_TYPED_FUNC_START(xscale_flush_user_cache_range) mov ip, #0 sub r3, r1, r0 @ calculate total size cmp r3, #MAX_AREA_SIZE @@ -244,6 +248,7 @@ ENTRY(xscale_flush_user_cache_range) mcrne p15, 0, ip, c7, c5, 6 @ Invalidate BTB mcrne p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer ret lr +SYM_FUNC_END(xscale_flush_user_cache_range) /* * coherent_kern_range(start, end) @@ -258,7 +263,7 @@ ENTRY(xscale_flush_user_cache_range) * Note: single I-cache line invalidation isn't used here since * it also trashes the mini I-cache used by JTAG debuggers. */ -ENTRY(xscale_coherent_kern_range) +SYM_TYPED_FUNC_START(xscale_coherent_kern_range) bic r0, r0, #CACHELINESIZE - 1 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, #CACHELINESIZE @@ -268,6 +273,7 @@ ENTRY(xscale_coherent_kern_range) mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer ret lr +SYM_FUNC_END(xscale_coherent_kern_range) /* * coherent_user_range(start, end) @@ -279,7 +285,7 @@ ENTRY(xscale_coherent_kern_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(xscale_coherent_user_range) +SYM_TYPED_FUNC_START(xscale_coherent_user_range) bic r0, r0, #CACHELINESIZE - 1 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry mcr p15, 0, r0, c7, c5, 1 @ Invalidate I cache entry @@ -290,6 +296,7 @@ ENTRY(xscale_coherent_user_range) mcr p15, 0, r0, c7, c5, 6 @ Invalidate BTB mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer ret lr +SYM_FUNC_END(xscale_coherent_user_range) /* * flush_kern_dcache_area(void *addr, size_t size) @@ -300,7 +307,7 @@ ENTRY(xscale_coherent_user_range) * - addr - kernel address * - size - region size */ -ENTRY(xscale_flush_kern_dcache_area) +SYM_TYPED_FUNC_START(xscale_flush_kern_dcache_area) add r1, r0, r1 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry @@ -311,6 +318,7 @@ ENTRY(xscale_flush_kern_dcache_area) mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer ret lr +SYM_FUNC_END(xscale_flush_kern_dcache_area) /* * dma_inv_range(start, end) @@ -361,7 +369,7 @@ xscale_dma_clean_range: * - start - virtual start address * - end - virtual end address */ -ENTRY(xscale_dma_flush_range) +SYM_TYPED_FUNC_START(xscale_dma_flush_range) bic r0, r0, #CACHELINESIZE - 1 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry @@ -370,6 +378,7 @@ ENTRY(xscale_dma_flush_range) blo 1b mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer ret lr +SYM_FUNC_END(xscale_dma_flush_range) /* * dma_map_area(start, size, dir) @@ -377,13 +386,27 @@ ENTRY(xscale_dma_flush_range) * - size - size of region * - dir - DMA direction */ -ENTRY(xscale_dma_map_area) +SYM_TYPED_FUNC_START(xscale_dma_map_area) add r1, r1, r0 cmp r2, #DMA_TO_DEVICE beq xscale_dma_clean_range bcs xscale_dma_inv_range b xscale_dma_flush_range -ENDPROC(xscale_dma_map_area) +SYM_FUNC_END(xscale_dma_map_area) + +/* + * On stepping A0/A1 of the 80200, invalidating D-cache by line doesn't + * clear the dirty bits, which means that if we invalidate a dirty line, + * the dirty data can still be written back to external memory later on. + * + * The recommended workaround is to always do a clean D-cache line before + * doing an invalidate D-cache line, so on the affected processors, + * dma_inv_range() is implemented as dma_flush_range(). + * + * See erratum #25 of "Intel 80200 Processor Specification Update", + * revision January 22, 2003, available at: + * http://www.intel.com/design/iio/specupdt/273415.htm + */ /* * dma_map_area(start, size, dir) @@ -391,12 +414,12 @@ ENDPROC(xscale_dma_map_area) * - size - size of region * - dir - DMA direction */ -ENTRY(xscale_80200_A0_A1_dma_map_area) +SYM_TYPED_FUNC_START(xscale_80200_A0_A1_dma_map_area) add r1, r1, r0 teq r2, #DMA_TO_DEVICE beq xscale_dma_clean_range b xscale_dma_flush_range -ENDPROC(xscale_80200_A0_A1_dma_map_area) +SYM_FUNC_END(xscale_80200_A0_A1_dma_map_area) /* * dma_unmap_area(start, size, dir) @@ -404,59 +427,17 @@ ENDPROC(xscale_80200_A0_A1_dma_map_area) * - size - size of region * - dir - DMA direction */ -ENTRY(xscale_dma_unmap_area) +SYM_TYPED_FUNC_START(xscale_dma_unmap_area) ret lr -ENDPROC(xscale_dma_unmap_area) - - .globl xscale_flush_kern_cache_louis - .equ xscale_flush_kern_cache_louis, xscale_flush_kern_cache_all +SYM_FUNC_END(xscale_dma_unmap_area) - @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) - define_cache_functions xscale - -/* - * On stepping A0/A1 of the 80200, invalidating D-cache by line doesn't - * clear the dirty bits, which means that if we invalidate a dirty line, - * the dirty data can still be written back to external memory later on. - * - * The recommended workaround is to always do a clean D-cache line before - * doing an invalidate D-cache line, so on the affected processors, - * dma_inv_range() is implemented as dma_flush_range(). - * - * See erratum #25 of "Intel 80200 Processor Specification Update", - * revision January 22, 2003, available at: - * http://www.intel.com/design/iio/specupdt/273415.htm - */ -.macro a0_alias basename - .globl xscale_80200_A0_A1_\basename - .type xscale_80200_A0_A1_\basename , %function - .equ xscale_80200_A0_A1_\basename , xscale_\basename -.endm - -/* - * Most of the cache functions are unchanged for these processor revisions. - * Export suitable alias symbols for the unchanged functions: - */ - a0_alias flush_icache_all - a0_alias flush_user_cache_all - a0_alias flush_kern_cache_all - a0_alias flush_kern_cache_louis - a0_alias flush_user_cache_range - a0_alias coherent_kern_range - a0_alias coherent_user_range - a0_alias flush_kern_dcache_area - a0_alias dma_flush_range - a0_alias dma_unmap_area - - @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) - define_cache_functions xscale_80200_A0_A1 - -ENTRY(cpu_xscale_dcache_clean_area) +SYM_TYPED_FUNC_START(cpu_xscale_dcache_clean_area) 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, #CACHELINESIZE subs r1, r1, #CACHELINESIZE bhi 1b ret lr +SYM_FUNC_END(cpu_xscale_dcache_clean_area) /* =============================== PageTable ============================== */ @@ -468,13 +449,14 @@ ENTRY(cpu_xscale_dcache_clean_area) * pgd: new page tables */ .align 5 -ENTRY(cpu_xscale_switch_mm) +SYM_TYPED_FUNC_START(cpu_xscale_switch_mm) clean_d_cache r1, r2 mcr p15, 0, ip, c7, c5, 0 @ Invalidate I cache & BTB mcr p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs cpwait_ret lr, ip +SYM_FUNC_END(cpu_xscale_switch_mm) /* * cpu_xscale_set_pte_ext(ptep, pte, ext) @@ -502,7 +484,7 @@ cpu_xscale_mt_table: .long 0x00 @ unused .align 5 -ENTRY(cpu_xscale_set_pte_ext) +SYM_TYPED_FUNC_START(cpu_xscale_set_pte_ext) xscale_set_pte_ext_prologue @ @@ -520,6 +502,7 @@ ENTRY(cpu_xscale_set_pte_ext) xscale_set_pte_ext_epilogue ret lr +SYM_FUNC_END(cpu_xscale_set_pte_ext) .ltorg .align @@ -527,7 +510,7 @@ ENTRY(cpu_xscale_set_pte_ext) .globl cpu_xscale_suspend_size .equ cpu_xscale_suspend_size, 4 * 6 #ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_xscale_do_suspend) +SYM_TYPED_FUNC_START(cpu_xscale_do_suspend) stmfd sp!, {r4 - r9, lr} mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode mrc p15, 0, r5, c15, c1, 0 @ CP access reg @@ -538,9 +521,9 @@ ENTRY(cpu_xscale_do_suspend) bic r4, r4, #2 @ clear frequency change bit stmia r0, {r4 - r9} @ store cp regs ldmfd sp!, {r4 - r9, pc} -ENDPROC(cpu_xscale_do_suspend) +SYM_FUNC_END(cpu_xscale_do_suspend) -ENTRY(cpu_xscale_do_resume) +SYM_TYPED_FUNC_START(cpu_xscale_do_resume) ldmia r0, {r4 - r9} @ load cp regs mov ip, #0 mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs @@ -553,7 +536,7 @@ ENTRY(cpu_xscale_do_resume) mcr p15, 0, r8, c1, c0, 1 @ auxiliary control reg mov r0, r9 @ control register b cpu_resume_mmu -ENDPROC(cpu_xscale_do_resume) +SYM_FUNC_END(cpu_xscale_do_resume) #endif .type __xscale_setup, #function diff --git a/arch/arm/mm/proc.c b/arch/arm/mm/proc.c new file mode 100644 index 000000000000..bdbbf65d1b36 --- /dev/null +++ b/arch/arm/mm/proc.c @@ -0,0 +1,500 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This file defines C prototypes for the low-level processor assembly functions + * and creates a reference for CFI. This needs to be done for every assembly + * processor ("proc") function that is called from C but does not have a + * corresponding C implementation. + * + * Processors are listed in the order they appear in the Makefile. + * + * Functions are listed if and only if they see use on the target CPU, and in + * the order they are defined in struct processor. + */ +#include <asm/proc-fns.h> + +#ifdef CONFIG_CPU_ARM7TDMI +void cpu_arm7tdmi_proc_init(void); +__ADDRESSABLE(cpu_arm7tdmi_proc_init); +void cpu_arm7tdmi_proc_fin(void); +__ADDRESSABLE(cpu_arm7tdmi_proc_fin); +void cpu_arm7tdmi_reset(void); +__ADDRESSABLE(cpu_arm7tdmi_reset); +int cpu_arm7tdmi_do_idle(void); +__ADDRESSABLE(cpu_arm7tdmi_do_idle); +void cpu_arm7tdmi_dcache_clean_area(void *addr, int size); +__ADDRESSABLE(cpu_arm7tdmi_dcache_clean_area); +void cpu_arm7tdmi_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); +__ADDRESSABLE(cpu_arm7tdmi_switch_mm); +#endif + +#ifdef CONFIG_CPU_ARM720T +void cpu_arm720_proc_init(void); +__ADDRESSABLE(cpu_arm720_proc_init); +void cpu_arm720_proc_fin(void); +__ADDRESSABLE(cpu_arm720_proc_fin); +void cpu_arm720_reset(void); +__ADDRESSABLE(cpu_arm720_reset); +int cpu_arm720_do_idle(void); +__ADDRESSABLE(cpu_arm720_do_idle); +void cpu_arm720_dcache_clean_area(void *addr, int size); +__ADDRESSABLE(cpu_arm720_dcache_clean_area); +void cpu_arm720_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); +__ADDRESSABLE(cpu_arm720_switch_mm); +void cpu_arm720_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext); +__ADDRESSABLE(cpu_arm720_set_pte_ext); +#endif + +#ifdef CONFIG_CPU_ARM740T +void cpu_arm740_proc_init(void); +__ADDRESSABLE(cpu_arm740_proc_init); +void cpu_arm740_proc_fin(void); +__ADDRESSABLE(cpu_arm740_proc_fin); +void cpu_arm740_reset(void); +__ADDRESSABLE(cpu_arm740_reset); +int cpu_arm740_do_idle(void); +__ADDRESSABLE(cpu_arm740_do_idle); +void cpu_arm740_dcache_clean_area(void *addr, int size); +__ADDRESSABLE(cpu_arm740_dcache_clean_area); +void cpu_arm740_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); +__ADDRESSABLE(cpu_arm740_switch_mm); +#endif + +#ifdef CONFIG_CPU_ARM9TDMI +void cpu_arm9tdmi_proc_init(void); +__ADDRESSABLE(cpu_arm9tdmi_proc_init); +void cpu_arm9tdmi_proc_fin(void); +__ADDRESSABLE(cpu_arm9tdmi_proc_fin); +void cpu_arm9tdmi_reset(void); +__ADDRESSABLE(cpu_arm9tdmi_reset); +int cpu_arm9tdmi_do_idle(void); +__ADDRESSABLE(cpu_arm9tdmi_do_idle); +void cpu_arm9tdmi_dcache_clean_area(void *addr, int size); +__ADDRESSABLE(cpu_arm9tdmi_dcache_clean_area); +void cpu_arm9tdmi_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); +__ADDRESSABLE(cpu_arm9tdmi_switch_mm); +#endif + +#ifdef CONFIG_CPU_ARM920T +void cpu_arm920_proc_init(void); +__ADDRESSABLE(cpu_arm920_proc_init); +void cpu_arm920_proc_fin(void); +__ADDRESSABLE(cpu_arm920_proc_fin); +void cpu_arm920_reset(void); +__ADDRESSABLE(cpu_arm920_reset); +int cpu_arm920_do_idle(void); +__ADDRESSABLE(cpu_arm920_do_idle); +void cpu_arm920_dcache_clean_area(void *addr, int size); +__ADDRESSABLE(cpu_arm920_dcache_clean_area); +void cpu_arm920_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); +__ADDRESSABLE(cpu_arm920_switch_mm); +void cpu_arm920_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext); +__ADDRESSABLE(cpu_arm920_set_pte_ext); +#ifdef CONFIG_ARM_CPU_SUSPEND +void cpu_arm920_do_suspend(void *); +__ADDRESSABLE(cpu_arm920_do_suspend); +void cpu_arm920_do_resume(void *); +__ADDRESSABLE(cpu_arm920_do_resume); +#endif /* CONFIG_ARM_CPU_SUSPEND */ +#endif /* CONFIG_CPU_ARM920T */ + +#ifdef CONFIG_CPU_ARM922T +void cpu_arm922_proc_init(void); +__ADDRESSABLE(cpu_arm922_proc_init); +void cpu_arm922_proc_fin(void); +__ADDRESSABLE(cpu_arm922_proc_fin); +void cpu_arm922_reset(void); +__ADDRESSABLE(cpu_arm922_reset); +int cpu_arm922_do_idle(void); +__ADDRESSABLE(cpu_arm922_do_idle); +void cpu_arm922_dcache_clean_area(void *addr, int size); +__ADDRESSABLE(cpu_arm922_dcache_clean_area); +void cpu_arm922_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); +__ADDRESSABLE(cpu_arm922_switch_mm); +void cpu_arm922_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext); +__ADDRESSABLE(cpu_arm922_set_pte_ext); +#endif + +#ifdef CONFIG_CPU_ARM925T +void cpu_arm925_proc_init(void); +__ADDRESSABLE(cpu_arm925_proc_init); +void cpu_arm925_proc_fin(void); +__ADDRESSABLE(cpu_arm925_proc_fin); +void cpu_arm925_reset(void); +__ADDRESSABLE(cpu_arm925_reset); +int cpu_arm925_do_idle(void); +__ADDRESSABLE(cpu_arm925_do_idle); +void cpu_arm925_dcache_clean_area(void *addr, int size); +__ADDRESSABLE(cpu_arm925_dcache_clean_area); +void cpu_arm925_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); +__ADDRESSABLE(cpu_arm925_switch_mm); +void cpu_arm925_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext); +__ADDRESSABLE(cpu_arm925_set_pte_ext); +#endif + +#ifdef CONFIG_CPU_ARM926T +void cpu_arm926_proc_init(void); +__ADDRESSABLE(cpu_arm926_proc_init); +void cpu_arm926_proc_fin(void); +__ADDRESSABLE(cpu_arm926_proc_fin); +void cpu_arm926_reset(unsigned long addr, bool hvc); +__ADDRESSABLE(cpu_arm926_reset); +int cpu_arm926_do_idle(void); +__ADDRESSABLE(cpu_arm926_do_idle); +void cpu_arm926_dcache_clean_area(void *addr, int size); +__ADDRESSABLE(cpu_arm926_dcache_clean_area); +void cpu_arm926_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); +__ADDRESSABLE(cpu_arm926_switch_mm); +void cpu_arm926_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext); +__ADDRESSABLE(cpu_arm926_set_pte_ext); +#ifdef CONFIG_ARM_CPU_SUSPEND +void cpu_arm926_do_suspend(void *); +__ADDRESSABLE(cpu_arm926_do_suspend); +void cpu_arm926_do_resume(void *); +__ADDRESSABLE(cpu_arm926_do_resume); +#endif /* CONFIG_ARM_CPU_SUSPEND */ +#endif /* CONFIG_CPU_ARM926T */ + +#ifdef CONFIG_CPU_ARM940T +void cpu_arm940_proc_init(void); +__ADDRESSABLE(cpu_arm940_proc_init); +void cpu_arm940_proc_fin(void); +__ADDRESSABLE(cpu_arm940_proc_fin); +void cpu_arm940_reset(void); +__ADDRESSABLE(cpu_arm940_reset); +int cpu_arm940_do_idle(void); +__ADDRESSABLE(cpu_arm940_do_idle); +void cpu_arm940_dcache_clean_area(void *addr, int size); +__ADDRESSABLE(cpu_arm940_dcache_clean_area); +void cpu_arm940_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); +__ADDRESSABLE(cpu_arm940_switch_mm); +#endif + +#ifdef CONFIG_CPU_ARM946E +void cpu_arm946_proc_init(void); +__ADDRESSABLE(cpu_arm946_proc_init); +void cpu_arm946_proc_fin(void); +__ADDRESSABLE(cpu_arm946_proc_fin); +void cpu_arm946_reset(void); +__ADDRESSABLE(cpu_arm946_reset); +int cpu_arm946_do_idle(void); +__ADDRESSABLE(cpu_arm946_do_idle); +void cpu_arm946_dcache_clean_area(void *addr, int size); +__ADDRESSABLE(cpu_arm946_dcache_clean_area); +void cpu_arm946_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); +__ADDRESSABLE(cpu_arm946_switch_mm); +#endif + +#ifdef CONFIG_CPU_FA526 +void cpu_fa526_proc_init(void); +__ADDRESSABLE(cpu_fa526_proc_init); +void cpu_fa526_proc_fin(void); +__ADDRESSABLE(cpu_fa526_proc_fin); +void cpu_fa526_reset(unsigned long addr, bool hvc); +__ADDRESSABLE(cpu_fa526_reset); +int cpu_fa526_do_idle(void); +__ADDRESSABLE(cpu_fa526_do_idle); +void cpu_fa526_dcache_clean_area(void *addr, int size); +__ADDRESSABLE(cpu_fa526_dcache_clean_area); +void cpu_fa526_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); +__ADDRESSABLE(cpu_fa526_switch_mm); +void cpu_fa526_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext); +__ADDRESSABLE(cpu_fa526_set_pte_ext); +#endif + +#ifdef CONFIG_CPU_ARM1020 +void cpu_arm1020_proc_init(void); +__ADDRESSABLE(cpu_arm1020_proc_init); +void cpu_arm1020_proc_fin(void); +__ADDRESSABLE(cpu_arm1020_proc_fin); +void cpu_arm1020_reset(unsigned long addr, bool hvc); +__ADDRESSABLE(cpu_arm1020_reset); +int cpu_arm1020_do_idle(void); +__ADDRESSABLE(cpu_arm1020_do_idle); +void cpu_arm1020_dcache_clean_area(void *addr, int size); +__ADDRESSABLE(cpu_arm1020_dcache_clean_area); +void cpu_arm1020_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); +__ADDRESSABLE(cpu_arm1020_switch_mm); +void cpu_arm1020_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext); +__ADDRESSABLE(cpu_arm1020_set_pte_ext); +#endif + +#ifdef CONFIG_CPU_ARM1020E +void cpu_arm1020e_proc_init(void); +__ADDRESSABLE(cpu_arm1020e_proc_init); +void cpu_arm1020e_proc_fin(void); +__ADDRESSABLE(cpu_arm1020e_proc_fin); +void cpu_arm1020e_reset(unsigned long addr, bool hvc); +__ADDRESSABLE(cpu_arm1020e_reset); +int cpu_arm1020e_do_idle(void); +__ADDRESSABLE(cpu_arm1020e_do_idle); +void cpu_arm1020e_dcache_clean_area(void *addr, int size); +__ADDRESSABLE(cpu_arm1020e_dcache_clean_area); +void cpu_arm1020e_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); +__ADDRESSABLE(cpu_arm1020e_switch_mm); +void cpu_arm1020e_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext); +__ADDRESSABLE(cpu_arm1020e_set_pte_ext); +#endif + +#ifdef CONFIG_CPU_ARM1022 +void cpu_arm1022_proc_init(void); +__ADDRESSABLE(cpu_arm1022_proc_init); +void cpu_arm1022_proc_fin(void); +__ADDRESSABLE(cpu_arm1022_proc_fin); +void cpu_arm1022_reset(unsigned long addr, bool hvc); +__ADDRESSABLE(cpu_arm1022_reset); +int cpu_arm1022_do_idle(void); +__ADDRESSABLE(cpu_arm1022_do_idle); +void cpu_arm1022_dcache_clean_area(void *addr, int size); +__ADDRESSABLE(cpu_arm1022_dcache_clean_area); +void cpu_arm1022_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); +__ADDRESSABLE(cpu_arm1022_switch_mm); +void cpu_arm1022_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext); +__ADDRESSABLE(cpu_arm1022_set_pte_ext); +#endif + +#ifdef CONFIG_CPU_ARM1026 +void cpu_arm1026_proc_init(void); +__ADDRESSABLE(cpu_arm1026_proc_init); +void cpu_arm1026_proc_fin(void); +__ADDRESSABLE(cpu_arm1026_proc_fin); +void cpu_arm1026_reset(unsigned long addr, bool hvc); +__ADDRESSABLE(cpu_arm1026_reset); +int cpu_arm1026_do_idle(void); +__ADDRESSABLE(cpu_arm1026_do_idle); +void cpu_arm1026_dcache_clean_area(void *addr, int size); +__ADDRESSABLE(cpu_arm1026_dcache_clean_area); +void cpu_arm1026_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); +__ADDRESSABLE(cpu_arm1026_switch_mm); +void cpu_arm1026_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext); +__ADDRESSABLE(cpu_arm1026_set_pte_ext); +#endif + +#ifdef CONFIG_CPU_SA110 +void cpu_sa110_proc_init(void); +__ADDRESSABLE(cpu_sa110_proc_init); +void cpu_sa110_proc_fin(void); +__ADDRESSABLE(cpu_sa110_proc_fin); +void cpu_sa110_reset(unsigned long addr, bool hvc); +__ADDRESSABLE(cpu_sa110_reset); +int cpu_sa110_do_idle(void); +__ADDRESSABLE(cpu_sa110_do_idle); +void cpu_sa110_dcache_clean_area(void *addr, int size); +__ADDRESSABLE(cpu_sa110_dcache_clean_area); +void cpu_sa110_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); +__ADDRESSABLE(cpu_sa110_switch_mm); +void cpu_sa110_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext); +__ADDRESSABLE(cpu_sa110_set_pte_ext); +#endif + +#ifdef CONFIG_CPU_SA1100 +void cpu_sa1100_proc_init(void); +__ADDRESSABLE(cpu_sa1100_proc_init); +void cpu_sa1100_proc_fin(void); +__ADDRESSABLE(cpu_sa1100_proc_fin); +void cpu_sa1100_reset(unsigned long addr, bool hvc); +__ADDRESSABLE(cpu_sa1100_reset); +int cpu_sa1100_do_idle(void); +__ADDRESSABLE(cpu_sa1100_do_idle); +void cpu_sa1100_dcache_clean_area(void *addr, int size); +__ADDRESSABLE(cpu_sa1100_dcache_clean_area); +void cpu_sa1100_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); +__ADDRESSABLE(cpu_sa1100_switch_mm); +void cpu_sa1100_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext); +__ADDRESSABLE(cpu_sa1100_set_pte_ext); +#ifdef CONFIG_ARM_CPU_SUSPEND +void cpu_sa1100_do_suspend(void *); +__ADDRESSABLE(cpu_sa1100_do_suspend); +void cpu_sa1100_do_resume(void *); +__ADDRESSABLE(cpu_sa1100_do_resume); +#endif /* CONFIG_ARM_CPU_SUSPEND */ +#endif /* CONFIG_CPU_SA1100 */ + +#ifdef CONFIG_CPU_XSCALE +void cpu_xscale_proc_init(void); +__ADDRESSABLE(cpu_xscale_proc_init); +void cpu_xscale_proc_fin(void); +__ADDRESSABLE(cpu_xscale_proc_fin); +void cpu_xscale_reset(unsigned long addr, bool hvc); +__ADDRESSABLE(cpu_xscale_reset); +int cpu_xscale_do_idle(void); +__ADDRESSABLE(cpu_xscale_do_idle); +void cpu_xscale_dcache_clean_area(void *addr, int size); +__ADDRESSABLE(cpu_xscale_dcache_clean_area); +void cpu_xscale_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); +__ADDRESSABLE(cpu_xscale_switch_mm); +void cpu_xscale_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext); +__ADDRESSABLE(cpu_xscale_set_pte_ext); +#ifdef CONFIG_ARM_CPU_SUSPEND +void cpu_xscale_do_suspend(void *); +__ADDRESSABLE(cpu_xscale_do_suspend); +void cpu_xscale_do_resume(void *); +__ADDRESSABLE(cpu_xscale_do_resume); +#endif /* CONFIG_ARM_CPU_SUSPEND */ +#endif /* CONFIG_CPU_XSCALE */ + +#ifdef CONFIG_CPU_XSC3 +void cpu_xsc3_proc_init(void); +__ADDRESSABLE(cpu_xsc3_proc_init); +void cpu_xsc3_proc_fin(void); +__ADDRESSABLE(cpu_xsc3_proc_fin); +void cpu_xsc3_reset(unsigned long addr, bool hvc); +__ADDRESSABLE(cpu_xsc3_reset); +int cpu_xsc3_do_idle(void); +__ADDRESSABLE(cpu_xsc3_do_idle); +void cpu_xsc3_dcache_clean_area(void *addr, int size); +__ADDRESSABLE(cpu_xsc3_dcache_clean_area); +void cpu_xsc3_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); +__ADDRESSABLE(cpu_xsc3_switch_mm); +void cpu_xsc3_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext); +__ADDRESSABLE(cpu_xsc3_set_pte_ext); +#ifdef CONFIG_ARM_CPU_SUSPEND +void cpu_xsc3_do_suspend(void *); +__ADDRESSABLE(cpu_xsc3_do_suspend); +void cpu_xsc3_do_resume(void *); +__ADDRESSABLE(cpu_xsc3_do_resume); +#endif /* CONFIG_ARM_CPU_SUSPEND */ +#endif /* CONFIG_CPU_XSC3 */ + +#ifdef CONFIG_CPU_MOHAWK +void cpu_mohawk_proc_init(void); +__ADDRESSABLE(cpu_mohawk_proc_init); +void cpu_mohawk_proc_fin(void); +__ADDRESSABLE(cpu_mohawk_proc_fin); +void cpu_mohawk_reset(unsigned long addr, bool hvc); +__ADDRESSABLE(cpu_mohawk_reset); +int cpu_mohawk_do_idle(void); +__ADDRESSABLE(cpu_mohawk_do_idle); +void cpu_mohawk_dcache_clean_area(void *addr, int size); +__ADDRESSABLE(cpu_mohawk_dcache_clean_area); +void cpu_mohawk_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); +__ADDRESSABLE(cpu_mohawk_switch_mm); +void cpu_mohawk_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext); +__ADDRESSABLE(cpu_mohawk_set_pte_ext); +#ifdef CONFIG_ARM_CPU_SUSPEND +void cpu_mohawk_do_suspend(void *); +__ADDRESSABLE(cpu_mohawk_do_suspend); +void cpu_mohawk_do_resume(void *); +__ADDRESSABLE(cpu_mohawk_do_resume); +#endif /* CONFIG_ARM_CPU_SUSPEND */ +#endif /* CONFIG_CPU_MOHAWK */ + +#ifdef CONFIG_CPU_FEROCEON +void cpu_feroceon_proc_init(void); +__ADDRESSABLE(cpu_feroceon_proc_init); +void cpu_feroceon_proc_fin(void); +__ADDRESSABLE(cpu_feroceon_proc_fin); +void cpu_feroceon_reset(unsigned long addr, bool hvc); +__ADDRESSABLE(cpu_feroceon_reset); +int cpu_feroceon_do_idle(void); +__ADDRESSABLE(cpu_feroceon_do_idle); +void cpu_feroceon_dcache_clean_area(void *addr, int size); +__ADDRESSABLE(cpu_feroceon_dcache_clean_area); +void cpu_feroceon_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); +__ADDRESSABLE(cpu_feroceon_switch_mm); +void cpu_feroceon_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext); +__ADDRESSABLE(cpu_feroceon_set_pte_ext); +#ifdef CONFIG_ARM_CPU_SUSPEND +void cpu_feroceon_do_suspend(void *); +__ADDRESSABLE(cpu_feroceon_do_suspend); +void cpu_feroceon_do_resume(void *); +__ADDRESSABLE(cpu_feroceon_do_resume); +#endif /* CONFIG_ARM_CPU_SUSPEND */ +#endif /* CONFIG_CPU_FEROCEON */ + +#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) +void cpu_v6_proc_init(void); +__ADDRESSABLE(cpu_v6_proc_init); +void cpu_v6_proc_fin(void); +__ADDRESSABLE(cpu_v6_proc_fin); +void cpu_v6_reset(unsigned long addr, bool hvc); +__ADDRESSABLE(cpu_v6_reset); +int cpu_v6_do_idle(void); +__ADDRESSABLE(cpu_v6_do_idle); +void cpu_v6_dcache_clean_area(void *addr, int size); +__ADDRESSABLE(cpu_v6_dcache_clean_area); +void cpu_v6_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); +__ADDRESSABLE(cpu_v6_switch_mm); +void cpu_v6_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext); +__ADDRESSABLE(cpu_v6_set_pte_ext); +#ifdef CONFIG_ARM_CPU_SUSPEND +void cpu_v6_do_suspend(void *); +__ADDRESSABLE(cpu_v6_do_suspend); +void cpu_v6_do_resume(void *); +__ADDRESSABLE(cpu_v6_do_resume); +#endif /* CONFIG_ARM_CPU_SUSPEND */ +#endif /* CPU_V6 */ + +#ifdef CONFIG_CPU_V7 +void cpu_v7_proc_init(void); +__ADDRESSABLE(cpu_v7_proc_init); +void cpu_v7_proc_fin(void); +__ADDRESSABLE(cpu_v7_proc_fin); +void cpu_v7_reset(void); +__ADDRESSABLE(cpu_v7_reset); +int cpu_v7_do_idle(void); +__ADDRESSABLE(cpu_v7_do_idle); +#ifdef CONFIG_PJ4B_ERRATA_4742 +int cpu_pj4b_do_idle(void); +__ADDRESSABLE(cpu_pj4b_do_idle); +#endif +void cpu_v7_dcache_clean_area(void *addr, int size); +__ADDRESSABLE(cpu_v7_dcache_clean_area); +void cpu_v7_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); +/* Special switch_mm() callbacks to work around bugs in v7 */ +__ADDRESSABLE(cpu_v7_switch_mm); +void cpu_v7_iciallu_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); +__ADDRESSABLE(cpu_v7_iciallu_switch_mm); +void cpu_v7_bpiall_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); +__ADDRESSABLE(cpu_v7_bpiall_switch_mm); +#ifdef CONFIG_ARM_LPAE +void cpu_v7_set_pte_ext(pte_t *ptep, pte_t pte); +#else +void cpu_v7_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext); +#endif +__ADDRESSABLE(cpu_v7_set_pte_ext); +#ifdef CONFIG_ARM_CPU_SUSPEND +void cpu_v7_do_suspend(void *); +__ADDRESSABLE(cpu_v7_do_suspend); +void cpu_v7_do_resume(void *); +__ADDRESSABLE(cpu_v7_do_resume); +/* Special versions of suspend and resume for the CA9MP cores */ +void cpu_ca9mp_do_suspend(void *); +__ADDRESSABLE(cpu_ca9mp_do_suspend); +void cpu_ca9mp_do_resume(void *); +__ADDRESSABLE(cpu_ca9mp_do_resume); +/* Special versions of suspend and resume for the Marvell PJ4B cores */ +#ifdef CONFIG_CPU_PJ4B +void cpu_pj4b_do_suspend(void *); +__ADDRESSABLE(cpu_pj4b_do_suspend); +void cpu_pj4b_do_resume(void *); +__ADDRESSABLE(cpu_pj4b_do_resume); +#endif /* CONFIG_CPU_PJ4B */ +#endif /* CONFIG_ARM_CPU_SUSPEND */ +#endif /* CONFIG_CPU_V7 */ + +#ifdef CONFIG_CPU_V7M +void cpu_v7m_proc_init(void); +__ADDRESSABLE(cpu_v7m_proc_init); +void cpu_v7m_proc_fin(void); +__ADDRESSABLE(cpu_v7m_proc_fin); +void cpu_v7m_reset(unsigned long addr, bool hvc); +__ADDRESSABLE(cpu_v7m_reset); +int cpu_v7m_do_idle(void); +__ADDRESSABLE(cpu_v7m_do_idle); +void cpu_v7m_dcache_clean_area(void *addr, int size); +__ADDRESSABLE(cpu_v7m_dcache_clean_area); +void cpu_v7m_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); +__ADDRESSABLE(cpu_v7m_switch_mm); +void cpu_v7m_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext); +__ADDRESSABLE(cpu_v7m_set_pte_ext); +#ifdef CONFIG_ARM_CPU_SUSPEND +void cpu_v7m_do_suspend(void *); +__ADDRESSABLE(cpu_v7m_do_suspend); +void cpu_v7m_do_resume(void *); +__ADDRESSABLE(cpu_v7m_do_resume); +#endif /* CONFIG_ARM_CPU_SUSPEND */ +void cpu_cm7_proc_fin(void); +__ADDRESSABLE(cpu_cm7_proc_fin); +void cpu_cm7_dcache_clean_area(void *addr, int size); +__ADDRESSABLE(cpu_cm7_dcache_clean_area); +#endif /* CONFIG_CPU_V7M */ diff --git a/arch/arm/mm/tlb-fa.S b/arch/arm/mm/tlb-fa.S index def6161ec452..85a6fe766b21 100644 --- a/arch/arm/mm/tlb-fa.S +++ b/arch/arm/mm/tlb-fa.S @@ -15,6 +15,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> #include <asm/tlbflush.h> @@ -31,7 +32,7 @@ * - mm - mm_struct describing address space */ .align 4 -ENTRY(fa_flush_user_tlb_range) +SYM_TYPED_FUNC_START(fa_flush_user_tlb_range) vma_vm_mm ip, r2 act_mm r3 @ get current->active_mm eors r3, ip, r3 @ == mm ? @@ -46,9 +47,10 @@ ENTRY(fa_flush_user_tlb_range) blo 1b mcr p15, 0, r3, c7, c10, 4 @ data write barrier ret lr +SYM_FUNC_END(fa_flush_user_tlb_range) -ENTRY(fa_flush_kern_tlb_range) +SYM_TYPED_FUNC_START(fa_flush_kern_tlb_range) mov r3, #0 mcr p15, 0, r3, c7, c10, 4 @ drain WB bic r0, r0, #0x0ff @@ -60,8 +62,4 @@ ENTRY(fa_flush_kern_tlb_range) mcr p15, 0, r3, c7, c10, 4 @ data write barrier mcr p15, 0, r3, c7, c5, 4 @ prefetch flush (isb) ret lr - - __INITDATA - - /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */ - define_tlb_functions fa, fa_tlb_flags +SYM_FUNC_END(fa_flush_kern_tlb_range) diff --git a/arch/arm/mm/tlb-v4.S b/arch/arm/mm/tlb-v4.S index b962b4e75158..09ff69008d94 100644 --- a/arch/arm/mm/tlb-v4.S +++ b/arch/arm/mm/tlb-v4.S @@ -11,6 +11,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> #include <asm/tlbflush.h> @@ -27,7 +28,7 @@ * - mm - mm_struct describing address space */ .align 5 -ENTRY(v4_flush_user_tlb_range) +SYM_TYPED_FUNC_START(v4_flush_user_tlb_range) vma_vm_mm ip, r2 act_mm r3 @ get current->active_mm eors r3, ip, r3 @ == mm ? @@ -40,6 +41,7 @@ ENTRY(v4_flush_user_tlb_range) cmp r0, r1 blo 1b ret lr +SYM_FUNC_END(v4_flush_user_tlb_range) /* * v4_flush_kern_tlb_range(start, end) @@ -50,10 +52,11 @@ ENTRY(v4_flush_user_tlb_range) * - start - virtual address (may not be aligned) * - end - virtual address (may not be aligned) */ +#ifdef CONFIG_CFI_CLANG +SYM_TYPED_FUNC_START(v4_flush_kern_tlb_range) + b .v4_flush_kern_tlb_range +SYM_FUNC_END(v4_flush_kern_tlb_range) +#else .globl v4_flush_kern_tlb_range .equ v4_flush_kern_tlb_range, .v4_flush_kern_tlb_range - - __INITDATA - - /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */ - define_tlb_functions v4, v4_tlb_flags +#endif diff --git a/arch/arm/mm/tlb-v4wb.S b/arch/arm/mm/tlb-v4wb.S index 9348bba7586a..04e46c359e75 100644 --- a/arch/arm/mm/tlb-v4wb.S +++ b/arch/arm/mm/tlb-v4wb.S @@ -11,6 +11,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> #include <asm/tlbflush.h> @@ -27,7 +28,7 @@ * - mm - mm_struct describing address space */ .align 5 -ENTRY(v4wb_flush_user_tlb_range) +SYM_TYPED_FUNC_START(v4wb_flush_user_tlb_range) vma_vm_mm ip, r2 act_mm r3 @ get current->active_mm eors r3, ip, r3 @ == mm ? @@ -43,6 +44,7 @@ ENTRY(v4wb_flush_user_tlb_range) cmp r0, r1 blo 1b ret lr +SYM_FUNC_END(v4wb_flush_user_tlb_range) /* * v4_flush_kern_tlb_range(start, end) @@ -53,7 +55,7 @@ ENTRY(v4wb_flush_user_tlb_range) * - start - virtual address (may not be aligned) * - end - virtual address (may not be aligned) */ -ENTRY(v4wb_flush_kern_tlb_range) +SYM_TYPED_FUNC_START(v4wb_flush_kern_tlb_range) mov r3, #0 mcr p15, 0, r3, c7, c10, 4 @ drain WB bic r0, r0, #0x0ff @@ -64,8 +66,4 @@ ENTRY(v4wb_flush_kern_tlb_range) cmp r0, r1 blo 1b ret lr - - __INITDATA - - /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */ - define_tlb_functions v4wb, v4wb_tlb_flags +SYM_FUNC_END(v4wb_flush_kern_tlb_range) diff --git a/arch/arm/mm/tlb-v4wbi.S b/arch/arm/mm/tlb-v4wbi.S index d4f9040a4111..502dfe5628a3 100644 --- a/arch/arm/mm/tlb-v4wbi.S +++ b/arch/arm/mm/tlb-v4wbi.S @@ -11,6 +11,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/cfi_types.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> #include <asm/tlbflush.h> @@ -26,7 +27,7 @@ * - mm - mm_struct describing address space */ .align 5 -ENTRY(v4wbi_flush_user_tlb_range) +SYM_TYPED_FUNC_START(v4wbi_flush_user_tlb_range) vma_vm_mm ip, r2 act_mm r3 @ get current->active_mm eors r3, ip, r3 @ == mm ? @@ -43,8 +44,9 @@ ENTRY(v4wbi_flush_user_tlb_range) cmp r0, r1 blo 1b ret lr +SYM_FUNC_END(v4wbi_flush_user_tlb_range) -ENTRY(v4wbi_flush_kern_tlb_range) +SYM_TYPED_FUNC_START(v4wbi_flush_kern_tlb_range) mov r3, #0 mcr p15, 0, r3, c7, c10, 4 @ drain WB bic r0, r0, #0x0ff @@ -55,8 +57,4 @@ ENTRY(v4wbi_flush_kern_tlb_range) cmp r0, r1 blo 1b ret lr - - __INITDATA - - /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */ - define_tlb_functions v4wbi, v4wbi_tlb_flags +SYM_FUNC_END(v4wbi_flush_kern_tlb_range) diff --git a/arch/arm/mm/tlb-v6.S b/arch/arm/mm/tlb-v6.S index 1d91e49b2c2d..8256a67ac654 100644 --- a/arch/arm/mm/tlb-v6.S +++ b/arch/arm/mm/tlb-v6.S @@ -9,6 +9,7 @@ */ #include <linux/init.h> #include <linux/linkage.h> +#include <linux/cfi_types.h> #include <asm/asm-offsets.h> #include <asm/assembler.h> #include <asm/page.h> @@ -32,7 +33,7 @@ * - the "Invalidate single entry" instruction will invalidate * both the I and the D TLBs on Harvard-style TLBs */ -ENTRY(v6wbi_flush_user_tlb_range) +SYM_TYPED_FUNC_START(v6wbi_flush_user_tlb_range) vma_vm_mm r3, r2 @ get vma->vm_mm mov ip, #0 mmid r3, r3 @ get vm_mm->context.id @@ -56,6 +57,7 @@ ENTRY(v6wbi_flush_user_tlb_range) blo 1b mcr p15, 0, ip, c7, c10, 4 @ data synchronization barrier ret lr +SYM_FUNC_END(v6wbi_flush_user_tlb_range) /* * v6wbi_flush_kern_tlb_range(start,end) @@ -65,7 +67,7 @@ ENTRY(v6wbi_flush_user_tlb_range) * - start - start address (may not be aligned) * - end - end address (exclusive, may not be aligned) */ -ENTRY(v6wbi_flush_kern_tlb_range) +SYM_TYPED_FUNC_START(v6wbi_flush_kern_tlb_range) mov r2, #0 mcr p15, 0, r2, c7, c10, 4 @ drain write buffer mov r0, r0, lsr #PAGE_SHIFT @ align address @@ -85,8 +87,4 @@ ENTRY(v6wbi_flush_kern_tlb_range) mcr p15, 0, r2, c7, c10, 4 @ data synchronization barrier mcr p15, 0, r2, c7, c5, 4 @ prefetch flush (isb) ret lr - - __INIT - - /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */ - define_tlb_functions v6wbi, v6wbi_tlb_flags +SYM_FUNC_END(v6wbi_flush_kern_tlb_range) diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S index 35fd6d4f0d03..f1aa0764a2cc 100644 --- a/arch/arm/mm/tlb-v7.S +++ b/arch/arm/mm/tlb-v7.S @@ -10,6 +10,7 @@ */ #include <linux/init.h> #include <linux/linkage.h> +#include <linux/cfi_types.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> #include <asm/page.h> @@ -31,7 +32,7 @@ * - the "Invalidate single entry" instruction will invalidate * both the I and the D TLBs on Harvard-style TLBs */ -ENTRY(v7wbi_flush_user_tlb_range) +SYM_TYPED_FUNC_START(v7wbi_flush_user_tlb_range) vma_vm_mm r3, r2 @ get vma->vm_mm mmid r3, r3 @ get vm_mm->context.id dsb ish @@ -57,7 +58,7 @@ ENTRY(v7wbi_flush_user_tlb_range) blo 1b dsb ish ret lr -ENDPROC(v7wbi_flush_user_tlb_range) +SYM_FUNC_END(v7wbi_flush_user_tlb_range) /* * v7wbi_flush_kern_tlb_range(start,end) @@ -67,7 +68,7 @@ ENDPROC(v7wbi_flush_user_tlb_range) * - start - start address (may not be aligned) * - end - end address (exclusive, may not be aligned) */ -ENTRY(v7wbi_flush_kern_tlb_range) +SYM_TYPED_FUNC_START(v7wbi_flush_kern_tlb_range) dsb ish mov r0, r0, lsr #PAGE_SHIFT @ align address mov r1, r1, lsr #PAGE_SHIFT @@ -86,9 +87,4 @@ ENTRY(v7wbi_flush_kern_tlb_range) dsb ish isb ret lr -ENDPROC(v7wbi_flush_kern_tlb_range) - - __INIT - - /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */ - define_tlb_functions v7wbi, v7wbi_tlb_flags_up, flags_smp=v7wbi_tlb_flags_smp +SYM_FUNC_END(v7wbi_flush_kern_tlb_range) diff --git a/arch/arm/mm/tlb.c b/arch/arm/mm/tlb.c new file mode 100644 index 000000000000..42359793120b --- /dev/null +++ b/arch/arm/mm/tlb.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright 2024 Google LLC +// Author: Ard Biesheuvel <ardb@google.com> + +#include <linux/types.h> +#include <asm/tlbflush.h> + +#ifdef CONFIG_CPU_TLB_V4WT +void v4_flush_user_tlb_range(unsigned long, unsigned long, struct vm_area_struct *); +void v4_flush_kern_tlb_range(unsigned long, unsigned long); + +struct cpu_tlb_fns v4_tlb_fns __initconst = { + .flush_user_range = v4_flush_user_tlb_range, + .flush_kern_range = v4_flush_kern_tlb_range, + .tlb_flags = v4_tlb_flags, +}; +#endif + +#ifdef CONFIG_CPU_TLB_V4WB +void v4wb_flush_user_tlb_range(unsigned long, unsigned long, struct vm_area_struct *); +void v4wb_flush_kern_tlb_range(unsigned long, unsigned long); + +struct cpu_tlb_fns v4wb_tlb_fns __initconst = { + .flush_user_range = v4wb_flush_user_tlb_range, + .flush_kern_range = v4wb_flush_kern_tlb_range, + .tlb_flags = v4wb_tlb_flags, +}; +#endif + +#if defined(CONFIG_CPU_TLB_V4WBI) || defined(CONFIG_CPU_TLB_FEROCEON) +void v4wbi_flush_user_tlb_range(unsigned long, unsigned long, struct vm_area_struct *); +void v4wbi_flush_kern_tlb_range(unsigned long, unsigned long); + +struct cpu_tlb_fns v4wbi_tlb_fns __initconst = { + .flush_user_range = v4wbi_flush_user_tlb_range, + .flush_kern_range = v4wbi_flush_kern_tlb_range, + .tlb_flags = v4wbi_tlb_flags, +}; +#endif + +#ifdef CONFIG_CPU_TLB_V6 +void v6wbi_flush_user_tlb_range(unsigned long, unsigned long, struct vm_area_struct *); +void v6wbi_flush_kern_tlb_range(unsigned long, unsigned long); + +struct cpu_tlb_fns v6wbi_tlb_fns __initconst = { + .flush_user_range = v6wbi_flush_user_tlb_range, + .flush_kern_range = v6wbi_flush_kern_tlb_range, + .tlb_flags = v6wbi_tlb_flags, +}; +#endif + +#ifdef CONFIG_CPU_TLB_V7 +void v7wbi_flush_user_tlb_range(unsigned long, unsigned long, struct vm_area_struct *); +void v7wbi_flush_kern_tlb_range(unsigned long, unsigned long); + +struct cpu_tlb_fns v7wbi_tlb_fns __initconst = { + .flush_user_range = v7wbi_flush_user_tlb_range, + .flush_kern_range = v7wbi_flush_kern_tlb_range, + .tlb_flags = IS_ENABLED(CONFIG_SMP) ? v7wbi_tlb_flags_smp + : v7wbi_tlb_flags_up, +}; + +#ifdef CONFIG_SMP_ON_UP +/* This will be run-time patched so the offset better be right */ +static_assert(offsetof(struct cpu_tlb_fns, tlb_flags) == 8); + +asm(" .pushsection \".alt.smp.init\", \"a\" \n" \ + " .align 2 \n" \ + " .long v7wbi_tlb_fns + 8 - . \n" \ + " .long " __stringify(v7wbi_tlb_flags_up) " \n" \ + " .popsection \n"); +#endif +#endif + +#ifdef CONFIG_CPU_TLB_FA +void fa_flush_user_tlb_range(unsigned long, unsigned long, struct vm_area_struct *); +void fa_flush_kern_tlb_range(unsigned long, unsigned long); + +struct cpu_tlb_fns fa_tlb_fns __initconst = { + .flush_user_range = fa_flush_user_tlb_range, + .flush_kern_range = fa_flush_kern_tlb_range, + .tlb_flags = fa_tlb_flags, +}; +#endif diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a04059c31aba..2df0818c3ca9 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -105,6 +105,7 @@ config ARM64 select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36) select ARCH_WANT_LD_ORPHAN_WARN + select ARCH_WANTS_EXECMEM_LATE if EXECMEM select ARCH_WANTS_NO_INSTR select ARCH_WANTS_THP_SWAP if ARM64_4K_PAGES select ARCH_HAS_UBSAN diff --git a/arch/arm64/boot/dts/qcom/sc7180-acer-aspire1.dts b/arch/arm64/boot/dts/qcom/sc7180-acer-aspire1.dts index 5afcb8212f49..3f0d3e33894a 100644 --- a/arch/arm64/boot/dts/qcom/sc7180-acer-aspire1.dts +++ b/arch/arm64/boot/dts/qcom/sc7180-acer-aspire1.dts @@ -255,7 +255,25 @@ clock-frequency = <400000>; status = "okay"; - /* embedded-controller@76 */ + embedded-controller@76 { + compatible = "acer,aspire1-ec"; + reg = <0x76>; + + interrupts-extended = <&tlmm 30 IRQ_TYPE_LEVEL_LOW>; + + pinctrl-0 = <&ec_int_default>; + pinctrl-names = "default"; + + connector { + compatible = "usb-c-connector"; + + port { + ec_dp_in: endpoint { + remote-endpoint = <&mdss_dp_out>; + }; + }; + }; + }; }; &i2c4 { @@ -419,6 +437,19 @@ status = "okay"; }; +&mdss_dp { + data-lanes = <0 1>; + + vdda-1p2-supply = <&vreg_l3c_1p2>; + vdda-0p9-supply = <&vreg_l4a_0p8>; + + status = "okay"; +}; + +&mdss_dp_out { + remote-endpoint = <&ec_dp_in>; +}; + &mdss_dsi0 { vdda-supply = <&vreg_l3c_1p2>; status = "okay"; @@ -857,6 +888,13 @@ bias-disable; }; + ec_int_default: ec-int-default-state { + pins = "gpio30"; + function = "gpio"; + drive-strength = <2>; + bias-disable; + }; + edp_bridge_irq_default: edp-bridge-irq-default-state { pins = "gpio11"; function = "gpio"; diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 81606bf7d5ac..7abf09df7033 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -404,6 +404,18 @@ static inline bool esr_fsc_is_access_flag_fault(unsigned long esr) return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS; } +/* Indicate whether ESR.EC==0x1A is for an ERETAx instruction */ +static inline bool esr_iss_is_eretax(unsigned long esr) +{ + return esr & ESR_ELx_ERET_ISS_ERET; +} + +/* Indicate which key is used for ERETAx (false: A-Key, true: B-Key) */ +static inline bool esr_iss_is_eretab(unsigned long esr) +{ + return esr & ESR_ELx_ERET_ISS_ERETA; +} + const char *esr_get_class_string(unsigned long esr); #endif /* __ASSEMBLY */ diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 24b5e6b23417..a6330460d9e5 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -73,10 +73,8 @@ enum __kvm_host_smccc_func { __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_range, __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context, __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff, - __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr, - __KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr, - __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs, - __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs, + __KVM_HOST_SMCCC_FUNC___vgic_v3_save_vmcr_aprs, + __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs, __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_init_traps, __KVM_HOST_SMCCC_FUNC___pkvm_init_vm, __KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu, @@ -241,8 +239,6 @@ extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); extern void __kvm_adjust_pc(struct kvm_vcpu *vcpu); extern u64 __vgic_v3_get_gic_config(void); -extern u64 __vgic_v3_read_vmcr(void); -extern void __vgic_v3_write_vmcr(u32 vmcr); extern void __vgic_v3_init_lrs(void); extern u64 __kvm_get_mdcr_el2(void); diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 975af30af31f..501e3e019c93 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -125,16 +125,6 @@ static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu) vcpu->arch.hcr_el2 |= HCR_TWI; } -static inline void vcpu_ptrauth_enable(struct kvm_vcpu *vcpu) -{ - vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK); -} - -static inline void vcpu_ptrauth_disable(struct kvm_vcpu *vcpu) -{ - vcpu->arch.hcr_el2 &= ~(HCR_API | HCR_APK); -} - static inline unsigned long vcpu_get_vsesr(struct kvm_vcpu *vcpu) { return vcpu->arch.vsesr_el2; @@ -587,16 +577,14 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu) } else if (has_hvhe()) { val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN); - if (!vcpu_has_sve(vcpu) || - (vcpu->arch.fp_state != FP_STATE_GUEST_OWNED)) + if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs()) val |= CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN; if (cpus_have_final_cap(ARM64_SME)) val |= CPACR_EL1_SMEN_EL1EN | CPACR_EL1_SMEN_EL0EN; } else { val = CPTR_NVHE_EL2_RES1; - if (vcpu_has_sve(vcpu) && - (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)) + if (vcpu_has_sve(vcpu) && guest_owns_fp_regs()) val |= CPTR_EL2_TZ; if (cpus_have_final_cap(ARM64_SME)) val &= ~CPTR_EL2_TSM; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 9e8a496fb284..8170c04fde91 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -211,6 +211,7 @@ typedef unsigned int pkvm_handle_t; struct kvm_protected_vm { pkvm_handle_t handle; struct kvm_hyp_memcache teardown_mc; + bool enabled; }; struct kvm_mpidr_data { @@ -220,20 +221,10 @@ struct kvm_mpidr_data { static inline u16 kvm_mpidr_index(struct kvm_mpidr_data *data, u64 mpidr) { - unsigned long mask = data->mpidr_mask; - u64 aff = mpidr & MPIDR_HWID_BITMASK; - int nbits, bit, bit_idx = 0; - u16 index = 0; + unsigned long index = 0, mask = data->mpidr_mask; + unsigned long aff = mpidr & MPIDR_HWID_BITMASK; - /* - * If this looks like RISC-V's BEXT or x86's PEXT - * instructions, it isn't by accident. - */ - nbits = fls(mask); - for_each_set_bit(bit, &mask, nbits) { - index |= (aff & BIT(bit)) >> (bit - bit_idx); - bit_idx++; - } + bitmap_gather(&index, &aff, &mask, fls(mask)); return index; } @@ -530,8 +521,42 @@ struct kvm_cpu_context { u64 *vncr_array; }; +/* + * This structure is instantiated on a per-CPU basis, and contains + * data that is: + * + * - tied to a single physical CPU, and + * - either have a lifetime that does not extend past vcpu_put() + * - or is an invariant for the lifetime of the system + * + * Use host_data_ptr(field) as a way to access a pointer to such a + * field. + */ struct kvm_host_data { struct kvm_cpu_context host_ctxt; + struct user_fpsimd_state *fpsimd_state; /* hyp VA */ + + /* Ownership of the FP regs */ + enum { + FP_STATE_FREE, + FP_STATE_HOST_OWNED, + FP_STATE_GUEST_OWNED, + } fp_owner; + + /* + * host_debug_state contains the host registers which are + * saved and restored during world switches. + */ + struct { + /* {Break,watch}point registers */ + struct kvm_guest_debug_arch regs; + /* Statistical profiling extension */ + u64 pmscr_el1; + /* Self-hosted trace */ + u64 trfcr_el1; + /* Values of trap registers for the host before guest entry. */ + u64 mdcr_el2; + } host_debug_state; }; struct kvm_host_psci_config { @@ -592,19 +617,9 @@ struct kvm_vcpu_arch { u64 mdcr_el2; u64 cptr_el2; - /* Values of trap registers for the host before guest entry. */ - u64 mdcr_el2_host; - /* Exception Information */ struct kvm_vcpu_fault_info fault; - /* Ownership of the FP regs */ - enum { - FP_STATE_FREE, - FP_STATE_HOST_OWNED, - FP_STATE_GUEST_OWNED, - } fp_state; - /* Configuration flags, set once and for all before the vcpu can run */ u8 cflags; @@ -627,11 +642,10 @@ struct kvm_vcpu_arch { * We maintain more than a single set of debug registers to support * debugging the guest from the host and to maintain separate host and * guest state during world switches. vcpu_debug_state are the debug - * registers of the vcpu as the guest sees them. host_debug_state are - * the host registers which are saved and restored during - * world switches. external_debug_state contains the debug - * values we want to debug the guest. This is set via the - * KVM_SET_GUEST_DEBUG ioctl. + * registers of the vcpu as the guest sees them. + * + * external_debug_state contains the debug values we want to debug the + * guest. This is set via the KVM_SET_GUEST_DEBUG ioctl. * * debug_ptr points to the set of debug registers that should be loaded * onto the hardware when running the guest. @@ -640,18 +654,6 @@ struct kvm_vcpu_arch { struct kvm_guest_debug_arch vcpu_debug_state; struct kvm_guest_debug_arch external_debug_state; - struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */ - struct task_struct *parent_task; - - struct { - /* {Break,watch}point registers */ - struct kvm_guest_debug_arch regs; - /* Statistical profiling extension */ - u64 pmscr_el1; - /* Self-hosted trace */ - u64 trfcr_el1; - } host_debug_state; - /* VGIC state */ struct vgic_cpu vgic_cpu; struct arch_timer_cpu timer_cpu; @@ -817,8 +819,6 @@ struct kvm_vcpu_arch { #define DEBUG_STATE_SAVE_SPE __vcpu_single_flag(iflags, BIT(5)) /* Save TRBE context if active */ #define DEBUG_STATE_SAVE_TRBE __vcpu_single_flag(iflags, BIT(6)) -/* vcpu running in HYP context */ -#define VCPU_HYP_CONTEXT __vcpu_single_flag(iflags, BIT(7)) /* SVE enabled for host EL0 */ #define HOST_SVE_ENABLED __vcpu_single_flag(sflags, BIT(0)) @@ -896,7 +896,7 @@ struct kvm_vcpu_arch { * Don't bother with VNCR-based accesses in the nVHE code, it has no * business dealing with NV. */ -static inline u64 *__ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r) +static inline u64 *___ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r) { #if !defined (__KVM_NVHE_HYPERVISOR__) if (unlikely(cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) && @@ -906,6 +906,13 @@ static inline u64 *__ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r) return (u64 *)&ctxt->sys_regs[r]; } +#define __ctxt_sys_reg(c,r) \ + ({ \ + BUILD_BUG_ON(__builtin_constant_p(r) && \ + (r) >= NR_SYS_REGS); \ + ___ctxt_sys_reg(c, r); \ + }) + #define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r)) u64 kvm_vcpu_sanitise_vncr_reg(const struct kvm_vcpu *, enum vcpu_sysreg); @@ -1168,6 +1175,44 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); DECLARE_KVM_HYP_PER_CPU(struct kvm_host_data, kvm_host_data); +/* + * How we access per-CPU host data depends on the where we access it from, + * and the mode we're in: + * + * - VHE and nVHE hypervisor bits use their locally defined instance + * + * - the rest of the kernel use either the VHE or nVHE one, depending on + * the mode we're running in. + * + * Unless we're in protected mode, fully deprivileged, and the nVHE + * per-CPU stuff is exclusively accessible to the protected EL2 code. + * In this case, the EL1 code uses the *VHE* data as its private state + * (which makes sense in a way as there shouldn't be any shared state + * between the host and the hypervisor). + * + * Yes, this is all totally trivial. Shoot me now. + */ +#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__) +#define host_data_ptr(f) (&this_cpu_ptr(&kvm_host_data)->f) +#else +#define host_data_ptr(f) \ + (static_branch_unlikely(&kvm_protected_mode_initialized) ? \ + &this_cpu_ptr(&kvm_host_data)->f : \ + &this_cpu_ptr_hyp_sym(kvm_host_data)->f) +#endif + +/* Check whether the FP regs are owned by the guest */ +static inline bool guest_owns_fp_regs(void) +{ + return *host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED; +} + +/* Check whether the FP regs are owned by the host */ +static inline bool host_owns_fp_regs(void) +{ + return *host_data_ptr(fp_owner) == FP_STATE_HOST_OWNED; +} + static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt) { /* The host's MPIDR is immutable, so let's set it up at boot time */ @@ -1211,7 +1256,6 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu); -void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu); static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr) { @@ -1247,10 +1291,9 @@ struct kvm *kvm_arch_alloc_vm(void); #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE -static inline bool kvm_vm_is_protected(struct kvm *kvm) -{ - return false; -} +#define kvm_vm_is_protected(kvm) (is_protected_kvm_enabled() && (kvm)->arch.pkvm.enabled) + +#define vcpu_is_protected(vcpu) kvm_vm_is_protected((vcpu)->kvm) int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature); bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); @@ -1275,6 +1318,8 @@ static inline bool __vcpu_has_feature(const struct kvm_arch *ka, int feature) #define vcpu_has_feature(v, f) __vcpu_has_feature(&(v)->kvm->arch, (f)) +#define kvm_vcpu_initialized(v) vcpu_get_flag(vcpu, VCPU_INITIALIZED) + int kvm_trng_call(struct kvm_vcpu *vcpu); #ifdef CONFIG_KVM extern phys_addr_t hyp_mem_base; @@ -1331,4 +1376,19 @@ bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu); (get_idreg_field((kvm), id, fld) >= expand_field_sign(id, fld, min) && \ get_idreg_field((kvm), id, fld) <= expand_field_sign(id, fld, max)) +/* Check for a given level of PAuth support */ +#define kvm_has_pauth(k, l) \ + ({ \ + bool pa, pi, pa3; \ + \ + pa = kvm_has_feat((k), ID_AA64ISAR1_EL1, APA, l); \ + pa &= kvm_has_feat((k), ID_AA64ISAR1_EL1, GPA, IMP); \ + pi = kvm_has_feat((k), ID_AA64ISAR1_EL1, API, l); \ + pi &= kvm_has_feat((k), ID_AA64ISAR1_EL1, GPI, IMP); \ + pa3 = kvm_has_feat((k), ID_AA64ISAR2_EL1, APA3, l); \ + pa3 &= kvm_has_feat((k), ID_AA64ISAR2_EL1, GPA3, IMP); \ + \ + (pa + pi + pa3) == 1; \ + }) + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 3e2a1ac0c9bb..3e80464f8953 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -80,8 +80,8 @@ void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if); void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if); void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if); void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if); -void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if); -void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if); int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu); #ifdef __KVM_NVHE_HYPERVISOR__ diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index c77d795556e1..5e0ab0596246 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -60,7 +60,20 @@ static inline u64 translate_ttbr0_el2_to_ttbr0_el1(u64 ttbr0) return ttbr0 & ~GENMASK_ULL(63, 48); } +extern bool forward_smc_trap(struct kvm_vcpu *vcpu); int kvm_init_nv_sysregs(struct kvm *kvm); +#ifdef CONFIG_ARM64_PTR_AUTH +bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr); +#else +static inline bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr) +{ + /* We really should never execute this... */ + WARN_ON_ONCE(1); + *elr = 0xbad9acc0debadbad; + return false; +} +#endif + #endif /* __ARM64_KVM_NESTED_H */ diff --git a/arch/arm64/include/asm/kvm_ptrauth.h b/arch/arm64/include/asm/kvm_ptrauth.h index 0cd0965255d2..d81bac256abc 100644 --- a/arch/arm64/include/asm/kvm_ptrauth.h +++ b/arch/arm64/include/asm/kvm_ptrauth.h @@ -99,5 +99,26 @@ alternative_else_nop_endif .macro ptrauth_switch_to_hyp g_ctxt, h_ctxt, reg1, reg2, reg3 .endm #endif /* CONFIG_ARM64_PTR_AUTH */ + +#else /* !__ASSEMBLY */ + +#define __ptrauth_save_key(ctxt, key) \ + do { \ + u64 __val; \ + __val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \ + ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val; \ + __val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ + ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val; \ + } while(0) + +#define ptrauth_save_keys(ctxt) \ + do { \ + __ptrauth_save_key(ctxt, APIA); \ + __ptrauth_save_key(ctxt, APIB); \ + __ptrauth_save_key(ctxt, APDA); \ + __ptrauth_save_key(ctxt, APDB); \ + __ptrauth_save_key(ctxt, APGA); \ + } while(0) + #endif /* __ASSEMBLY__ */ #endif /* __ASM_KVM_PTRAUTH_H */ diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index ef207a0d4f0d..9943ff0af4c9 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -297,6 +297,7 @@ #define TCR_TBI1 (UL(1) << 38) #define TCR_HA (UL(1) << 39) #define TCR_HD (UL(1) << 40) +#define TCR_TBID0 (UL(1) << 51) #define TCR_TBID1 (UL(1) << 52) #define TCR_NFD0 (UL(1) << 53) #define TCR_NFD1 (UL(1) << 54) diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 261d6e9df2e1..ebf4a9f943ed 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -82,6 +82,12 @@ bool is_kvm_arm_initialised(void); DECLARE_STATIC_KEY_FALSE(kvm_protected_mode_initialized); +static inline bool is_pkvm_initialized(void) +{ + return IS_ENABLED(CONFIG_KVM) && + static_branch_likely(&kvm_protected_mode_initialized); +} + /* Reports the availability of HYP mode */ static inline bool is_hyp_mode_available(void) { @@ -89,8 +95,7 @@ static inline bool is_hyp_mode_available(void) * If KVM protected mode is initialized, all CPUs must have been booted * in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1. */ - if (IS_ENABLED(CONFIG_KVM) && - static_branch_likely(&kvm_protected_mode_initialized)) + if (is_pkvm_initialized()) return true; return (__boot_cpu_mode[0] == BOOT_CPU_MODE_EL2 && @@ -104,8 +109,7 @@ static inline bool is_hyp_mode_mismatched(void) * If KVM protected mode is initialized, all CPUs must have been booted * in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1. */ - if (IS_ENABLED(CONFIG_KVM) && - static_branch_likely(&kvm_protected_mode_initialized)) + if (is_pkvm_initialized()) return false; return __boot_cpu_mode[0] != __boot_cpu_mode[1]; diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 47e0be610bb6..36b25af56324 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -12,144 +12,18 @@ #include <linux/bitops.h> #include <linux/elf.h> #include <linux/ftrace.h> -#include <linux/gfp.h> #include <linux/kasan.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/moduleloader.h> #include <linux/random.h> #include <linux/scs.h> -#include <linux/vmalloc.h> #include <asm/alternative.h> #include <asm/insn.h> #include <asm/scs.h> #include <asm/sections.h> -static u64 module_direct_base __ro_after_init = 0; -static u64 module_plt_base __ro_after_init = 0; - -/* - * Choose a random page-aligned base address for a window of 'size' bytes which - * entirely contains the interval [start, end - 1]. - */ -static u64 __init random_bounding_box(u64 size, u64 start, u64 end) -{ - u64 max_pgoff, pgoff; - - if ((end - start) >= size) - return 0; - - max_pgoff = (size - (end - start)) / PAGE_SIZE; - pgoff = get_random_u32_inclusive(0, max_pgoff); - - return start - pgoff * PAGE_SIZE; -} - -/* - * Modules may directly reference data and text anywhere within the kernel - * image and other modules. References using PREL32 relocations have a +/-2G - * range, and so we need to ensure that the entire kernel image and all modules - * fall within a 2G window such that these are always within range. - * - * Modules may directly branch to functions and code within the kernel text, - * and to functions and code within other modules. These branches will use - * CALL26/JUMP26 relocations with a +/-128M range. Without PLTs, we must ensure - * that the entire kernel text and all module text falls within a 128M window - * such that these are always within range. With PLTs, we can expand this to a - * 2G window. - * - * We chose the 128M region to surround the entire kernel image (rather than - * just the text) as using the same bounds for the 128M and 2G regions ensures - * by construction that we never select a 128M region that is not a subset of - * the 2G region. For very large and unusual kernel configurations this means - * we may fall back to PLTs where they could have been avoided, but this keeps - * the logic significantly simpler. - */ -static int __init module_init_limits(void) -{ - u64 kernel_end = (u64)_end; - u64 kernel_start = (u64)_text; - u64 kernel_size = kernel_end - kernel_start; - - /* - * The default modules region is placed immediately below the kernel - * image, and is large enough to use the full 2G relocation range. - */ - BUILD_BUG_ON(KIMAGE_VADDR != MODULES_END); - BUILD_BUG_ON(MODULES_VSIZE < SZ_2G); - - if (!kaslr_enabled()) { - if (kernel_size < SZ_128M) - module_direct_base = kernel_end - SZ_128M; - if (kernel_size < SZ_2G) - module_plt_base = kernel_end - SZ_2G; - } else { - u64 min = kernel_start; - u64 max = kernel_end; - - if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) { - pr_info("2G module region forced by RANDOMIZE_MODULE_REGION_FULL\n"); - } else { - module_direct_base = random_bounding_box(SZ_128M, min, max); - if (module_direct_base) { - min = module_direct_base; - max = module_direct_base + SZ_128M; - } - } - - module_plt_base = random_bounding_box(SZ_2G, min, max); - } - - pr_info("%llu pages in range for non-PLT usage", - module_direct_base ? (SZ_128M - kernel_size) / PAGE_SIZE : 0); - pr_info("%llu pages in range for PLT usage", - module_plt_base ? (SZ_2G - kernel_size) / PAGE_SIZE : 0); - - return 0; -} -subsys_initcall(module_init_limits); - -void *module_alloc(unsigned long size) -{ - void *p = NULL; - - /* - * Where possible, prefer to allocate within direct branch range of the - * kernel such that no PLTs are necessary. - */ - if (module_direct_base) { - p = __vmalloc_node_range(size, MODULE_ALIGN, - module_direct_base, - module_direct_base + SZ_128M, - GFP_KERNEL | __GFP_NOWARN, - PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); - } - - if (!p && module_plt_base) { - p = __vmalloc_node_range(size, MODULE_ALIGN, - module_plt_base, - module_plt_base + SZ_2G, - GFP_KERNEL | __GFP_NOWARN, - PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); - } - - if (!p) { - pr_warn_ratelimited("%s: unable to allocate memory\n", - __func__); - } - - if (p && (kasan_alloc_module_shadow(p, size, GFP_KERNEL) < 0)) { - vfree(p); - return NULL; - } - - /* Memory is intended to be executable, reset the pointer tag. */ - return kasan_reset_tag(p); -} - enum aarch64_reloc_op { RELOC_OP_NONE, RELOC_OP_ABS, diff --git a/arch/arm64/kernel/pi/idreg-override.c b/arch/arm64/kernel/pi/idreg-override.c index 48c1aa456af9..29d4b6244a6f 100644 --- a/arch/arm64/kernel/pi/idreg-override.c +++ b/arch/arm64/kernel/pi/idreg-override.c @@ -210,8 +210,8 @@ static const struct { char alias[FTR_ALIAS_NAME_LEN]; char feature[FTR_ALIAS_OPTION_LEN]; } aliases[] __initconst = { - { "kvm_arm.mode=nvhe", "id_aa64mmfr1.vh=0" }, - { "kvm_arm.mode=protected", "id_aa64mmfr1.vh=0" }, + { "kvm_arm.mode=nvhe", "arm64_sw.hvhe=0 id_aa64mmfr1.vh=0" }, + { "kvm_arm.mode=protected", "arm64_sw.hvhe=1" }, { "arm64.nosve", "id_aa64pfr0.sve=0" }, { "arm64.nosme", "id_aa64pfr1.sme=0" }, { "arm64.nobti", "id_aa64pfr1.bt=0" }, diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 327855a11df2..4268678d0e86 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -129,13 +129,6 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return 0; } -void *alloc_insn_page(void) -{ - return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END, - GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS, - NUMA_NO_NODE, __builtin_return_address(0)); -} - /* arm kprobe: install breakpoint in text */ void __kprobes arch_arm_kprobe(struct kprobe *p) { diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index c0c050e53157..04882b577575 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -23,6 +23,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ vgic/vgic-its.o vgic/vgic-debug.o kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o +kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o always-y := hyp_constants.h hyp-constants.s diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index c4a0a35e02c7..9996a989b52e 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -35,10 +35,11 @@ #include <asm/virt.h> #include <asm/kvm_arm.h> #include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> #include <asm/kvm_mmu.h> #include <asm/kvm_nested.h> #include <asm/kvm_pkvm.h> -#include <asm/kvm_emulate.h> +#include <asm/kvm_ptrauth.h> #include <asm/sections.h> #include <kvm/arm_hypercalls.h> @@ -69,15 +70,42 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; } +/* + * This functions as an allow-list of protected VM capabilities. + * Features not explicitly allowed by this function are denied. + */ +static bool pkvm_ext_allowed(struct kvm *kvm, long ext) +{ + switch (ext) { + case KVM_CAP_IRQCHIP: + case KVM_CAP_ARM_PSCI: + case KVM_CAP_ARM_PSCI_0_2: + case KVM_CAP_NR_VCPUS: + case KVM_CAP_MAX_VCPUS: + case KVM_CAP_MAX_VCPU_ID: + case KVM_CAP_MSI_DEVID: + case KVM_CAP_ARM_VM_IPA_SIZE: + case KVM_CAP_ARM_PMU_V3: + case KVM_CAP_ARM_SVE: + case KVM_CAP_ARM_PTRAUTH_ADDRESS: + case KVM_CAP_ARM_PTRAUTH_GENERIC: + return true; + default: + return false; + } +} + int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) { - int r; - u64 new_cap; + int r = -EINVAL; if (cap->flags) return -EINVAL; + if (kvm_vm_is_protected(kvm) && !pkvm_ext_allowed(kvm, cap->cap)) + return -EINVAL; + switch (cap->cap) { case KVM_CAP_ARM_NISV_TO_USER: r = 0; @@ -86,9 +114,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, break; case KVM_CAP_ARM_MTE: mutex_lock(&kvm->lock); - if (!system_supports_mte() || kvm->created_vcpus) { - r = -EINVAL; - } else { + if (system_supports_mte() && !kvm->created_vcpus) { r = 0; set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags); } @@ -99,25 +125,22 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, set_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags); break; case KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE: - new_cap = cap->args[0]; - mutex_lock(&kvm->slots_lock); /* * To keep things simple, allow changing the chunk * size only when no memory slots have been created. */ - if (!kvm_are_all_memslots_empty(kvm)) { - r = -EINVAL; - } else if (new_cap && !kvm_is_block_size_supported(new_cap)) { - r = -EINVAL; - } else { - r = 0; - kvm->arch.mmu.split_page_chunk_size = new_cap; + if (kvm_are_all_memslots_empty(kvm)) { + u64 new_cap = cap->args[0]; + + if (!new_cap || kvm_is_block_size_supported(new_cap)) { + r = 0; + kvm->arch.mmu.split_page_chunk_size = new_cap; + } } mutex_unlock(&kvm->slots_lock); break; default: - r = -EINVAL; break; } @@ -195,6 +218,23 @@ void kvm_arch_create_vm_debugfs(struct kvm *kvm) kvm_sys_regs_create_debugfs(kvm); } +static void kvm_destroy_mpidr_data(struct kvm *kvm) +{ + struct kvm_mpidr_data *data; + + mutex_lock(&kvm->arch.config_lock); + + data = rcu_dereference_protected(kvm->arch.mpidr_data, + lockdep_is_held(&kvm->arch.config_lock)); + if (data) { + rcu_assign_pointer(kvm->arch.mpidr_data, NULL); + synchronize_rcu(); + kfree(data); + } + + mutex_unlock(&kvm->arch.config_lock); +} + /** * kvm_arch_destroy_vm - destroy the VM data structure * @kvm: pointer to the KVM struct @@ -209,7 +249,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) if (is_protected_kvm_enabled()) pkvm_destroy_hyp_vm(kvm); - kfree(kvm->arch.mpidr_data); + kvm_destroy_mpidr_data(kvm); + kfree(kvm->arch.sysreg_masks); kvm_destroy_vcpus(kvm); @@ -218,9 +259,47 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_arm_teardown_hypercalls(kvm); } +static bool kvm_has_full_ptr_auth(void) +{ + bool apa, gpa, api, gpi, apa3, gpa3; + u64 isar1, isar2, val; + + /* + * Check that: + * + * - both Address and Generic auth are implemented for a given + * algorithm (Q5, IMPDEF or Q3) + * - only a single algorithm is implemented. + */ + if (!system_has_full_ptr_auth()) + return false; + + isar1 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR1_EL1); + isar2 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1); + + apa = !!FIELD_GET(ID_AA64ISAR1_EL1_APA_MASK, isar1); + val = FIELD_GET(ID_AA64ISAR1_EL1_GPA_MASK, isar1); + gpa = (val == ID_AA64ISAR1_EL1_GPA_IMP); + + api = !!FIELD_GET(ID_AA64ISAR1_EL1_API_MASK, isar1); + val = FIELD_GET(ID_AA64ISAR1_EL1_GPI_MASK, isar1); + gpi = (val == ID_AA64ISAR1_EL1_GPI_IMP); + + apa3 = !!FIELD_GET(ID_AA64ISAR2_EL1_APA3_MASK, isar2); + val = FIELD_GET(ID_AA64ISAR2_EL1_GPA3_MASK, isar2); + gpa3 = (val == ID_AA64ISAR2_EL1_GPA3_IMP); + + return (apa == gpa && api == gpi && apa3 == gpa3 && + (apa + api + apa3) == 1); +} + int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; + + if (kvm && kvm_vm_is_protected(kvm) && !pkvm_ext_allowed(kvm, ext)) + return 0; + switch (ext) { case KVM_CAP_IRQCHIP: r = vgic_present; @@ -311,7 +390,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; case KVM_CAP_ARM_PTRAUTH_ADDRESS: case KVM_CAP_ARM_PTRAUTH_GENERIC: - r = system_has_full_ptr_auth(); + r = kvm_has_full_ptr_auth(); break; case KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE: if (kvm) @@ -378,12 +457,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO; - /* - * Default value for the FP state, will be overloaded at load - * time if we support FP (pretty likely) - */ - vcpu->arch.fp_state = FP_STATE_FREE; - /* Set up the timer */ kvm_timer_vcpu_init(vcpu); @@ -395,6 +468,13 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu; + /* + * This vCPU may have been created after mpidr_data was initialized. + * Throw out the pre-computed mappings if that is the case which forces + * KVM to fall back to iteratively searching the vCPUs. + */ + kvm_destroy_mpidr_data(vcpu->kvm); + err = kvm_vgic_vcpu_init(vcpu); if (err) return err; @@ -428,6 +508,44 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) } +static void vcpu_set_pauth_traps(struct kvm_vcpu *vcpu) +{ + if (vcpu_has_ptrauth(vcpu)) { + /* + * Either we're running running an L2 guest, and the API/APK + * bits come from L1's HCR_EL2, or API/APK are both set. + */ + if (unlikely(vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu))) { + u64 val; + + val = __vcpu_sys_reg(vcpu, HCR_EL2); + val &= (HCR_API | HCR_APK); + vcpu->arch.hcr_el2 &= ~(HCR_API | HCR_APK); + vcpu->arch.hcr_el2 |= val; + } else { + vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK); + } + + /* + * Save the host keys if there is any chance for the guest + * to use pauth, as the entry code will reload the guest + * keys in that case. + * Protected mode is the exception to that rule, as the + * entry into the EL2 code eagerly switch back and forth + * between host and hyp keys (and kvm_hyp_ctxt is out of + * reach anyway). + */ + if (is_protected_kvm_enabled()) + return; + + if (vcpu->arch.hcr_el2 & (HCR_API | HCR_APK)) { + struct kvm_cpu_context *ctxt; + ctxt = this_cpu_ptr_hyp_sym(kvm_hyp_ctxt); + ptrauth_save_keys(ctxt); + } + } +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct kvm_s2_mmu *mmu; @@ -466,8 +584,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) else vcpu_set_wfx_traps(vcpu); - if (vcpu_has_ptrauth(vcpu)) - vcpu_ptrauth_disable(vcpu); + vcpu_set_pauth_traps(vcpu); + kvm_arch_vcpu_load_debug_state_flags(vcpu); if (!cpumask_test_cpu(cpu, vcpu->kvm->arch.supported_cpus)) @@ -580,11 +698,6 @@ unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu) } #endif -static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu) -{ - return vcpu_get_flag(vcpu, VCPU_INITIALIZED); -} - static void kvm_init_mpidr_data(struct kvm *kvm) { struct kvm_mpidr_data *data = NULL; @@ -594,7 +707,8 @@ static void kvm_init_mpidr_data(struct kvm *kvm) mutex_lock(&kvm->arch.config_lock); - if (kvm->arch.mpidr_data || atomic_read(&kvm->online_vcpus) == 1) + if (rcu_access_pointer(kvm->arch.mpidr_data) || + atomic_read(&kvm->online_vcpus) == 1) goto out; kvm_for_each_vcpu(c, vcpu, kvm) { @@ -631,7 +745,7 @@ static void kvm_init_mpidr_data(struct kvm *kvm) data->cmpidr_to_idx[index] = c; } - kvm->arch.mpidr_data = data; + rcu_assign_pointer(kvm->arch.mpidr_data, data); out: mutex_unlock(&kvm->arch.config_lock); } @@ -790,9 +904,8 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu) * doorbells to be signalled, should an interrupt become pending. */ preempt_disable(); - kvm_vgic_vmcr_sync(vcpu); vcpu_set_flag(vcpu, IN_WFI); - vgic_v4_put(vcpu); + kvm_vgic_put(vcpu); preempt_enable(); kvm_vcpu_halt(vcpu); @@ -800,7 +913,7 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu) preempt_disable(); vcpu_clear_flag(vcpu, IN_WFI); - vgic_v4_load(vcpu); + kvm_vgic_load(vcpu); preempt_enable(); } @@ -980,7 +1093,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) if (run->exit_reason == KVM_EXIT_MMIO) { ret = kvm_handle_mmio_return(vcpu); - if (ret) + if (ret <= 0) return ret; } @@ -1270,7 +1383,7 @@ static unsigned long system_supported_vcpu_features(void) if (!system_supports_sve()) clear_bit(KVM_ARM_VCPU_SVE, &features); - if (!system_has_full_ptr_auth()) { + if (!kvm_has_full_ptr_auth()) { clear_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, &features); clear_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, &features); } @@ -1971,7 +2084,7 @@ static void cpu_set_hyp_vector(void) static void cpu_hyp_init_context(void) { - kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt); + kvm_init_host_cpu_context(host_data_ptr(host_ctxt)); if (!is_kernel_in_hyp_mode()) cpu_init_hyp_mode(); @@ -2470,21 +2583,27 @@ out_err: struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr) { - struct kvm_vcpu *vcpu; + struct kvm_vcpu *vcpu = NULL; + struct kvm_mpidr_data *data; unsigned long i; mpidr &= MPIDR_HWID_BITMASK; - if (kvm->arch.mpidr_data) { - u16 idx = kvm_mpidr_index(kvm->arch.mpidr_data, mpidr); + rcu_read_lock(); + data = rcu_dereference(kvm->arch.mpidr_data); + + if (data) { + u16 idx = kvm_mpidr_index(data, mpidr); - vcpu = kvm_get_vcpu(kvm, - kvm->arch.mpidr_data->cmpidr_to_idx[idx]); + vcpu = kvm_get_vcpu(kvm, data->cmpidr_to_idx[idx]); if (mpidr != kvm_vcpu_get_mpidr_aff(vcpu)) vcpu = NULL; + } + rcu_read_unlock(); + + if (vcpu) return vcpu; - } kvm_for_each_vcpu(i, vcpu, kvm) { if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu)) diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c index 4697ba41b3a9..72d733c74a38 100644 --- a/arch/arm64/kvm/emulate-nested.c +++ b/arch/arm64/kvm/emulate-nested.c @@ -2117,6 +2117,26 @@ inject: return true; } +static bool forward_traps(struct kvm_vcpu *vcpu, u64 control_bit) +{ + bool control_bit_set; + + if (!vcpu_has_nv(vcpu)) + return false; + + control_bit_set = __vcpu_sys_reg(vcpu, HCR_EL2) & control_bit; + if (!is_hyp_ctxt(vcpu) && control_bit_set) { + kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu)); + return true; + } + return false; +} + +bool forward_smc_trap(struct kvm_vcpu *vcpu) +{ + return forward_traps(vcpu, HCR_TSC); +} + static u64 kvm_check_illegal_exception_return(struct kvm_vcpu *vcpu, u64 spsr) { u64 mode = spsr & PSR_MODE_MASK; @@ -2152,37 +2172,39 @@ static u64 kvm_check_illegal_exception_return(struct kvm_vcpu *vcpu, u64 spsr) void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu) { - u64 spsr, elr, mode; - bool direct_eret; + u64 spsr, elr, esr; /* - * Going through the whole put/load motions is a waste of time - * if this is a VHE guest hypervisor returning to its own - * userspace, or the hypervisor performing a local exception - * return. No need to save/restore registers, no need to - * switch S2 MMU. Just do the canonical ERET. + * Forward this trap to the virtual EL2 if the virtual + * HCR_EL2.NV bit is set and this is coming from !EL2. */ - spsr = vcpu_read_sys_reg(vcpu, SPSR_EL2); - spsr = kvm_check_illegal_exception_return(vcpu, spsr); - - mode = spsr & (PSR_MODE_MASK | PSR_MODE32_BIT); - - direct_eret = (mode == PSR_MODE_EL0t && - vcpu_el2_e2h_is_set(vcpu) && - vcpu_el2_tge_is_set(vcpu)); - direct_eret |= (mode == PSR_MODE_EL2h || mode == PSR_MODE_EL2t); - - if (direct_eret) { - *vcpu_pc(vcpu) = vcpu_read_sys_reg(vcpu, ELR_EL2); - *vcpu_cpsr(vcpu) = spsr; - trace_kvm_nested_eret(vcpu, *vcpu_pc(vcpu), spsr); + if (forward_traps(vcpu, HCR_NV)) return; + + /* Check for an ERETAx */ + esr = kvm_vcpu_get_esr(vcpu); + if (esr_iss_is_eretax(esr) && !kvm_auth_eretax(vcpu, &elr)) { + /* + * Oh no, ERETAx failed to authenticate. If we have + * FPACCOMBINE, deliver an exception right away. If we + * don't, then let the mangled ELR value trickle down the + * ERET handling, and the guest will have a little surprise. + */ + if (kvm_has_pauth(vcpu->kvm, FPACCOMBINE)) { + esr &= ESR_ELx_ERET_ISS_ERETA; + esr |= FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_FPAC); + kvm_inject_nested_sync(vcpu, esr); + return; + } } preempt_disable(); kvm_arch_vcpu_put(vcpu); - elr = __vcpu_sys_reg(vcpu, ELR_EL2); + spsr = __vcpu_sys_reg(vcpu, SPSR_EL2); + spsr = kvm_check_illegal_exception_return(vcpu, spsr); + if (!esr_iss_is_eretax(esr)) + elr = __vcpu_sys_reg(vcpu, ELR_EL2); trace_kvm_nested_eret(vcpu, elr, spsr); diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 826307e19e3a..1807d3a79a8a 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -14,19 +14,6 @@ #include <asm/kvm_mmu.h> #include <asm/sysreg.h> -void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu) -{ - struct task_struct *p = vcpu->arch.parent_task; - struct user_fpsimd_state *fpsimd; - - if (!is_protected_kvm_enabled() || !p) - return; - - fpsimd = &p->thread.uw.fpsimd_state; - kvm_unshare_hyp(fpsimd, fpsimd + 1); - put_task_struct(p); -} - /* * Called on entry to KVM_RUN unless this vcpu previously ran at least * once and the most recent prior KVM_RUN for this vcpu was called from @@ -38,30 +25,18 @@ void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu) */ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu) { - int ret; - struct user_fpsimd_state *fpsimd = ¤t->thread.uw.fpsimd_state; + int ret; - kvm_vcpu_unshare_task_fp(vcpu); + /* pKVM has its own tracking of the host fpsimd state. */ + if (is_protected_kvm_enabled()) + return 0; /* Make sure the host task fpsimd state is visible to hyp: */ ret = kvm_share_hyp(fpsimd, fpsimd + 1); if (ret) return ret; - vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd); - - /* - * We need to keep current's task_struct pinned until its data has been - * unshared with the hypervisor to make sure it is not re-used by the - * kernel and donated to someone else while already shared -- see - * kvm_vcpu_unshare_task_fp() for the matching put_task_struct(). - */ - if (is_protected_kvm_enabled()) { - get_task_struct(current); - vcpu->arch.parent_task = current; - } - return 0; } @@ -86,7 +61,8 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) * guest in kvm_arch_vcpu_ctxflush_fp() and override this to * FP_STATE_FREE if the flag set. */ - vcpu->arch.fp_state = FP_STATE_HOST_OWNED; + *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED; + *host_data_ptr(fpsimd_state) = kern_hyp_va(¤t->thread.uw.fpsimd_state); vcpu_clear_flag(vcpu, HOST_SVE_ENABLED); if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN) @@ -110,7 +86,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) * been saved, this is very unlikely to happen. */ if (read_sysreg_s(SYS_SVCR) & (SVCR_SM_MASK | SVCR_ZA_MASK)) { - vcpu->arch.fp_state = FP_STATE_FREE; + *host_data_ptr(fp_owner) = FP_STATE_FREE; fpsimd_save_and_flush_cpu_state(); } } @@ -126,7 +102,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu) { if (test_thread_flag(TIF_FOREIGN_FPSTATE)) - vcpu->arch.fp_state = FP_STATE_FREE; + *host_data_ptr(fp_owner) = FP_STATE_FREE; } /* @@ -142,8 +118,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) WARN_ON_ONCE(!irqs_disabled()); - if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) { - + if (guest_owns_fp_regs()) { /* * Currently we do not support SME guests so SVCR is * always 0 and we just need a variable to point to. @@ -196,16 +171,38 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) isb(); } - if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) { + if (guest_owns_fp_regs()) { if (vcpu_has_sve(vcpu)) { __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR); - /* Restore the VL that was saved when bound to the CPU */ + /* + * Restore the VL that was saved when bound to the CPU, + * which is the maximum VL for the guest. Because the + * layout of the data when saving the sve state depends + * on the VL, we need to use a consistent (i.e., the + * maximum) VL. + * Note that this means that at guest exit ZCR_EL1 is + * not necessarily the same as on guest entry. + * + * Restoring the VL isn't needed in VHE mode since + * ZCR_EL2 (accessed via ZCR_EL1) would fulfill the same + * role when doing the save from EL2. + */ if (!has_vhe()) sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL1); } + /* + * Flush (save and invalidate) the fpsimd/sve state so that if + * the host tries to use fpsimd/sve, it's not using stale data + * from the guest. + * + * Flushing the state sets the TIF_FOREIGN_FPSTATE bit for the + * context unconditionally, in both nVHE and VHE. This allows + * the kernel to restore the fpsimd/sve state, including ZCR_EL1 + * when needed. + */ fpsimd_save_and_flush_cpu_state(); } else if (has_vhe() && system_supports_sve()) { /* diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 617ae6dea5d5..b037f0a0e27e 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -56,6 +56,13 @@ static int handle_hvc(struct kvm_vcpu *vcpu) static int handle_smc(struct kvm_vcpu *vcpu) { /* + * Forward this trapped smc instruction to the virtual EL2 if + * the guest has asked for it. + */ + if (forward_smc_trap(vcpu)) + return 1; + + /* * "If an SMC instruction executed at Non-secure EL1 is * trapped to EL2 because HCR_EL2.TSC is 1, the exception is a * Trap exception, not a Secure Monitor Call exception [...]" @@ -207,19 +214,40 @@ static int handle_sve(struct kvm_vcpu *vcpu) } /* - * Guest usage of a ptrauth instruction (which the guest EL1 did not turn into - * a NOP). If we get here, it is that we didn't fixup ptrauth on exit, and all - * that we can do is give the guest an UNDEF. + * Two possibilities to handle a trapping ptrauth instruction: + * + * - Guest usage of a ptrauth instruction (which the guest EL1 did not + * turn into a NOP). If we get here, it is because we didn't enable + * ptrauth for the guest. This results in an UNDEF, as it isn't + * supposed to use ptrauth without being told it could. + * + * - Running an L2 NV guest while L1 has left HCR_EL2.API==0, and for + * which we reinject the exception into L1. + * + * Anything else is an emulation bug (hence the WARN_ON + UNDEF). */ static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu) { + if (!vcpu_has_ptrauth(vcpu)) { + kvm_inject_undefined(vcpu); + return 1; + } + + if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) { + kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu)); + return 1; + } + + /* Really shouldn't be here! */ + WARN_ON_ONCE(1); kvm_inject_undefined(vcpu); return 1; } static int kvm_handle_eret(struct kvm_vcpu *vcpu) { - if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_ERET_ISS_ERET) + if (esr_iss_is_eretax(kvm_vcpu_get_esr(vcpu)) && + !vcpu_has_ptrauth(vcpu)) return kvm_handle_ptrauth(vcpu); /* diff --git a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h index 961bbef104a6..d00093699aaf 100644 --- a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h @@ -135,9 +135,9 @@ static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu) if (!vcpu_get_flag(vcpu, DEBUG_DIRTY)) return; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); guest_ctxt = &vcpu->arch.ctxt; - host_dbg = &vcpu->arch.host_debug_state.regs; + host_dbg = host_data_ptr(host_debug_state.regs); guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); __debug_save_state(host_dbg, host_ctxt); @@ -154,9 +154,9 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu) if (!vcpu_get_flag(vcpu, DEBUG_DIRTY)) return; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); guest_ctxt = &vcpu->arch.ctxt; - host_dbg = &vcpu->arch.host_debug_state.regs; + host_dbg = host_data_ptr(host_debug_state.regs); guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); __debug_save_state(guest_dbg, guest_ctxt); diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index e3fcf8c4d5b4..a92566f36022 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -27,6 +27,7 @@ #include <asm/kvm_hyp.h> #include <asm/kvm_mmu.h> #include <asm/kvm_nested.h> +#include <asm/kvm_ptrauth.h> #include <asm/fpsimd.h> #include <asm/debug-monitors.h> #include <asm/processor.h> @@ -39,12 +40,6 @@ struct kvm_exception_table_entry { extern struct kvm_exception_table_entry __start___kvm_ex_table; extern struct kvm_exception_table_entry __stop___kvm_ex_table; -/* Check whether the FP regs are owned by the guest */ -static inline bool guest_owns_fp_regs(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.fp_state == FP_STATE_GUEST_OWNED; -} - /* Save the 32-bit only FPSIMD system register state */ static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu) { @@ -155,7 +150,7 @@ static inline bool cpu_has_amu(void) static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu) { - struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); struct kvm *kvm = kern_hyp_va(vcpu->kvm); CHECK_FGT_MASKS(HFGRTR_EL2); @@ -191,7 +186,7 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu) static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu) { - struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); struct kvm *kvm = kern_hyp_va(vcpu->kvm); if (!cpus_have_final_cap(ARM64_HAS_FGT)) @@ -226,13 +221,13 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) write_sysreg(0, pmselr_el0); - hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + hctxt = host_data_ptr(host_ctxt); ctxt_sys_reg(hctxt, PMUSERENR_EL0) = read_sysreg(pmuserenr_el0); write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); vcpu_set_flag(vcpu, PMUSERENR_ON_CPU); } - vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2); + *host_data_ptr(host_debug_state.mdcr_el2) = read_sysreg(mdcr_el2); write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); if (cpus_have_final_cap(ARM64_HAS_HCX)) { @@ -254,13 +249,13 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) { - write_sysreg(vcpu->arch.mdcr_el2_host, mdcr_el2); + write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2); write_sysreg(0, hstr_el2); if (kvm_arm_support_pmu_v3()) { struct kvm_cpu_context *hctxt; - hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + hctxt = host_data_ptr(host_ctxt); write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0); vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU); } @@ -271,10 +266,8 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) __deactivate_traps_hfgxtr(vcpu); } -static inline void ___activate_traps(struct kvm_vcpu *vcpu) +static inline void ___activate_traps(struct kvm_vcpu *vcpu, u64 hcr) { - u64 hcr = vcpu->arch.hcr_el2; - if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM)) hcr |= HCR_TVM; @@ -376,8 +369,8 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) isb(); /* Write out the host state if it's in the registers */ - if (vcpu->arch.fp_state == FP_STATE_HOST_OWNED) - __fpsimd_save_state(vcpu->arch.host_fpsimd_state); + if (host_owns_fp_regs()) + __fpsimd_save_state(*host_data_ptr(fpsimd_state)); /* Restore the guest state */ if (sve_guest) @@ -389,7 +382,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) if (!(read_sysreg(hcr_el2) & HCR_RW)) write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2); - vcpu->arch.fp_state = FP_STATE_GUEST_OWNED; + *host_data_ptr(fp_owner) = FP_STATE_GUEST_OWNED; return true; } @@ -449,60 +442,6 @@ static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu) return true; } -static inline bool esr_is_ptrauth_trap(u64 esr) -{ - switch (esr_sys64_to_sysreg(esr)) { - case SYS_APIAKEYLO_EL1: - case SYS_APIAKEYHI_EL1: - case SYS_APIBKEYLO_EL1: - case SYS_APIBKEYHI_EL1: - case SYS_APDAKEYLO_EL1: - case SYS_APDAKEYHI_EL1: - case SYS_APDBKEYLO_EL1: - case SYS_APDBKEYHI_EL1: - case SYS_APGAKEYLO_EL1: - case SYS_APGAKEYHI_EL1: - return true; - } - - return false; -} - -#define __ptrauth_save_key(ctxt, key) \ - do { \ - u64 __val; \ - __val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \ - ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val; \ - __val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ - ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val; \ -} while(0) - -DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt); - -static bool kvm_hyp_handle_ptrauth(struct kvm_vcpu *vcpu, u64 *exit_code) -{ - struct kvm_cpu_context *ctxt; - u64 val; - - if (!vcpu_has_ptrauth(vcpu)) - return false; - - ctxt = this_cpu_ptr(&kvm_hyp_ctxt); - __ptrauth_save_key(ctxt, APIA); - __ptrauth_save_key(ctxt, APIB); - __ptrauth_save_key(ctxt, APDA); - __ptrauth_save_key(ctxt, APDB); - __ptrauth_save_key(ctxt, APGA); - - vcpu_ptrauth_enable(vcpu); - - val = read_sysreg(hcr_el2); - val |= (HCR_API | HCR_APK); - write_sysreg(val, hcr_el2); - - return true; -} - static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu) { struct arch_timer_context *ctxt; @@ -590,9 +529,6 @@ static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) __vgic_v3_perform_cpuif_access(vcpu) == 1) return true; - if (esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu))) - return kvm_hyp_handle_ptrauth(vcpu, exit_code); - if (kvm_hyp_handle_cntpct(vcpu)) return true; diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h index 82b3d62538a6..22f374e9f532 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h @@ -53,7 +53,13 @@ pkvm_hyp_vcpu_to_hyp_vm(struct pkvm_hyp_vcpu *hyp_vcpu) return container_of(hyp_vcpu->vcpu.kvm, struct pkvm_hyp_vm, kvm); } +static inline bool pkvm_hyp_vcpu_is_protected(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + return vcpu_is_protected(&hyp_vcpu->vcpu); +} + void pkvm_hyp_vm_table_init(void *tbl); +void pkvm_host_fpsimd_state_init(void); int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, unsigned long pgd_hva); diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c index 7746ea507b6f..53efda0235cf 100644 --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c @@ -83,10 +83,10 @@ void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu) { /* Disable and flush SPE data generation */ if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE)) - __debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1); + __debug_save_spe(host_data_ptr(host_debug_state.pmscr_el1)); /* Disable and flush Self-Hosted Trace generation */ if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE)) - __debug_save_trace(&vcpu->arch.host_debug_state.trfcr_el1); + __debug_save_trace(host_data_ptr(host_debug_state.trfcr_el1)); } void __debug_switch_to_guest(struct kvm_vcpu *vcpu) @@ -97,9 +97,9 @@ void __debug_switch_to_guest(struct kvm_vcpu *vcpu) void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu) { if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE)) - __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1); + __debug_restore_spe(*host_data_ptr(host_debug_state.pmscr_el1)); if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE)) - __debug_restore_trace(vcpu->arch.host_debug_state.trfcr_el1); + __debug_restore_trace(*host_data_ptr(host_debug_state.trfcr_el1)); } void __debug_switch_to_host(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index 320f2eaa14a9..02746f9d0980 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -600,7 +600,6 @@ static bool ffa_call_supported(u64 func_id) case FFA_MSG_POLL: case FFA_MSG_WAIT: /* 32-bit variants of 64-bit calls */ - case FFA_MSG_SEND_DIRECT_REQ: case FFA_MSG_SEND_DIRECT_RESP: case FFA_RXTX_MAP: case FFA_MEM_DONATE: diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 2385fd03ed87..d5c48dc98f67 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -39,10 +39,8 @@ static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) hyp_vcpu->vcpu.arch.cptr_el2 = host_vcpu->arch.cptr_el2; hyp_vcpu->vcpu.arch.iflags = host_vcpu->arch.iflags; - hyp_vcpu->vcpu.arch.fp_state = host_vcpu->arch.fp_state; hyp_vcpu->vcpu.arch.debug_ptr = kern_hyp_va(host_vcpu->arch.debug_ptr); - hyp_vcpu->vcpu.arch.host_fpsimd_state = host_vcpu->arch.host_fpsimd_state; hyp_vcpu->vcpu.arch.vsesr_el2 = host_vcpu->arch.vsesr_el2; @@ -64,7 +62,6 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) host_vcpu->arch.fault = hyp_vcpu->vcpu.arch.fault; host_vcpu->arch.iflags = hyp_vcpu->vcpu.arch.iflags; - host_vcpu->arch.fp_state = hyp_vcpu->vcpu.arch.fp_state; host_cpu_if->vgic_hcr = hyp_cpu_if->vgic_hcr; for (i = 0; i < hyp_cpu_if->used_lrs; ++i) @@ -178,16 +175,6 @@ static void handle___vgic_v3_get_gic_config(struct kvm_cpu_context *host_ctxt) cpu_reg(host_ctxt, 1) = __vgic_v3_get_gic_config(); } -static void handle___vgic_v3_read_vmcr(struct kvm_cpu_context *host_ctxt) -{ - cpu_reg(host_ctxt, 1) = __vgic_v3_read_vmcr(); -} - -static void handle___vgic_v3_write_vmcr(struct kvm_cpu_context *host_ctxt) -{ - __vgic_v3_write_vmcr(cpu_reg(host_ctxt, 1)); -} - static void handle___vgic_v3_init_lrs(struct kvm_cpu_context *host_ctxt) { __vgic_v3_init_lrs(); @@ -198,18 +185,18 @@ static void handle___kvm_get_mdcr_el2(struct kvm_cpu_context *host_ctxt) cpu_reg(host_ctxt, 1) = __kvm_get_mdcr_el2(); } -static void handle___vgic_v3_save_aprs(struct kvm_cpu_context *host_ctxt) +static void handle___vgic_v3_save_vmcr_aprs(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1); - __vgic_v3_save_aprs(kern_hyp_va(cpu_if)); + __vgic_v3_save_vmcr_aprs(kern_hyp_va(cpu_if)); } -static void handle___vgic_v3_restore_aprs(struct kvm_cpu_context *host_ctxt) +static void handle___vgic_v3_restore_vmcr_aprs(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1); - __vgic_v3_restore_aprs(kern_hyp_va(cpu_if)); + __vgic_v3_restore_vmcr_aprs(kern_hyp_va(cpu_if)); } static void handle___pkvm_init(struct kvm_cpu_context *host_ctxt) @@ -340,10 +327,8 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__kvm_tlb_flush_vmid_range), HANDLE_FUNC(__kvm_flush_cpu_context), HANDLE_FUNC(__kvm_timer_set_cntvoff), - HANDLE_FUNC(__vgic_v3_read_vmcr), - HANDLE_FUNC(__vgic_v3_write_vmcr), - HANDLE_FUNC(__vgic_v3_save_aprs), - HANDLE_FUNC(__vgic_v3_restore_aprs), + HANDLE_FUNC(__vgic_v3_save_vmcr_aprs), + HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs), HANDLE_FUNC(__pkvm_vcpu_init_traps), HANDLE_FUNC(__pkvm_init_vm), HANDLE_FUNC(__pkvm_init_vcpu), diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 861c76021a25..caba3e4bd09e 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -533,7 +533,13 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) int ret = 0; esr = read_sysreg_el2(SYS_ESR); - BUG_ON(!__get_fault_info(esr, &fault)); + if (!__get_fault_info(esr, &fault)) { + /* + * We've presumably raced with a page-table change which caused + * AT to fail, try again. + */ + return; + } addr = (fault.hpfar_el2 & HPFAR_MASK) << 8; ret = host_stage2_idmap(addr); diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 26dd9a20ad6e..16aa4875ddb8 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -200,7 +200,7 @@ static void pvm_init_trap_regs(struct kvm_vcpu *vcpu) } /* - * Initialize trap register values for protected VMs. + * Initialize trap register values in protected mode. */ void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu) { @@ -247,6 +247,17 @@ void pkvm_hyp_vm_table_init(void *tbl) vm_table = tbl; } +void pkvm_host_fpsimd_state_init(void) +{ + unsigned long i; + + for (i = 0; i < hyp_nr_cpus; i++) { + struct kvm_host_data *host_data = per_cpu_ptr(&kvm_host_data, i); + + host_data->fpsimd_state = &host_data->host_ctxt.fp_regs; + } +} + /* * Return the hyp vm structure corresponding to the handle. */ @@ -430,6 +441,7 @@ static void *map_donated_memory(unsigned long host_va, size_t size) static void __unmap_donated_memory(void *va, size_t size) { + kvm_flush_dcache_to_poc(va, size); WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(va), PAGE_ALIGN(size) >> PAGE_SHIFT)); } diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index d57bcb6ab94d..dfe8fe0f7eaf 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -205,7 +205,7 @@ asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on) struct psci_boot_args *boot_args; struct kvm_cpu_context *host_ctxt; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); if (is_cpu_on) boot_args = this_cpu_ptr(&cpu_on_args); diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index bc58d1b515af..859f22f754d3 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -257,8 +257,7 @@ static int fix_hyp_pgtable_refcnt(void) void __noreturn __pkvm_init_finalise(void) { - struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data); - struct kvm_cpu_context *host_ctxt = &host_data->host_ctxt; + struct kvm_cpu_context *host_ctxt = host_data_ptr(host_ctxt); unsigned long nr_pages, reserved_pages, pfn; int ret; @@ -301,6 +300,7 @@ void __noreturn __pkvm_init_finalise(void) goto out; pkvm_hyp_vm_table_init(vm_table_base); + pkvm_host_fpsimd_state_init(); out: /* * We tail-called to here from handle___pkvm_init() and will not return, diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index c50f8459e4fc..6758cd905570 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -40,7 +40,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu) { u64 val; - ___activate_traps(vcpu); + ___activate_traps(vcpu, vcpu->arch.hcr_el2); __activate_traps_common(vcpu); val = vcpu->arch.cptr_el2; @@ -53,7 +53,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu) val |= CPTR_EL2_TSM; } - if (!guest_owns_fp_regs(vcpu)) { + if (!guest_owns_fp_regs()) { if (has_hvhe()) val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN | CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN); @@ -191,7 +191,6 @@ static const exit_handler_fn hyp_exit_handlers[] = { [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low, [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, [ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low, - [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, [ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops, }; @@ -203,13 +202,12 @@ static const exit_handler_fn pvm_exit_handlers[] = { [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low, [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, [ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low, - [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, [ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops, }; static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu) { - if (unlikely(kvm_vm_is_protected(kern_hyp_va(vcpu->kvm)))) + if (unlikely(vcpu_is_protected(vcpu))) return pvm_exit_handlers; return hyp_exit_handlers; @@ -228,9 +226,7 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu) */ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code) { - struct kvm *kvm = kern_hyp_va(vcpu->kvm); - - if (kvm_vm_is_protected(kvm) && vcpu_mode_is_32bit(vcpu)) { + if (unlikely(vcpu_is_protected(vcpu) && vcpu_mode_is_32bit(vcpu))) { /* * As we have caught the guest red-handed, decide that it isn't * fit for purpose anymore by making the vcpu invalid. The VMM @@ -264,7 +260,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) pmr_sync(); } - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); host_ctxt->__hyp_running_vcpu = vcpu; guest_ctxt = &vcpu->arch.ctxt; @@ -337,7 +333,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) __sysreg_restore_state_nvhe(host_ctxt); - if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) + if (guest_owns_fp_regs()) __fpsimd_save_fpexc32(vcpu); __debug_switch_to_host(vcpu); @@ -367,7 +363,7 @@ asmlinkage void __noreturn hyp_panic(void) struct kvm_cpu_context *host_ctxt; struct kvm_vcpu *vcpu; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); vcpu = host_ctxt->__hyp_running_vcpu; if (vcpu) { diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index 2fc68da4036d..ca3c09df8d7c 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -11,13 +11,23 @@ #include <nvhe/mem_protect.h> struct tlb_inv_context { - u64 tcr; + struct kvm_s2_mmu *mmu; + u64 tcr; + u64 sctlr; }; -static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, - struct tlb_inv_context *cxt, - bool nsh) +static void enter_vmid_context(struct kvm_s2_mmu *mmu, + struct tlb_inv_context *cxt, + bool nsh) { + struct kvm_s2_mmu *host_s2_mmu = &host_mmu.arch.mmu; + struct kvm_cpu_context *host_ctxt; + struct kvm_vcpu *vcpu; + + host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + vcpu = host_ctxt->__hyp_running_vcpu; + cxt->mmu = NULL; + /* * We have two requirements: * @@ -40,20 +50,55 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, else dsb(ish); + /* + * If we're already in the desired context, then there's nothing to do. + */ + if (vcpu) { + /* + * We're in guest context. However, for this to work, this needs + * to be called from within __kvm_vcpu_run(), which ensures that + * __hyp_running_vcpu is set to the current guest vcpu. + */ + if (mmu == vcpu->arch.hw_mmu || WARN_ON(mmu != host_s2_mmu)) + return; + + cxt->mmu = vcpu->arch.hw_mmu; + } else { + /* We're in host context. */ + if (mmu == host_s2_mmu) + return; + + cxt->mmu = host_s2_mmu; + } + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { u64 val; /* * For CPUs that are affected by ARM 1319367, we need to - * avoid a host Stage-1 walk while we have the guest's - * VMID set in the VTTBR in order to invalidate TLBs. - * We're guaranteed that the S1 MMU is enabled, so we can - * simply set the EPD bits to avoid any further TLB fill. + * avoid a Stage-1 walk with the old VMID while we have + * the new VMID set in the VTTBR in order to invalidate TLBs. + * We're guaranteed that the host S1 MMU is enabled, so + * we can simply set the EPD bits to avoid any further + * TLB fill. For guests, we ensure that the S1 MMU is + * temporarily enabled in the next context. */ val = cxt->tcr = read_sysreg_el1(SYS_TCR); val |= TCR_EPD1_MASK | TCR_EPD0_MASK; write_sysreg_el1(val, SYS_TCR); isb(); + + if (vcpu) { + val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR); + if (!(val & SCTLR_ELx_M)) { + val |= SCTLR_ELx_M; + write_sysreg_el1(val, SYS_SCTLR); + isb(); + } + } else { + /* The host S1 MMU is always enabled. */ + cxt->sctlr = SCTLR_ELx_M; + } } /* @@ -62,18 +107,40 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, * ensuring that we always have an ISB, but not two ISBs back * to back. */ - __load_stage2(mmu, kern_hyp_va(mmu->arch)); + if (vcpu) + __load_host_stage2(); + else + __load_stage2(mmu, kern_hyp_va(mmu->arch)); + asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT)); } -static void __tlb_switch_to_host(struct tlb_inv_context *cxt) +static void exit_vmid_context(struct tlb_inv_context *cxt) { - __load_host_stage2(); + struct kvm_s2_mmu *mmu = cxt->mmu; + struct kvm_cpu_context *host_ctxt; + struct kvm_vcpu *vcpu; + + host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + vcpu = host_ctxt->__hyp_running_vcpu; + + if (!mmu) + return; + + if (vcpu) + __load_stage2(mmu, kern_hyp_va(mmu->arch)); + else + __load_host_stage2(); if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - /* Ensure write of the host VMID */ + /* Ensure write of the old VMID */ isb(); - /* Restore the host's TCR_EL1 */ + + if (!(cxt->sctlr & SCTLR_ELx_M)) { + write_sysreg_el1(cxt->sctlr, SYS_SCTLR); + isb(); + } + write_sysreg_el1(cxt->tcr, SYS_TCR); } } @@ -84,7 +151,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, struct tlb_inv_context cxt; /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt, false); + enter_vmid_context(mmu, &cxt, false); /* * We could do so much better if we had the VA as well. @@ -105,7 +172,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, dsb(ish); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu, @@ -114,7 +181,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu, struct tlb_inv_context cxt; /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt, true); + enter_vmid_context(mmu, &cxt, true); /* * We could do so much better if we had the VA as well. @@ -135,7 +202,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu, dsb(nsh); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, @@ -152,7 +219,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, start = round_down(start, stride); /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt, false); + enter_vmid_context(mmu, &cxt, false); __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, TLBI_TTL_UNKNOWN); @@ -162,7 +229,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, dsb(ish); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) @@ -170,13 +237,13 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) struct tlb_inv_context cxt; /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt, false); + enter_vmid_context(mmu, &cxt, false); __tlbi(vmalls12e1is); dsb(ish); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu) @@ -184,19 +251,19 @@ void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu) struct tlb_inv_context cxt; /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt, false); + enter_vmid_context(mmu, &cxt, false); __tlbi(vmalle1); asm volatile("ic iallu"); dsb(nsh); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_flush_vm_context(void) { - /* Same remark as in __tlb_switch_to_guest() */ + /* Same remark as in enter_vmid_context() */ dsb(ish); __tlbi(alle1is); dsb(ish); diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 5a59ef88b646..9e2bbee77491 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -914,12 +914,12 @@ static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx, static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte) { u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR; - return memattr == KVM_S2_MEMATTR(pgt, NORMAL); + return kvm_pte_valid(pte) && memattr == KVM_S2_MEMATTR(pgt, NORMAL); } static bool stage2_pte_executable(kvm_pte_t pte) { - return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN); + return kvm_pte_valid(pte) && !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN); } static u64 stage2_map_walker_phys_addr(const struct kvm_pgtable_visit_ctx *ctx, @@ -979,6 +979,21 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, if (!stage2_pte_needs_update(ctx->old, new)) return -EAGAIN; + /* If we're only changing software bits, then store them and go! */ + if (!kvm_pgtable_walk_shared(ctx) && + !((ctx->old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW)) { + bool old_is_counted = stage2_pte_is_counted(ctx->old); + + if (old_is_counted != stage2_pte_is_counted(new)) { + if (old_is_counted) + mm_ops->put_page(ctx->ptep); + else + mm_ops->get_page(ctx->ptep); + } + WARN_ON_ONCE(!stage2_try_set_pte(ctx, new)); + return 0; + } + if (!stage2_try_break_pte(ctx, data->mmu)) return -EAGAIN; @@ -1370,7 +1385,7 @@ static int stage2_flush_walker(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_pgtable *pgt = ctx->arg; struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops; - if (!kvm_pte_valid(ctx->old) || !stage2_pte_cacheable(pgt, ctx->old)) + if (!stage2_pte_cacheable(pgt, ctx->old)) return 0; if (mm_ops->dcache_clean_inval_poc) diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 6cb638b184b1..7b397fad26f2 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -330,7 +330,7 @@ void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if) write_gicreg(0, ICH_HCR_EL2); } -void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) +static void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) { u64 val; u32 nr_pre_bits; @@ -363,7 +363,7 @@ void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) } } -void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if) +static void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if) { u64 val; u32 nr_pre_bits; @@ -455,16 +455,35 @@ u64 __vgic_v3_get_gic_config(void) return val; } -u64 __vgic_v3_read_vmcr(void) +static u64 __vgic_v3_read_vmcr(void) { return read_gicreg(ICH_VMCR_EL2); } -void __vgic_v3_write_vmcr(u32 vmcr) +static void __vgic_v3_write_vmcr(u32 vmcr) { write_gicreg(vmcr, ICH_VMCR_EL2); } +void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if) +{ + __vgic_v3_save_aprs(cpu_if); + if (cpu_if->vgic_sre) + cpu_if->vgic_vmcr = __vgic_v3_read_vmcr(); +} + +void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if) +{ + /* + * If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen + * is dependent on ICC_SRE_EL1.SRE, and we have to perform the + * VMCR_EL2 save/restore in the world switch. + */ + if (cpu_if->vgic_sre) + __vgic_v3_write_vmcr(cpu_if->vgic_vmcr); + __vgic_v3_restore_aprs(cpu_if); +} + static int __vgic_v3_bpr_min(void) { /* See Pseudocode for VPriorityGroup */ diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 1581df6aec87..d7af5f46f22a 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -33,11 +33,43 @@ DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data); DEFINE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt); DEFINE_PER_CPU(unsigned long, kvm_hyp_vector); +/* + * HCR_EL2 bits that the NV guest can freely change (no RES0/RES1 + * semantics, irrespective of the configuration), but that cannot be + * applied to the actual HW as things would otherwise break badly. + * + * - TGE: we want the guest to use EL1, which is incompatible with + * this bit being set + * + * - API/APK: they are already accounted for by vcpu_load(), and can + * only take effect across a load/put cycle (such as ERET) + */ +#define NV_HCR_GUEST_EXCLUDE (HCR_TGE | HCR_API | HCR_APK) + +static u64 __compute_hcr(struct kvm_vcpu *vcpu) +{ + u64 hcr = vcpu->arch.hcr_el2; + + if (!vcpu_has_nv(vcpu)) + return hcr; + + if (is_hyp_ctxt(vcpu)) { + hcr |= HCR_NV | HCR_NV2 | HCR_AT | HCR_TTLB; + + if (!vcpu_el2_e2h_is_set(vcpu)) + hcr |= HCR_NV1; + + write_sysreg_s(vcpu->arch.ctxt.vncr_array, SYS_VNCR_EL2); + } + + return hcr | (__vcpu_sys_reg(vcpu, HCR_EL2) & ~NV_HCR_GUEST_EXCLUDE); +} + static void __activate_traps(struct kvm_vcpu *vcpu) { u64 val; - ___activate_traps(vcpu); + ___activate_traps(vcpu, __compute_hcr(vcpu)); if (has_cntpoff()) { struct timer_map map; @@ -75,7 +107,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu) val |= CPTR_EL2_TAM; - if (guest_owns_fp_regs(vcpu)) { + if (guest_owns_fp_regs()) { if (vcpu_has_sve(vcpu)) val |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN; } else { @@ -162,6 +194,8 @@ static void __vcpu_put_deactivate_traps(struct kvm_vcpu *vcpu) void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu) { + host_data_ptr(host_ctxt)->__hyp_running_vcpu = vcpu; + __vcpu_load_switch_sysregs(vcpu); __vcpu_load_activate_traps(vcpu); __load_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch); @@ -171,6 +205,61 @@ void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu) { __vcpu_put_deactivate_traps(vcpu); __vcpu_put_switch_sysregs(vcpu); + + host_data_ptr(host_ctxt)->__hyp_running_vcpu = NULL; +} + +static bool kvm_hyp_handle_eret(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + u64 esr = kvm_vcpu_get_esr(vcpu); + u64 spsr, elr, mode; + + /* + * Going through the whole put/load motions is a waste of time + * if this is a VHE guest hypervisor returning to its own + * userspace, or the hypervisor performing a local exception + * return. No need to save/restore registers, no need to + * switch S2 MMU. Just do the canonical ERET. + * + * Unless the trap has to be forwarded further down the line, + * of course... + */ + if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_NV) || + (__vcpu_sys_reg(vcpu, HFGITR_EL2) & HFGITR_EL2_ERET)) + return false; + + spsr = read_sysreg_el1(SYS_SPSR); + mode = spsr & (PSR_MODE_MASK | PSR_MODE32_BIT); + + switch (mode) { + case PSR_MODE_EL0t: + if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))) + return false; + break; + case PSR_MODE_EL2t: + mode = PSR_MODE_EL1t; + break; + case PSR_MODE_EL2h: + mode = PSR_MODE_EL1h; + break; + default: + return false; + } + + /* If ERETAx fails, take the slow path */ + if (esr_iss_is_eretax(esr)) { + if (!(vcpu_has_ptrauth(vcpu) && kvm_auth_eretax(vcpu, &elr))) + return false; + } else { + elr = read_sysreg_el1(SYS_ELR); + } + + spsr = (spsr & ~(PSR_MODE_MASK | PSR_MODE32_BIT)) | mode; + + write_sysreg_el2(spsr, SYS_SPSR); + write_sysreg_el2(elr, SYS_ELR); + + return true; } static const exit_handler_fn hyp_exit_handlers[] = { @@ -182,7 +271,7 @@ static const exit_handler_fn hyp_exit_handlers[] = { [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low, [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, [ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low, - [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, + [ESR_ELx_EC_ERET] = kvm_hyp_handle_eret, [ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops, }; @@ -197,7 +286,7 @@ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code) * If we were in HYP context on entry, adjust the PSTATE view * so that the usual helpers work correctly. */ - if (unlikely(vcpu_get_flag(vcpu, VCPU_HYP_CONTEXT))) { + if (vcpu_has_nv(vcpu) && (read_sysreg(hcr_el2) & HCR_NV)) { u64 mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT); switch (mode) { @@ -221,8 +310,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) struct kvm_cpu_context *guest_ctxt; u64 exit_code; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; - host_ctxt->__hyp_running_vcpu = vcpu; + host_ctxt = host_data_ptr(host_ctxt); guest_ctxt = &vcpu->arch.ctxt; sysreg_save_host_state_vhe(host_ctxt); @@ -240,11 +328,6 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) sysreg_restore_guest_state_vhe(guest_ctxt); __debug_switch_to_guest(vcpu); - if (is_hyp_ctxt(vcpu)) - vcpu_set_flag(vcpu, VCPU_HYP_CONTEXT); - else - vcpu_clear_flag(vcpu, VCPU_HYP_CONTEXT); - do { /* Jump in the fire! */ exit_code = __guest_enter(vcpu); @@ -258,7 +341,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) sysreg_restore_host_state_vhe(host_ctxt); - if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) + if (guest_owns_fp_regs()) __fpsimd_save_fpexc32(vcpu); __debug_switch_to_host(vcpu); @@ -306,7 +389,7 @@ static void __hyp_call_panic(u64 spsr, u64 elr, u64 par) struct kvm_cpu_context *host_ctxt; struct kvm_vcpu *vcpu; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); vcpu = host_ctxt->__hyp_running_vcpu; __deactivate_traps(vcpu); diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c index a8b9ea496706..e12bd7d6d2dc 100644 --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -67,7 +67,7 @@ void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu) struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; struct kvm_cpu_context *host_ctxt; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); __sysreg_save_user_state(host_ctxt); /* @@ -110,7 +110,7 @@ void __vcpu_put_switch_sysregs(struct kvm_vcpu *vcpu) struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; struct kvm_cpu_context *host_ctxt; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); __sysreg_save_el1_state(guest_ctxt); __sysreg_save_user_state(guest_ctxt); diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c index 1a60b95381e8..5fa0359f3a87 100644 --- a/arch/arm64/kvm/hyp/vhe/tlb.c +++ b/arch/arm64/kvm/hyp/vhe/tlb.c @@ -17,8 +17,8 @@ struct tlb_inv_context { u64 sctlr; }; -static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, - struct tlb_inv_context *cxt) +static void enter_vmid_context(struct kvm_s2_mmu *mmu, + struct tlb_inv_context *cxt) { struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); u64 val; @@ -67,7 +67,7 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, isb(); } -static void __tlb_switch_to_host(struct tlb_inv_context *cxt) +static void exit_vmid_context(struct tlb_inv_context *cxt) { /* * We're done with the TLB operation, let's restore the host's @@ -97,7 +97,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, dsb(ishst); /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt); + enter_vmid_context(mmu, &cxt); /* * We could do so much better if we had the VA as well. @@ -118,7 +118,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, dsb(ish); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu, @@ -129,7 +129,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu, dsb(nshst); /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt); + enter_vmid_context(mmu, &cxt); /* * We could do so much better if we had the VA as well. @@ -150,7 +150,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu, dsb(nsh); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, @@ -169,7 +169,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, dsb(ishst); /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt); + enter_vmid_context(mmu, &cxt); __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, TLBI_TTL_UNKNOWN); @@ -179,7 +179,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, dsb(ish); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) @@ -189,13 +189,13 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) dsb(ishst); /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt); + enter_vmid_context(mmu, &cxt); __tlbi(vmalls12e1is); dsb(ish); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu) @@ -203,14 +203,14 @@ void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu) struct tlb_inv_context cxt; /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt); + enter_vmid_context(mmu, &cxt); __tlbi(vmalle1); asm volatile("ic iallu"); dsb(nsh); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_flush_vm_context(void) diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c index 200c8019a82a..cd6b7b83e2c3 100644 --- a/arch/arm64/kvm/mmio.c +++ b/arch/arm64/kvm/mmio.c @@ -86,7 +86,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu) /* Detect an already handled MMIO return */ if (unlikely(!vcpu->mmio_needed)) - return 0; + return 1; vcpu->mmio_needed = 0; @@ -117,7 +117,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu) */ kvm_incr_pc(vcpu); - return 0; + return 1; } int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) @@ -133,11 +133,19 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) /* * No valid syndrome? Ask userspace for help if it has * volunteered to do so, and bail out otherwise. + * + * In the protected VM case, there isn't much userspace can do + * though, so directly deliver an exception to the guest. */ if (!kvm_vcpu_dabt_isvalid(vcpu)) { trace_kvm_mmio_nisv(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu), kvm_vcpu_get_hfar(vcpu), fault_ipa); + if (vcpu_is_protected(vcpu)) { + kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); + return 1; + } + if (test_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER, &vcpu->kvm->arch.flags)) { run->exit_reason = KVM_EXIT_ARM_NISV; diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index dc04bc767865..8bcab0cc3fe9 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1522,8 +1522,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, read_lock(&kvm->mmu_lock); pgt = vcpu->arch.hw_mmu->pgt; - if (mmu_invalidate_retry(kvm, mmu_seq)) + if (mmu_invalidate_retry(kvm, mmu_seq)) { + ret = -EAGAIN; goto out_unlock; + } /* * If we are not forced to use page mapping, check if we are @@ -1581,6 +1583,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, memcache, KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED); +out_unlock: + read_unlock(&kvm->mmu_lock); /* Mark the page dirty only if the fault is handled successfully */ if (writable && !ret) { @@ -1588,8 +1592,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, mark_page_dirty_in_slot(kvm, memslot, gfn); } -out_unlock: - read_unlock(&kvm->mmu_lock); kvm_release_pfn_clean(pfn); return ret != -EAGAIN ? ret : 0; } @@ -1768,40 +1770,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) return false; } -bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) -{ - kvm_pfn_t pfn = pte_pfn(range->arg.pte); - - if (!kvm->arch.mmu.pgt) - return false; - - WARN_ON(range->end - range->start != 1); - - /* - * If the page isn't tagged, defer to user_mem_abort() for sanitising - * the MTE tags. The S2 pte should have been unmapped by - * mmu_notifier_invalidate_range_end(). - */ - if (kvm_has_mte(kvm) && !page_mte_tagged(pfn_to_page(pfn))) - return false; - - /* - * We've moved a page around, probably through CoW, so let's treat - * it just like a translation fault and the map handler will clean - * the cache to the PoC. - * - * The MMU notifiers will have unmapped a huge PMD before calling - * ->change_pte() (which in turn calls kvm_set_spte_gfn()) and - * therefore we never need to clear out a huge PMD through this - * calling path and a memcache is not required. - */ - kvm_pgtable_stage2_map(kvm->arch.mmu.pgt, range->start << PAGE_SHIFT, - PAGE_SIZE, __pfn_to_phys(pfn), - KVM_PGTABLE_PROT_R, NULL, 0); - - return false; -} - bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { u64 size = (range->end - range->start) << PAGE_SHIFT; diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index ced30c90521a..6813c7c7f00a 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -35,13 +35,9 @@ static u64 limit_nv_id_reg(u32 id, u64 val) break; case SYS_ID_AA64ISAR1_EL1: - /* Support everything but PtrAuth and Spec Invalidation */ + /* Support everything but Spec Invalidation */ val &= ~(GENMASK_ULL(63, 56) | - NV_FTR(ISAR1, SPECRES) | - NV_FTR(ISAR1, GPI) | - NV_FTR(ISAR1, GPA) | - NV_FTR(ISAR1, API) | - NV_FTR(ISAR1, APA)); + NV_FTR(ISAR1, SPECRES)); break; case SYS_ID_AA64PFR0_EL1: diff --git a/arch/arm64/kvm/pauth.c b/arch/arm64/kvm/pauth.c new file mode 100644 index 000000000000..d5eb3ae876be --- /dev/null +++ b/arch/arm64/kvm/pauth.c @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024 - Google LLC + * Author: Marc Zyngier <maz@kernel.org> + * + * Primitive PAuth emulation for ERETAA/ERETAB. + * + * This code assumes that is is run from EL2, and that it is part of + * the emulation of ERETAx for a guest hypervisor. That's a lot of + * baked-in assumptions and shortcuts. + * + * Do no reuse for anything else! + */ + +#include <linux/kvm_host.h> + +#include <asm/gpr-num.h> +#include <asm/kvm_emulate.h> +#include <asm/pointer_auth.h> + +/* PACGA Xd, Xn, Xm */ +#define PACGA(d,n,m) \ + asm volatile(__DEFINE_ASM_GPR_NUMS \ + ".inst 0x9AC03000 |" \ + "(.L__gpr_num_%[Rd] << 0) |" \ + "(.L__gpr_num_%[Rn] << 5) |" \ + "(.L__gpr_num_%[Rm] << 16)\n" \ + : [Rd] "=r" ((d)) \ + : [Rn] "r" ((n)), [Rm] "r" ((m))) + +static u64 compute_pac(struct kvm_vcpu *vcpu, u64 ptr, + struct ptrauth_key ikey) +{ + struct ptrauth_key gkey; + u64 mod, pac = 0; + + preempt_disable(); + + if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU)) + mod = __vcpu_sys_reg(vcpu, SP_EL2); + else + mod = read_sysreg(sp_el1); + + gkey.lo = read_sysreg_s(SYS_APGAKEYLO_EL1); + gkey.hi = read_sysreg_s(SYS_APGAKEYHI_EL1); + + __ptrauth_key_install_nosync(APGA, ikey); + isb(); + + PACGA(pac, ptr, mod); + isb(); + + __ptrauth_key_install_nosync(APGA, gkey); + + preempt_enable(); + + /* PAC in the top 32bits */ + return pac; +} + +static bool effective_tbi(struct kvm_vcpu *vcpu, bool bit55) +{ + u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2); + bool tbi, tbid; + + /* + * Since we are authenticating an instruction address, we have + * to take TBID into account. If E2H==0, ignore VA[55], as + * TCR_EL2 only has a single TBI/TBID. If VA[55] was set in + * this case, this is likely a guest bug... + */ + if (!vcpu_el2_e2h_is_set(vcpu)) { + tbi = tcr & BIT(20); + tbid = tcr & BIT(29); + } else if (bit55) { + tbi = tcr & TCR_TBI1; + tbid = tcr & TCR_TBID1; + } else { + tbi = tcr & TCR_TBI0; + tbid = tcr & TCR_TBID0; + } + + return tbi && !tbid; +} + +static int compute_bottom_pac(struct kvm_vcpu *vcpu, bool bit55) +{ + static const int maxtxsz = 39; // Revisit these two values once + static const int mintxsz = 16; // (if) we support TTST/LVA/LVA2 + u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2); + int txsz; + + if (!vcpu_el2_e2h_is_set(vcpu) || !bit55) + txsz = FIELD_GET(TCR_T0SZ_MASK, tcr); + else + txsz = FIELD_GET(TCR_T1SZ_MASK, tcr); + + return 64 - clamp(txsz, mintxsz, maxtxsz); +} + +static u64 compute_pac_mask(struct kvm_vcpu *vcpu, bool bit55) +{ + int bottom_pac; + u64 mask; + + bottom_pac = compute_bottom_pac(vcpu, bit55); + + mask = GENMASK(54, bottom_pac); + if (!effective_tbi(vcpu, bit55)) + mask |= GENMASK(63, 56); + + return mask; +} + +static u64 to_canonical_addr(struct kvm_vcpu *vcpu, u64 ptr, u64 mask) +{ + bool bit55 = !!(ptr & BIT(55)); + + if (bit55) + return ptr | mask; + + return ptr & ~mask; +} + +static u64 corrupt_addr(struct kvm_vcpu *vcpu, u64 ptr) +{ + bool bit55 = !!(ptr & BIT(55)); + u64 mask, error_code; + int shift; + + if (effective_tbi(vcpu, bit55)) { + mask = GENMASK(54, 53); + shift = 53; + } else { + mask = GENMASK(62, 61); + shift = 61; + } + + if (esr_iss_is_eretab(kvm_vcpu_get_esr(vcpu))) + error_code = 2 << shift; + else + error_code = 1 << shift; + + ptr &= ~mask; + ptr |= error_code; + + return ptr; +} + +/* + * Authenticate an ERETAA/ERETAB instruction, returning true if the + * authentication succeeded and false otherwise. In all cases, *elr + * contains the VA to ERET to. Potential exception injection is left + * to the caller. + */ +bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr) +{ + u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2); + u64 esr = kvm_vcpu_get_esr(vcpu); + u64 ptr, cptr, pac, mask; + struct ptrauth_key ikey; + + *elr = ptr = vcpu_read_sys_reg(vcpu, ELR_EL2); + + /* We assume we're already in the context of an ERETAx */ + if (esr_iss_is_eretab(esr)) { + if (!(sctlr & SCTLR_EL1_EnIB)) + return true; + + ikey.lo = __vcpu_sys_reg(vcpu, APIBKEYLO_EL1); + ikey.hi = __vcpu_sys_reg(vcpu, APIBKEYHI_EL1); + } else { + if (!(sctlr & SCTLR_EL1_EnIA)) + return true; + + ikey.lo = __vcpu_sys_reg(vcpu, APIAKEYLO_EL1); + ikey.hi = __vcpu_sys_reg(vcpu, APIAKEYHI_EL1); + } + + mask = compute_pac_mask(vcpu, !!(ptr & BIT(55))); + cptr = to_canonical_addr(vcpu, ptr, mask); + + pac = compute_pac(vcpu, cptr, ikey); + + /* + * Slightly deviate from the pseudocode: if we have a PAC + * match with the signed pointer, then it must be good. + * Anything after this point is pure error handling. + */ + if ((pac & mask) == (ptr & mask)) { + *elr = cptr; + return true; + } + + /* + * Authentication failed, corrupt the canonical address if + * PAuth2 isn't implemented, or some XORing if it is. + */ + if (!kvm_has_pauth(vcpu->kvm, PAuth2)) + cptr = corrupt_addr(vcpu, cptr); + else + cptr = ptr ^ (pac & mask); + + *elr = cptr; + return false; +} diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index b7be96a53597..85117ea8f351 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -222,7 +222,6 @@ void pkvm_destroy_hyp_vm(struct kvm *host_kvm) int pkvm_init_host_vm(struct kvm *host_kvm) { - mutex_init(&host_kvm->lock); return 0; } @@ -259,6 +258,7 @@ static int __init finalize_pkvm(void) * at, which would end badly once inaccessible. */ kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start); + kmemleak_free_part(__hyp_rodata_start, __hyp_rodata_end - __hyp_rodata_start); kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size); ret = pkvm_drop_host_privileges(); diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c index a243934c5568..329819806096 100644 --- a/arch/arm64/kvm/pmu.c +++ b/arch/arm64/kvm/pmu.c @@ -232,7 +232,7 @@ bool kvm_set_pmuserenr(u64 val) if (!vcpu || !vcpu_get_flag(vcpu, PMUSERENR_ON_CPU)) return false; - hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + hctxt = host_data_ptr(host_ctxt); ctxt_sys_reg(hctxt, PMUSERENR_EL0) = val; return true; } diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 68d1d05672bd..1b7b58cb121f 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -151,7 +151,6 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu) { void *sve_state = vcpu->arch.sve_state; - kvm_vcpu_unshare_task_fp(vcpu); kvm_unshare_hyp(vcpu, vcpu + 1); if (sve_state) kvm_unshare_hyp(sve_state, sve_state + vcpu_sve_state_size(vcpu)); diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index c9f4f387155f..22b45a15d068 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1568,17 +1568,31 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *r return IDREG(vcpu->kvm, reg_to_encoding(r)); } +static bool is_feature_id_reg(u32 encoding) +{ + return (sys_reg_Op0(encoding) == 3 && + (sys_reg_Op1(encoding) < 2 || sys_reg_Op1(encoding) == 3) && + sys_reg_CRn(encoding) == 0 && + sys_reg_CRm(encoding) <= 7); +} + /* * Return true if the register's (Op0, Op1, CRn, CRm, Op2) is - * (3, 0, 0, crm, op2), where 1<=crm<8, 0<=op2<8. + * (3, 0, 0, crm, op2), where 1<=crm<8, 0<=op2<8, which is the range of ID + * registers KVM maintains on a per-VM basis. */ -static inline bool is_id_reg(u32 id) +static inline bool is_vm_ftr_id_reg(u32 id) { return (sys_reg_Op0(id) == 3 && sys_reg_Op1(id) == 0 && sys_reg_CRn(id) == 0 && sys_reg_CRm(id) >= 1 && sys_reg_CRm(id) < 8); } +static inline bool is_vcpu_ftr_id_reg(u32 id) +{ + return is_feature_id_reg(id) && !is_vm_ftr_id_reg(id); +} + static inline bool is_aa32_id_reg(u32 id) { return (sys_reg_Op0(id) == 3 && sys_reg_Op1(id) == 0 && @@ -2338,7 +2352,6 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_AA64MMFR0_EL1_TGRAN16_2)), ID_WRITABLE(ID_AA64MMFR1_EL1, ~(ID_AA64MMFR1_EL1_RES0 | ID_AA64MMFR1_EL1_HCX | - ID_AA64MMFR1_EL1_XNX | ID_AA64MMFR1_EL1_TWED | ID_AA64MMFR1_EL1_XNX | ID_AA64MMFR1_EL1_VH | @@ -3069,12 +3082,14 @@ static bool check_sysreg_table(const struct sys_reg_desc *table, unsigned int n, for (i = 0; i < n; i++) { if (!is_32 && table[i].reg && !table[i].reset) { - kvm_err("sys_reg table %pS entry %d lacks reset\n", &table[i], i); + kvm_err("sys_reg table %pS entry %d (%s) lacks reset\n", + &table[i], i, table[i].name); return false; } if (i && cmp_sys_reg(&table[i-1], &table[i]) >= 0) { - kvm_err("sys_reg table %pS entry %d out of order\n", &table[i - 1], i - 1); + kvm_err("sys_reg table %pS entry %d (%s -> %s) out of order\n", + &table[i], i, table[i - 1].name, table[i].name); return false; } } @@ -3509,26 +3524,25 @@ void kvm_sys_regs_create_debugfs(struct kvm *kvm) &idregs_debug_fops); } -static void kvm_reset_id_regs(struct kvm_vcpu *vcpu) +static void reset_vm_ftr_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *reg) { - const struct sys_reg_desc *idreg = first_idreg; - u32 id = reg_to_encoding(idreg); + u32 id = reg_to_encoding(reg); struct kvm *kvm = vcpu->kvm; if (test_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags)) return; lockdep_assert_held(&kvm->arch.config_lock); + IDREG(kvm, id) = reg->reset(vcpu, reg); +} - /* Initialize all idregs */ - while (is_id_reg(id)) { - IDREG(kvm, id) = idreg->reset(vcpu, idreg); - - idreg++; - id = reg_to_encoding(idreg); - } +static void reset_vcpu_ftr_id_reg(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *reg) +{ + if (kvm_vcpu_initialized(vcpu)) + return; - set_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags); + reg->reset(vcpu, reg); } /** @@ -3540,19 +3554,24 @@ static void kvm_reset_id_regs(struct kvm_vcpu *vcpu) */ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu) { + struct kvm *kvm = vcpu->kvm; unsigned long i; - kvm_reset_id_regs(vcpu); - for (i = 0; i < ARRAY_SIZE(sys_reg_descs); i++) { const struct sys_reg_desc *r = &sys_reg_descs[i]; - if (is_id_reg(reg_to_encoding(r))) + if (!r->reset) continue; - if (r->reset) + if (is_vm_ftr_id_reg(reg_to_encoding(r))) + reset_vm_ftr_id_reg(vcpu, r); + else if (is_vcpu_ftr_id_reg(reg_to_encoding(r))) + reset_vcpu_ftr_id_reg(vcpu, r); + else r->reset(vcpu, r); } + + set_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags); } /** @@ -3978,14 +3997,6 @@ int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) sys_reg_CRm(r), \ sys_reg_Op2(r)) -static bool is_feature_id_reg(u32 encoding) -{ - return (sys_reg_Op0(encoding) == 3 && - (sys_reg_Op1(encoding) < 2 || sys_reg_Op1(encoding) == 3) && - sys_reg_CRn(encoding) == 0 && - sys_reg_CRm(encoding) <= 7); -} - int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm, struct reg_mask_range *range) { const void *zero_page = page_to_virt(ZERO_PAGE(0)); @@ -4014,7 +4025,7 @@ int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm, struct reg_mask_range * * compliant with a given revision of the architecture, but the * RES0/RES1 definitions allow us to do that. */ - if (is_id_reg(encoding)) { + if (is_vm_ftr_id_reg(encoding)) { if (!reg->val || (is_aa32_id_reg(encoding) && !kvm_supports_32bit_el0())) continue; diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index 389025ce7749..bcbc8c986b1d 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -28,27 +28,65 @@ struct vgic_state_iter { int nr_lpis; int dist_id; int vcpu_id; - int intid; + unsigned long intid; int lpi_idx; - u32 *lpi_array; }; -static void iter_next(struct vgic_state_iter *iter) +static void iter_next(struct kvm *kvm, struct vgic_state_iter *iter) { + struct vgic_dist *dist = &kvm->arch.vgic; + if (iter->dist_id == 0) { iter->dist_id++; return; } + /* + * Let the xarray drive the iterator after the last SPI, as the iterator + * has exhausted the sequentially-allocated INTID space. + */ + if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS - 1)) { + if (iter->lpi_idx < iter->nr_lpis) + xa_find_after(&dist->lpi_xa, &iter->intid, + VGIC_LPI_MAX_INTID, + LPI_XA_MARK_DEBUG_ITER); + iter->lpi_idx++; + return; + } + iter->intid++; if (iter->intid == VGIC_NR_PRIVATE_IRQS && ++iter->vcpu_id < iter->nr_cpus) iter->intid = 0; +} - if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS)) { - if (iter->lpi_idx < iter->nr_lpis) - iter->intid = iter->lpi_array[iter->lpi_idx]; - iter->lpi_idx++; +static int iter_mark_lpis(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_irq *irq; + unsigned long intid; + int nr_lpis = 0; + + xa_for_each(&dist->lpi_xa, intid, irq) { + if (!vgic_try_get_irq_kref(irq)) + continue; + + xa_set_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); + nr_lpis++; + } + + return nr_lpis; +} + +static void iter_unmark_lpis(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_irq *irq; + unsigned long intid; + + xa_for_each(&dist->lpi_xa, intid, irq) { + xa_clear_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); + vgic_put_irq(kvm, irq); } } @@ -61,15 +99,12 @@ static void iter_init(struct kvm *kvm, struct vgic_state_iter *iter, iter->nr_cpus = nr_cpus; iter->nr_spis = kvm->arch.vgic.nr_spis; - if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { - iter->nr_lpis = vgic_copy_lpi_list(kvm, NULL, &iter->lpi_array); - if (iter->nr_lpis < 0) - iter->nr_lpis = 0; - } + if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) + iter->nr_lpis = iter_mark_lpis(kvm); /* Fast forward to the right position if needed */ while (pos--) - iter_next(iter); + iter_next(kvm, iter); } static bool end_of_vgic(struct vgic_state_iter *iter) @@ -114,7 +149,7 @@ static void *vgic_debug_next(struct seq_file *s, void *v, loff_t *pos) struct vgic_state_iter *iter = kvm->arch.vgic.iter; ++*pos; - iter_next(iter); + iter_next(kvm, iter); if (end_of_vgic(iter)) iter = NULL; return iter; @@ -134,13 +169,14 @@ static void vgic_debug_stop(struct seq_file *s, void *v) mutex_lock(&kvm->arch.config_lock); iter = kvm->arch.vgic.iter; - kfree(iter->lpi_array); + iter_unmark_lpis(kvm); kfree(iter); kvm->arch.vgic.iter = NULL; mutex_unlock(&kvm->arch.config_lock); } -static void print_dist_state(struct seq_file *s, struct vgic_dist *dist) +static void print_dist_state(struct seq_file *s, struct vgic_dist *dist, + struct vgic_state_iter *iter) { bool v3 = dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3; @@ -149,7 +185,7 @@ static void print_dist_state(struct seq_file *s, struct vgic_dist *dist) seq_printf(s, "vgic_model:\t%s\n", v3 ? "GICv3" : "GICv2"); seq_printf(s, "nr_spis:\t%d\n", dist->nr_spis); if (v3) - seq_printf(s, "nr_lpis:\t%d\n", atomic_read(&dist->lpi_count)); + seq_printf(s, "nr_lpis:\t%d\n", iter->nr_lpis); seq_printf(s, "enabled:\t%d\n", dist->enabled); seq_printf(s, "\n"); @@ -236,7 +272,7 @@ static int vgic_debug_show(struct seq_file *s, void *v) unsigned long flags; if (iter->dist_id == 0) { - print_dist_state(s, &kvm->arch.vgic); + print_dist_state(s, &kvm->arch.vgic, iter); return 0; } @@ -246,11 +282,13 @@ static int vgic_debug_show(struct seq_file *s, void *v) if (iter->vcpu_id < iter->nr_cpus) vcpu = kvm_get_vcpu(kvm, iter->vcpu_id); + /* + * Expect this to succeed, as iter_mark_lpis() takes a reference on + * every LPI to be visited. + */ irq = vgic_get_irq(kvm, vcpu, iter->intid); - if (!irq) { - seq_printf(s, " LPI %4d freed\n", iter->intid); - return 0; - } + if (WARN_ON_ONCE(!irq)) + return -EINVAL; raw_spin_lock_irqsave(&irq->irq_lock, flags); print_irq_state(s, irq, vcpu); diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index f20941f83a07..8f5b7a3e7009 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -53,8 +53,6 @@ void kvm_vgic_early_init(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - INIT_LIST_HEAD(&dist->lpi_translation_cache); - raw_spin_lock_init(&dist->lpi_list_lock); xa_init_flags(&dist->lpi_xa, XA_FLAGS_LOCK_IRQ); } @@ -182,27 +180,22 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis) return 0; } -/** - * kvm_vgic_vcpu_init() - Initialize static VGIC VCPU data - * structures and register VCPU-specific KVM iodevs - * - * @vcpu: pointer to the VCPU being created and initialized - * - * Only do initialization, but do not actually enable the - * VGIC CPU interface - */ -int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) +static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - int ret = 0; int i; - vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF; + lockdep_assert_held(&vcpu->kvm->arch.config_lock); - INIT_LIST_HEAD(&vgic_cpu->ap_list_head); - raw_spin_lock_init(&vgic_cpu->ap_list_lock); - atomic_set(&vgic_cpu->vgic_v3.its_vpe.vlpi_count, 0); + if (vgic_cpu->private_irqs) + return 0; + + vgic_cpu->private_irqs = kcalloc(VGIC_NR_PRIVATE_IRQS, + sizeof(struct vgic_irq), + GFP_KERNEL_ACCOUNT); + + if (!vgic_cpu->private_irqs) + return -ENOMEM; /* * Enable and configure all SGIs to be edge-triggered and @@ -227,9 +220,48 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) } } + return 0; +} + +static int vgic_allocate_private_irqs(struct kvm_vcpu *vcpu) +{ + int ret; + + mutex_lock(&vcpu->kvm->arch.config_lock); + ret = vgic_allocate_private_irqs_locked(vcpu); + mutex_unlock(&vcpu->kvm->arch.config_lock); + + return ret; +} + +/** + * kvm_vgic_vcpu_init() - Initialize static VGIC VCPU data + * structures and register VCPU-specific KVM iodevs + * + * @vcpu: pointer to the VCPU being created and initialized + * + * Only do initialization, but do not actually enable the + * VGIC CPU interface + */ +int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + int ret = 0; + + vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF; + + INIT_LIST_HEAD(&vgic_cpu->ap_list_head); + raw_spin_lock_init(&vgic_cpu->ap_list_lock); + atomic_set(&vgic_cpu->vgic_v3.its_vpe.vlpi_count, 0); + if (!irqchip_in_kernel(vcpu->kvm)) return 0; + ret = vgic_allocate_private_irqs(vcpu); + if (ret) + return ret; + /* * If we are creating a VCPU with a GICv3 we must also register the * KVM io device for the redistributor that belongs to this VCPU. @@ -285,10 +317,13 @@ int vgic_init(struct kvm *kvm) /* Initialize groups on CPUs created before the VGIC type was known */ kvm_for_each_vcpu(idx, vcpu, kvm) { - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + ret = vgic_allocate_private_irqs_locked(vcpu); + if (ret) + goto out; for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { - struct vgic_irq *irq = &vgic_cpu->private_irqs[i]; + struct vgic_irq *irq = vgic_get_irq(kvm, vcpu, i); + switch (dist->vgic_model) { case KVM_DEV_TYPE_ARM_VGIC_V3: irq->group = 1; @@ -300,14 +335,15 @@ int vgic_init(struct kvm *kvm) break; default: ret = -EINVAL; - goto out; } + + vgic_put_irq(kvm, irq); + + if (ret) + goto out; } } - if (vgic_has_its(kvm)) - vgic_lpi_translation_cache_init(kvm); - /* * If we have GICv4.1 enabled, unconditionally request enable the * v4 support so that we get HW-accelerated vSGIs. Otherwise, only @@ -361,9 +397,6 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm) dist->vgic_cpu_base = VGIC_ADDR_UNDEF; } - if (vgic_has_its(kvm)) - vgic_lpi_translation_cache_destroy(kvm); - if (vgic_supports_direct_msis(kvm)) vgic_v4_teardown(kvm); @@ -381,6 +414,9 @@ static void __kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) vgic_flush_pending_lpis(vcpu); INIT_LIST_HEAD(&vgic_cpu->ap_list_head); + kfree(vgic_cpu->private_irqs); + vgic_cpu->private_irqs = NULL; + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { vgic_unregister_redist_iodev(vcpu); vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF; diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index e85a495ada9c..40bb43f20bf3 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -23,6 +23,8 @@ #include "vgic.h" #include "vgic-mmio.h" +static struct kvm_device_ops kvm_arm_vgic_its_ops; + static int vgic_its_save_tables_v0(struct vgic_its *its); static int vgic_its_restore_tables_v0(struct vgic_its *its); static int vgic_its_commit_v0(struct vgic_its *its); @@ -67,7 +69,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, irq->target_vcpu = vcpu; irq->group = 1; - raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); + xa_lock_irqsave(&dist->lpi_xa, flags); /* * There could be a race with another vgic_add_lpi(), so we need to @@ -82,17 +84,14 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, goto out_unlock; } - ret = xa_err(xa_store(&dist->lpi_xa, intid, irq, 0)); + ret = xa_err(__xa_store(&dist->lpi_xa, intid, irq, 0)); if (ret) { xa_release(&dist->lpi_xa, intid); kfree(irq); - goto out_unlock; } - atomic_inc(&dist->lpi_count); - out_unlock: - raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); + xa_unlock_irqrestore(&dist->lpi_xa, flags); if (ret) return ERR_PTR(ret); @@ -150,14 +149,6 @@ struct its_ite { u32 event_id; }; -struct vgic_translation_cache_entry { - struct list_head entry; - phys_addr_t db; - u32 devid; - u32 eventid; - struct vgic_irq *irq; -}; - /** * struct vgic_its_abi - ITS abi ops and settings * @cte_esz: collection table entry size @@ -252,8 +243,10 @@ static struct its_ite *find_ite(struct vgic_its *its, u32 device_id, #define GIC_LPI_OFFSET 8192 -#define VITS_TYPER_IDBITS 16 -#define VITS_TYPER_DEVBITS 16 +#define VITS_TYPER_IDBITS 16 +#define VITS_MAX_EVENTID (BIT(VITS_TYPER_IDBITS) - 1) +#define VITS_TYPER_DEVBITS 16 +#define VITS_MAX_DEVID (BIT(VITS_TYPER_DEVBITS) - 1) #define VITS_DTE_MAX_DEVID_OFFSET (BIT(14) - 1) #define VITS_ITE_MAX_EVENTID_OFFSET (BIT(16) - 1) @@ -316,53 +309,6 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, return 0; } -#define GIC_LPI_MAX_INTID ((1 << INTERRUPT_ID_BITS_ITS) - 1) - -/* - * Create a snapshot of the current LPIs targeting @vcpu, so that we can - * enumerate those LPIs without holding any lock. - * Returns their number and puts the kmalloc'ed array into intid_ptr. - */ -int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - XA_STATE(xas, &dist->lpi_xa, GIC_LPI_OFFSET); - struct vgic_irq *irq; - unsigned long flags; - u32 *intids; - int irq_count, i = 0; - - /* - * There is an obvious race between allocating the array and LPIs - * being mapped/unmapped. If we ended up here as a result of a - * command, we're safe (locks are held, preventing another - * command). If coming from another path (such as enabling LPIs), - * we must be careful not to overrun the array. - */ - irq_count = atomic_read(&dist->lpi_count); - intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL_ACCOUNT); - if (!intids) - return -ENOMEM; - - raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); - rcu_read_lock(); - - xas_for_each(&xas, irq, GIC_LPI_MAX_INTID) { - if (i == irq_count) - break; - /* We don't need to "get" the IRQ, as we hold the list lock. */ - if (vcpu && irq->target_vcpu != vcpu) - continue; - intids[i++] = irq->intid; - } - - rcu_read_unlock(); - raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); - - *intid_ptr = intids; - return i; -} - static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu) { int ret = 0; @@ -446,23 +392,18 @@ static u32 max_lpis_propbaser(u64 propbaser) static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) { gpa_t pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser); + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + unsigned long intid, flags; struct vgic_irq *irq; int last_byte_offset = -1; int ret = 0; - u32 *intids; - int nr_irqs, i; - unsigned long flags; u8 pendmask; - nr_irqs = vgic_copy_lpi_list(vcpu->kvm, vcpu, &intids); - if (nr_irqs < 0) - return nr_irqs; - - for (i = 0; i < nr_irqs; i++) { + xa_for_each(&dist->lpi_xa, intid, irq) { int byte_offset, bit_nr; - byte_offset = intids[i] / BITS_PER_BYTE; - bit_nr = intids[i] % BITS_PER_BYTE; + byte_offset = intid / BITS_PER_BYTE; + bit_nr = intid % BITS_PER_BYTE; /* * For contiguously allocated LPIs chances are we just read @@ -472,25 +413,23 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) ret = kvm_read_guest_lock(vcpu->kvm, pendbase + byte_offset, &pendmask, 1); - if (ret) { - kfree(intids); + if (ret) return ret; - } + last_byte_offset = byte_offset; } - irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]); + irq = vgic_get_irq(vcpu->kvm, NULL, intid); if (!irq) continue; raw_spin_lock_irqsave(&irq->irq_lock, flags); - irq->pending_latch = pendmask & (1U << bit_nr); + if (irq->target_vcpu == vcpu) + irq->pending_latch = pendmask & (1U << bit_nr); vgic_queue_irq_unlock(vcpu->kvm, irq, flags); vgic_put_irq(vcpu->kvm, irq); } - kfree(intids); - return ret; } @@ -566,51 +505,52 @@ static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm, return 0; } -static struct vgic_irq *__vgic_its_check_cache(struct vgic_dist *dist, - phys_addr_t db, - u32 devid, u32 eventid) +static struct vgic_its *__vgic_doorbell_to_its(struct kvm *kvm, gpa_t db) { - struct vgic_translation_cache_entry *cte; + struct kvm_io_device *kvm_io_dev; + struct vgic_io_device *iodev; - list_for_each_entry(cte, &dist->lpi_translation_cache, entry) { - /* - * If we hit a NULL entry, there is nothing after this - * point. - */ - if (!cte->irq) - break; + kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, db); + if (!kvm_io_dev) + return ERR_PTR(-EINVAL); - if (cte->db != db || cte->devid != devid || - cte->eventid != eventid) - continue; + if (kvm_io_dev->ops != &kvm_io_gic_ops) + return ERR_PTR(-EINVAL); - /* - * Move this entry to the head, as it is the most - * recently used. - */ - if (!list_is_first(&cte->entry, &dist->lpi_translation_cache)) - list_move(&cte->entry, &dist->lpi_translation_cache); + iodev = container_of(kvm_io_dev, struct vgic_io_device, dev); + if (iodev->iodev_type != IODEV_ITS) + return ERR_PTR(-EINVAL); - return cte->irq; - } + return iodev->its; +} + +static unsigned long vgic_its_cache_key(u32 devid, u32 eventid) +{ + return (((unsigned long)devid) << VITS_TYPER_IDBITS) | eventid; - return NULL; } static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t db, u32 devid, u32 eventid) { - struct vgic_dist *dist = &kvm->arch.vgic; + unsigned long cache_key = vgic_its_cache_key(devid, eventid); + struct vgic_its *its; struct vgic_irq *irq; - unsigned long flags; - raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); + if (devid > VITS_MAX_DEVID || eventid > VITS_MAX_EVENTID) + return NULL; - irq = __vgic_its_check_cache(dist, db, devid, eventid); + its = __vgic_doorbell_to_its(kvm, db); + if (IS_ERR(its)) + return NULL; + + rcu_read_lock(); + + irq = xa_load(&its->translation_cache, cache_key); if (!vgic_try_get_irq_kref(irq)) irq = NULL; - raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); + rcu_read_unlock(); return irq; } @@ -619,41 +559,13 @@ static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its, u32 devid, u32 eventid, struct vgic_irq *irq) { - struct vgic_dist *dist = &kvm->arch.vgic; - struct vgic_translation_cache_entry *cte; - unsigned long flags; - phys_addr_t db; + unsigned long cache_key = vgic_its_cache_key(devid, eventid); + struct vgic_irq *old; /* Do not cache a directly injected interrupt */ if (irq->hw) return; - raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); - - if (unlikely(list_empty(&dist->lpi_translation_cache))) - goto out; - - /* - * We could have raced with another CPU caching the same - * translation behind our back, so let's check it is not in - * already - */ - db = its->vgic_its_base + GITS_TRANSLATER; - if (__vgic_its_check_cache(dist, db, devid, eventid)) - goto out; - - /* Always reuse the last entry (LRU policy) */ - cte = list_last_entry(&dist->lpi_translation_cache, - typeof(*cte), entry); - - /* - * Caching the translation implies having an extra reference - * to the interrupt, so drop the potential reference on what - * was in the cache, and increment it on the new interrupt. - */ - if (cte->irq) - vgic_put_irq(kvm, cte->irq); - /* * The irq refcount is guaranteed to be nonzero while holding the * its_lock, as the ITE (and the reference it holds) cannot be freed. @@ -661,39 +573,44 @@ static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its, lockdep_assert_held(&its->its_lock); vgic_get_irq_kref(irq); - cte->db = db; - cte->devid = devid; - cte->eventid = eventid; - cte->irq = irq; + /* + * We could have raced with another CPU caching the same + * translation behind our back, ensure we don't leak a + * reference if that is the case. + */ + old = xa_store(&its->translation_cache, cache_key, irq, GFP_KERNEL_ACCOUNT); + if (old) + vgic_put_irq(kvm, old); +} - /* Move the new translation to the head of the list */ - list_move(&cte->entry, &dist->lpi_translation_cache); +static void vgic_its_invalidate_cache(struct vgic_its *its) +{ + struct kvm *kvm = its->dev->kvm; + struct vgic_irq *irq; + unsigned long idx; -out: - raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); + xa_for_each(&its->translation_cache, idx, irq) { + xa_erase(&its->translation_cache, idx); + vgic_put_irq(kvm, irq); + } } -void vgic_its_invalidate_cache(struct kvm *kvm) +void vgic_its_invalidate_all_caches(struct kvm *kvm) { - struct vgic_dist *dist = &kvm->arch.vgic; - struct vgic_translation_cache_entry *cte; - unsigned long flags; + struct kvm_device *dev; + struct vgic_its *its; - raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); + rcu_read_lock(); - list_for_each_entry(cte, &dist->lpi_translation_cache, entry) { - /* - * If we hit a NULL entry, there is nothing after this - * point. - */ - if (!cte->irq) - break; + list_for_each_entry_rcu(dev, &kvm->devices, vm_node) { + if (dev->ops != &kvm_arm_vgic_its_ops) + continue; - vgic_put_irq(kvm, cte->irq); - cte->irq = NULL; + its = dev->private; + vgic_its_invalidate_cache(its); } - raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); + rcu_read_unlock(); } int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its, @@ -725,8 +642,6 @@ int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its, struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi) { u64 address; - struct kvm_io_device *kvm_io_dev; - struct vgic_io_device *iodev; if (!vgic_has_its(kvm)) return ERR_PTR(-ENODEV); @@ -736,18 +651,7 @@ struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi) address = (u64)msi->address_hi << 32 | msi->address_lo; - kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address); - if (!kvm_io_dev) - return ERR_PTR(-EINVAL); - - if (kvm_io_dev->ops != &kvm_io_gic_ops) - return ERR_PTR(-EINVAL); - - iodev = container_of(kvm_io_dev, struct vgic_io_device, dev); - if (iodev->iodev_type != IODEV_ITS) - return ERR_PTR(-EINVAL); - - return iodev->its; + return __vgic_doorbell_to_its(kvm, address); } /* @@ -883,7 +787,7 @@ static int vgic_its_cmd_handle_discard(struct kvm *kvm, struct vgic_its *its, * don't bother here since we clear the ITTE anyway and the * pending state is a property of the ITTE struct. */ - vgic_its_invalidate_cache(kvm); + vgic_its_invalidate_cache(its); its_free_ite(kvm, ite); return 0; @@ -920,7 +824,7 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its, ite->collection = collection; vcpu = collection_to_vcpu(kvm, collection); - vgic_its_invalidate_cache(kvm); + vgic_its_invalidate_cache(its); return update_affinity(ite->irq, vcpu); } @@ -955,7 +859,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, switch (type) { case GITS_BASER_TYPE_DEVICE: - if (id >= BIT_ULL(VITS_TYPER_DEVBITS)) + if (id > VITS_MAX_DEVID) return false; break; case GITS_BASER_TYPE_COLLECTION: @@ -1167,7 +1071,8 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, } /* Requires the its_lock to be held. */ -static void vgic_its_free_device(struct kvm *kvm, struct its_device *device) +static void vgic_its_free_device(struct kvm *kvm, struct vgic_its *its, + struct its_device *device) { struct its_ite *ite, *temp; @@ -1179,7 +1084,7 @@ static void vgic_its_free_device(struct kvm *kvm, struct its_device *device) list_for_each_entry_safe(ite, temp, &device->itt_head, ite_list) its_free_ite(kvm, ite); - vgic_its_invalidate_cache(kvm); + vgic_its_invalidate_cache(its); list_del(&device->dev_list); kfree(device); @@ -1191,7 +1096,7 @@ static void vgic_its_free_device_list(struct kvm *kvm, struct vgic_its *its) struct its_device *cur, *temp; list_for_each_entry_safe(cur, temp, &its->device_list, dev_list) - vgic_its_free_device(kvm, cur); + vgic_its_free_device(kvm, its, cur); } /* its lock must be held */ @@ -1250,7 +1155,7 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its, * by removing the mapping and re-establishing it. */ if (device) - vgic_its_free_device(kvm, device); + vgic_its_free_device(kvm, its, device); /* * The spec does not say whether unmapping a not-mapped device @@ -1281,7 +1186,7 @@ static int vgic_its_cmd_handle_mapc(struct kvm *kvm, struct vgic_its *its, if (!valid) { vgic_its_free_collection(its, coll_id); - vgic_its_invalidate_cache(kvm); + vgic_its_invalidate_cache(its); } else { struct kvm_vcpu *vcpu; @@ -1372,23 +1277,19 @@ static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its, int vgic_its_invall(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; - int irq_count, i = 0; - u32 *intids; - - irq_count = vgic_copy_lpi_list(kvm, vcpu, &intids); - if (irq_count < 0) - return irq_count; + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_irq *irq; + unsigned long intid; - for (i = 0; i < irq_count; i++) { - struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intids[i]); + xa_for_each(&dist->lpi_xa, intid, irq) { + irq = vgic_get_irq(kvm, NULL, intid); if (!irq) continue; + update_lpi_config(kvm, irq, vcpu, false); vgic_put_irq(kvm, irq); } - kfree(intids); - if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.its_vm) its_invall_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe); @@ -1431,10 +1332,10 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its, static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its, u64 *its_cmd) { + struct vgic_dist *dist = &kvm->arch.vgic; struct kvm_vcpu *vcpu1, *vcpu2; struct vgic_irq *irq; - u32 *intids; - int irq_count, i; + unsigned long intid; /* We advertise GITS_TYPER.PTA==0, making the address the vcpu ID */ vcpu1 = kvm_get_vcpu_by_id(kvm, its_cmd_get_target_addr(its_cmd)); @@ -1446,12 +1347,8 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its, if (vcpu1 == vcpu2) return 0; - irq_count = vgic_copy_lpi_list(kvm, vcpu1, &intids); - if (irq_count < 0) - return irq_count; - - for (i = 0; i < irq_count; i++) { - irq = vgic_get_irq(kvm, NULL, intids[i]); + xa_for_each(&dist->lpi_xa, intid, irq) { + irq = vgic_get_irq(kvm, NULL, intid); if (!irq) continue; @@ -1460,9 +1357,8 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its, vgic_put_irq(kvm, irq); } - vgic_its_invalidate_cache(kvm); + vgic_its_invalidate_cache(its); - kfree(intids); return 0; } @@ -1813,7 +1709,7 @@ static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its, its->enabled = !!(val & GITS_CTLR_ENABLE); if (!its->enabled) - vgic_its_invalidate_cache(kvm); + vgic_its_invalidate_cache(its); /* * Try to process any pending commands. This function bails out early @@ -1914,47 +1810,6 @@ out: return ret; } -/* Default is 16 cached LPIs per vcpu */ -#define LPI_DEFAULT_PCPU_CACHE_SIZE 16 - -void vgic_lpi_translation_cache_init(struct kvm *kvm) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - unsigned int sz; - int i; - - if (!list_empty(&dist->lpi_translation_cache)) - return; - - sz = atomic_read(&kvm->online_vcpus) * LPI_DEFAULT_PCPU_CACHE_SIZE; - - for (i = 0; i < sz; i++) { - struct vgic_translation_cache_entry *cte; - - /* An allocation failure is not fatal */ - cte = kzalloc(sizeof(*cte), GFP_KERNEL_ACCOUNT); - if (WARN_ON(!cte)) - break; - - INIT_LIST_HEAD(&cte->entry); - list_add(&cte->entry, &dist->lpi_translation_cache); - } -} - -void vgic_lpi_translation_cache_destroy(struct kvm *kvm) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - struct vgic_translation_cache_entry *cte, *tmp; - - vgic_its_invalidate_cache(kvm); - - list_for_each_entry_safe(cte, tmp, - &dist->lpi_translation_cache, entry) { - list_del(&cte->entry); - kfree(cte); - } -} - #define INITIAL_BASER_VALUE \ (GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb) | \ GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, SameAsInner) | \ @@ -1987,8 +1842,6 @@ static int vgic_its_create(struct kvm_device *dev, u32 type) kfree(its); return ret; } - - vgic_lpi_translation_cache_init(dev->kvm); } mutex_init(&its->its_lock); @@ -2006,6 +1859,7 @@ static int vgic_its_create(struct kvm_device *dev, u32 type) INIT_LIST_HEAD(&its->device_list); INIT_LIST_HEAD(&its->collection_list); + xa_init(&its->translation_cache); dev->kvm->arch.vgic.msis_require_devid = true; dev->kvm->arch.vgic.has_its = true; @@ -2036,6 +1890,8 @@ static void vgic_its_destroy(struct kvm_device *kvm_dev) vgic_its_free_device_list(kvm, its); vgic_its_free_collection_list(kvm, its); + vgic_its_invalidate_cache(its); + xa_destroy(&its->translation_cache); mutex_unlock(&its->its_lock); kfree(its); @@ -2438,7 +2294,7 @@ static int vgic_its_restore_dte(struct vgic_its *its, u32 id, ret = vgic_its_restore_itt(its, dev); if (ret) { - vgic_its_free_device(its->dev->kvm, dev); + vgic_its_free_device(its->dev->kvm, its, dev); return ret; } diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index c15ee1df036a..a3983a631b5a 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -277,7 +277,7 @@ static void vgic_mmio_write_v3r_ctlr(struct kvm_vcpu *vcpu, return; vgic_flush_pending_lpis(vcpu); - vgic_its_invalidate_cache(vcpu->kvm); + vgic_its_invalidate_all_caches(vcpu->kvm); atomic_set_release(&vgic_cpu->ctlr, 0); } else { ctlr = atomic_cmpxchg_acquire(&vgic_cpu->ctlr, 0, diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c index 7e9cdb78f7ce..ae5a44d5702d 100644 --- a/arch/arm64/kvm/vgic/vgic-v2.c +++ b/arch/arm64/kvm/vgic/vgic-v2.c @@ -464,17 +464,10 @@ void vgic_v2_load(struct kvm_vcpu *vcpu) kvm_vgic_global_state.vctrl_base + GICH_APR); } -void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu) -{ - struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - - cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR); -} - void vgic_v2_put(struct kvm_vcpu *vcpu) { struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - vgic_v2_vmcr_sync(vcpu); + cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR); cpu_if->vgic_apr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_APR); } diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 4ea3340786b9..ed6e412cd74b 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -722,15 +722,7 @@ void vgic_v3_load(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; - /* - * If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen - * is dependent on ICC_SRE_EL1.SRE, and we have to perform the - * VMCR_EL2 save/restore in the world switch. - */ - if (likely(cpu_if->vgic_sre)) - kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr); - - kvm_call_hyp(__vgic_v3_restore_aprs, cpu_if); + kvm_call_hyp(__vgic_v3_restore_vmcr_aprs, cpu_if); if (has_vhe()) __vgic_v3_activate_traps(cpu_if); @@ -738,24 +730,13 @@ void vgic_v3_load(struct kvm_vcpu *vcpu) WARN_ON(vgic_v4_load(vcpu)); } -void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu) -{ - struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; - - if (likely(cpu_if->vgic_sre)) - cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr); -} - void vgic_v3_put(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + kvm_call_hyp(__vgic_v3_save_vmcr_aprs, cpu_if); WARN_ON(vgic_v4_put(vcpu)); - vgic_v3_vmcr_sync(vcpu); - - kvm_call_hyp(__vgic_v3_save_aprs, cpu_if); - if (has_vhe()) __vgic_v3_deactivate_traps(cpu_if); } diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 4ec93587c8cd..f07b3ddff7d4 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -29,9 +29,8 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = { * its->cmd_lock (mutex) * its->its_lock (mutex) * vgic_cpu->ap_list_lock must be taken with IRQs disabled - * kvm->lpi_list_lock must be taken with IRQs disabled - * vgic_dist->lpi_xa.xa_lock must be taken with IRQs disabled - * vgic_irq->irq_lock must be taken with IRQs disabled + * vgic_dist->lpi_xa.xa_lock must be taken with IRQs disabled + * vgic_irq->irq_lock must be taken with IRQs disabled * * As the ap_list_lock might be taken from the timer interrupt handler, * we have to disable IRQs before taking this lock and everything lower @@ -126,7 +125,6 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) __xa_erase(&dist->lpi_xa, irq->intid); xa_unlock_irqrestore(&dist->lpi_xa, flags); - atomic_dec(&dist->lpi_count); kfree_rcu(irq, rcu); } @@ -939,17 +937,6 @@ void kvm_vgic_put(struct kvm_vcpu *vcpu) vgic_v3_put(vcpu); } -void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu) -{ - if (unlikely(!irqchip_in_kernel(vcpu->kvm))) - return; - - if (kvm_vgic_global_state.type == VGIC_V2) - vgic_v2_vmcr_sync(vcpu); - else - vgic_v3_vmcr_sync(vcpu); -} - int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 0c2b82de8fa3..6106ebd5ba42 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -16,6 +16,7 @@ #define INTERRUPT_ID_BITS_SPIS 10 #define INTERRUPT_ID_BITS_ITS 16 +#define VGIC_LPI_MAX_INTID ((1 << INTERRUPT_ID_BITS_ITS) - 1) #define VGIC_PRI_BITS 5 #define vgic_irq_is_sgi(intid) ((intid) < VGIC_NR_SGIS) @@ -214,7 +215,6 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, void vgic_v2_init_lrs(void); void vgic_v2_load(struct kvm_vcpu *vcpu); void vgic_v2_put(struct kvm_vcpu *vcpu); -void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu); void vgic_v2_save_state(struct kvm_vcpu *vcpu); void vgic_v2_restore_state(struct kvm_vcpu *vcpu); @@ -253,7 +253,6 @@ bool vgic_v3_check_base(struct kvm *kvm); void vgic_v3_load(struct kvm_vcpu *vcpu); void vgic_v3_put(struct kvm_vcpu *vcpu); -void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu); bool vgic_has_its(struct kvm *kvm); int kvm_vgic_register_its_device(void); @@ -330,14 +329,11 @@ static inline bool vgic_dist_overlap(struct kvm *kvm, gpa_t base, size_t size) } bool vgic_lpis_enabled(struct kvm_vcpu *vcpu); -int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr); int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its, u32 devid, u32 eventid, struct vgic_irq **irq); struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi); int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi); -void vgic_lpi_translation_cache_init(struct kvm *kvm); -void vgic_lpi_translation_cache_destroy(struct kvm *kvm); -void vgic_its_invalidate_cache(struct kvm *kvm); +void vgic_its_invalidate_all_caches(struct kvm *kvm); /* GICv4.1 MMIO interface */ int vgic_its_inv_lpi(struct kvm *kvm, struct vgic_irq *irq); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 03efd86dce0a..9b5ab6818f7f 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -32,6 +32,7 @@ #include <linux/hugetlb.h> #include <linux/acpi_iort.h> #include <linux/kmemleak.h> +#include <linux/execmem.h> #include <asm/boot.h> #include <asm/fixmap.h> @@ -432,3 +433,142 @@ void dump_mem_limit(void) pr_emerg("Memory Limit: none\n"); } } + +#ifdef CONFIG_EXECMEM +static u64 module_direct_base __ro_after_init = 0; +static u64 module_plt_base __ro_after_init = 0; + +/* + * Choose a random page-aligned base address for a window of 'size' bytes which + * entirely contains the interval [start, end - 1]. + */ +static u64 __init random_bounding_box(u64 size, u64 start, u64 end) +{ + u64 max_pgoff, pgoff; + + if ((end - start) >= size) + return 0; + + max_pgoff = (size - (end - start)) / PAGE_SIZE; + pgoff = get_random_u32_inclusive(0, max_pgoff); + + return start - pgoff * PAGE_SIZE; +} + +/* + * Modules may directly reference data and text anywhere within the kernel + * image and other modules. References using PREL32 relocations have a +/-2G + * range, and so we need to ensure that the entire kernel image and all modules + * fall within a 2G window such that these are always within range. + * + * Modules may directly branch to functions and code within the kernel text, + * and to functions and code within other modules. These branches will use + * CALL26/JUMP26 relocations with a +/-128M range. Without PLTs, we must ensure + * that the entire kernel text and all module text falls within a 128M window + * such that these are always within range. With PLTs, we can expand this to a + * 2G window. + * + * We chose the 128M region to surround the entire kernel image (rather than + * just the text) as using the same bounds for the 128M and 2G regions ensures + * by construction that we never select a 128M region that is not a subset of + * the 2G region. For very large and unusual kernel configurations this means + * we may fall back to PLTs where they could have been avoided, but this keeps + * the logic significantly simpler. + */ +static int __init module_init_limits(void) +{ + u64 kernel_end = (u64)_end; + u64 kernel_start = (u64)_text; + u64 kernel_size = kernel_end - kernel_start; + + /* + * The default modules region is placed immediately below the kernel + * image, and is large enough to use the full 2G relocation range. + */ + BUILD_BUG_ON(KIMAGE_VADDR != MODULES_END); + BUILD_BUG_ON(MODULES_VSIZE < SZ_2G); + + if (!kaslr_enabled()) { + if (kernel_size < SZ_128M) + module_direct_base = kernel_end - SZ_128M; + if (kernel_size < SZ_2G) + module_plt_base = kernel_end - SZ_2G; + } else { + u64 min = kernel_start; + u64 max = kernel_end; + + if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) { + pr_info("2G module region forced by RANDOMIZE_MODULE_REGION_FULL\n"); + } else { + module_direct_base = random_bounding_box(SZ_128M, min, max); + if (module_direct_base) { + min = module_direct_base; + max = module_direct_base + SZ_128M; + } + } + + module_plt_base = random_bounding_box(SZ_2G, min, max); + } + + pr_info("%llu pages in range for non-PLT usage", + module_direct_base ? (SZ_128M - kernel_size) / PAGE_SIZE : 0); + pr_info("%llu pages in range for PLT usage", + module_plt_base ? (SZ_2G - kernel_size) / PAGE_SIZE : 0); + + return 0; +} + +static struct execmem_info execmem_info __ro_after_init; + +struct execmem_info __init *execmem_arch_setup(void) +{ + unsigned long fallback_start = 0, fallback_end = 0; + unsigned long start = 0, end = 0; + + module_init_limits(); + + /* + * Where possible, prefer to allocate within direct branch range of the + * kernel such that no PLTs are necessary. + */ + if (module_direct_base) { + start = module_direct_base; + end = module_direct_base + SZ_128M; + + if (module_plt_base) { + fallback_start = module_plt_base; + fallback_end = module_plt_base + SZ_2G; + } + } else if (module_plt_base) { + start = module_plt_base; + end = module_plt_base + SZ_2G; + } + + execmem_info = (struct execmem_info){ + .ranges = { + [EXECMEM_DEFAULT] = { + .start = start, + .end = end, + .pgprot = PAGE_KERNEL, + .alignment = 1, + .fallback_start = fallback_start, + .fallback_end = fallback_end, + }, + [EXECMEM_KPROBES] = { + .start = VMALLOC_START, + .end = VMALLOC_END, + .pgprot = PAGE_KERNEL_ROX, + .alignment = 1, + }, + [EXECMEM_BPF] = { + .start = VMALLOC_START, + .end = VMALLOC_END, + .pgprot = PAGE_KERNEL, + .alignment = 1, + }, + }, + }; + + return &execmem_info; +} +#endif /* CONFIG_EXECMEM */ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 1d88711467bb..720336d28856 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1897,17 +1897,6 @@ u64 bpf_jit_alloc_exec_limit(void) return VMALLOC_END - VMALLOC_START; } -void *bpf_jit_alloc_exec(unsigned long size) -{ - /* Memory is intended to be executable, reset the pointer tag. */ - return kasan_reset_tag(vmalloc(size)); -} - -void bpf_jit_free_exec(void *addr) -{ - return vfree(addr); -} - /* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */ bool bpf_jit_supports_subprog_tailcalls(void) { diff --git a/arch/csky/kernel/probes/ftrace.c b/arch/csky/kernel/probes/ftrace.c index 834cffcfbce3..7ba4b98076de 100644 --- a/arch/csky/kernel/probes/ftrace.c +++ b/arch/csky/kernel/probes/ftrace.c @@ -12,6 +12,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct kprobe_ctlblk *kcb; struct pt_regs *regs; + if (unlikely(kprobe_ftrace_disabled)) + return; + bit = ftrace_test_recursion_trylock(ip, parent_ip); if (bit < 0) return; diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 54ad04dacdee..42331d9a8dd7 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -632,6 +632,15 @@ config RANDOMIZE_BASE_MAX_OFFSET source "kernel/livepatch/Kconfig" +config PARAVIRT + bool "Enable paravirtualization code" + depends on AS_HAS_LVZ_EXTENSION + help + This changes the kernel so it can modify itself when it is run + under a hypervisor, potentially improving performance significantly + over full virtualization. However, when run without a hypervisor + the kernel is theoretically slower and slightly larger. + endmenu config ARCH_SELECT_MEMORY_MODEL diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild index 2dbec7853ae8..c862672ed953 100644 --- a/arch/loongarch/include/asm/Kbuild +++ b/arch/loongarch/include/asm/Kbuild @@ -26,4 +26,3 @@ generic-y += poll.h generic-y += param.h generic-y += posix_types.h generic-y += resource.h -generic-y += kvm_para.h diff --git a/arch/loongarch/include/asm/hardirq.h b/arch/loongarch/include/asm/hardirq.h index 0ef3b18f8980..d41138abcf26 100644 --- a/arch/loongarch/include/asm/hardirq.h +++ b/arch/loongarch/include/asm/hardirq.h @@ -14,9 +14,15 @@ extern void ack_bad_irq(unsigned int irq); #define NR_IPI 2 +enum ipi_msg_type { + IPI_RESCHEDULE, + IPI_CALL_FUNCTION, +}; + typedef struct { unsigned int ipi_irqs[NR_IPI]; unsigned int __softirq_pending; + atomic_t message ____cacheline_aligned_in_smp; } ____cacheline_aligned irq_cpustat_t; DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index d8f637f9e400..c3993fd88aba 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -12,6 +12,7 @@ #define INSN_NOP 0x03400000 #define INSN_BREAK 0x002a0000 +#define INSN_HVCL 0x002b8000 #define ADDR_IMMMASK_LU52ID 0xFFF0000000000000 #define ADDR_IMMMASK_LU32ID 0x000FFFFF00000000 @@ -67,6 +68,7 @@ enum reg2_op { revhd_op = 0x11, extwh_op = 0x16, extwb_op = 0x17, + cpucfg_op = 0x1b, iocsrrdb_op = 0x19200, iocsrrdh_op = 0x19201, iocsrrdw_op = 0x19202, diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h index 218b4da0ea90..480418bc5071 100644 --- a/arch/loongarch/include/asm/irq.h +++ b/arch/loongarch/include/asm/irq.h @@ -117,7 +117,16 @@ extern struct fwnode_handle *liointc_handle; extern struct fwnode_handle *pch_lpc_handle; extern struct fwnode_handle *pch_pic_handle[MAX_IO_PICS]; -extern irqreturn_t loongson_ipi_interrupt(int irq, void *dev); +static inline int get_percpu_irq(int vector) +{ + struct irq_domain *d; + + d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY); + if (d) + return irq_create_mapping(d, vector); + + return -EINVAL; +} #include <asm-generic/irq.h> diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index 2d62f7b0d377..c87b6ea0ec47 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -31,6 +31,11 @@ #define KVM_HALT_POLL_NS_DEFAULT 500000 +#define KVM_GUESTDBG_SW_BP_MASK \ + (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP) +#define KVM_GUESTDBG_VALID_MASK \ + (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP | KVM_GUESTDBG_SINGLESTEP) + struct kvm_vm_stat { struct kvm_vm_stat_generic generic; u64 pages; @@ -43,6 +48,7 @@ struct kvm_vcpu_stat { u64 idle_exits; u64 cpucfg_exits; u64 signal_exits; + u64 hypercall_exits; }; #define KVM_MEM_HUGEPAGE_CAPABLE (1UL << 0) @@ -64,6 +70,31 @@ struct kvm_world_switch { #define MAX_PGTABLE_LEVELS 4 +/* + * Physical CPUID is used for interrupt routing, there are different + * definitions about physical cpuid on different hardwares. + * + * For LOONGARCH_CSR_CPUID register, max CPUID size if 512 + * For IPI hardware, max destination CPUID size 1024 + * For extioi interrupt controller, max destination CPUID size is 256 + * For msgint interrupt controller, max supported CPUID size is 65536 + * + * Currently max CPUID is defined as 256 for KVM hypervisor, in future + * it will be expanded to 4096, including 16 packages at most. And every + * package supports at most 256 vcpus + */ +#define KVM_MAX_PHYID 256 + +struct kvm_phyid_info { + struct kvm_vcpu *vcpu; + bool enabled; +}; + +struct kvm_phyid_map { + int max_phyid; + struct kvm_phyid_info phys_map[KVM_MAX_PHYID]; +}; + struct kvm_arch { /* Guest physical mm */ kvm_pte_t *pgd; @@ -71,6 +102,8 @@ struct kvm_arch { unsigned long invalid_ptes[MAX_PGTABLE_LEVELS]; unsigned int pte_shifts[MAX_PGTABLE_LEVELS]; unsigned int root_level; + spinlock_t phyid_map_lock; + struct kvm_phyid_map *phyid_map; s64 time_offset; struct kvm_context __percpu *vmcs; @@ -203,7 +236,6 @@ void kvm_flush_tlb_all(void); void kvm_flush_tlb_gpa(struct kvm_vcpu *vcpu, unsigned long gpa); int kvm_handle_mm_fault(struct kvm_vcpu *vcpu, unsigned long badv, bool write); -void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, bool blockable); int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h new file mode 100644 index 000000000000..4ba2312e5f8c --- /dev/null +++ b/arch/loongarch/include/asm/kvm_para.h @@ -0,0 +1,161 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_LOONGARCH_KVM_PARA_H +#define _ASM_LOONGARCH_KVM_PARA_H + +/* + * Hypercall code field + */ +#define HYPERVISOR_KVM 1 +#define HYPERVISOR_VENDOR_SHIFT 8 +#define HYPERCALL_ENCODE(vendor, code) ((vendor << HYPERVISOR_VENDOR_SHIFT) + code) + +#define KVM_HCALL_CODE_SERVICE 0 +#define KVM_HCALL_CODE_SWDBG 1 + +#define KVM_HCALL_SERVICE HYPERCALL_ENCODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SERVICE) +#define KVM_HCALL_FUNC_IPI 1 + +#define KVM_HCALL_SWDBG HYPERCALL_ENCODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SWDBG) + +/* + * LoongArch hypercall return code + */ +#define KVM_HCALL_SUCCESS 0 +#define KVM_HCALL_INVALID_CODE -1UL +#define KVM_HCALL_INVALID_PARAMETER -2UL + +/* + * Hypercall interface for KVM hypervisor + * + * a0: function identifier + * a1-a6: args + * Return value will be placed in a0. + * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6. + */ +static __always_inline long kvm_hypercall0(u64 fid) +{ + register long ret asm("a0"); + register unsigned long fun asm("a0") = fid; + + __asm__ __volatile__( + "hvcl "__stringify(KVM_HCALL_SERVICE) + : "=r" (ret) + : "r" (fun) + : "memory" + ); + + return ret; +} + +static __always_inline long kvm_hypercall1(u64 fid, unsigned long arg0) +{ + register long ret asm("a0"); + register unsigned long fun asm("a0") = fid; + register unsigned long a1 asm("a1") = arg0; + + __asm__ __volatile__( + "hvcl "__stringify(KVM_HCALL_SERVICE) + : "=r" (ret) + : "r" (fun), "r" (a1) + : "memory" + ); + + return ret; +} + +static __always_inline long kvm_hypercall2(u64 fid, + unsigned long arg0, unsigned long arg1) +{ + register long ret asm("a0"); + register unsigned long fun asm("a0") = fid; + register unsigned long a1 asm("a1") = arg0; + register unsigned long a2 asm("a2") = arg1; + + __asm__ __volatile__( + "hvcl "__stringify(KVM_HCALL_SERVICE) + : "=r" (ret) + : "r" (fun), "r" (a1), "r" (a2) + : "memory" + ); + + return ret; +} + +static __always_inline long kvm_hypercall3(u64 fid, + unsigned long arg0, unsigned long arg1, unsigned long arg2) +{ + register long ret asm("a0"); + register unsigned long fun asm("a0") = fid; + register unsigned long a1 asm("a1") = arg0; + register unsigned long a2 asm("a2") = arg1; + register unsigned long a3 asm("a3") = arg2; + + __asm__ __volatile__( + "hvcl "__stringify(KVM_HCALL_SERVICE) + : "=r" (ret) + : "r" (fun), "r" (a1), "r" (a2), "r" (a3) + : "memory" + ); + + return ret; +} + +static __always_inline long kvm_hypercall4(u64 fid, + unsigned long arg0, unsigned long arg1, + unsigned long arg2, unsigned long arg3) +{ + register long ret asm("a0"); + register unsigned long fun asm("a0") = fid; + register unsigned long a1 asm("a1") = arg0; + register unsigned long a2 asm("a2") = arg1; + register unsigned long a3 asm("a3") = arg2; + register unsigned long a4 asm("a4") = arg3; + + __asm__ __volatile__( + "hvcl "__stringify(KVM_HCALL_SERVICE) + : "=r" (ret) + : "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4) + : "memory" + ); + + return ret; +} + +static __always_inline long kvm_hypercall5(u64 fid, + unsigned long arg0, unsigned long arg1, + unsigned long arg2, unsigned long arg3, unsigned long arg4) +{ + register long ret asm("a0"); + register unsigned long fun asm("a0") = fid; + register unsigned long a1 asm("a1") = arg0; + register unsigned long a2 asm("a2") = arg1; + register unsigned long a3 asm("a3") = arg2; + register unsigned long a4 asm("a4") = arg3; + register unsigned long a5 asm("a5") = arg4; + + __asm__ __volatile__( + "hvcl "__stringify(KVM_HCALL_SERVICE) + : "=r" (ret) + : "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4), "r" (a5) + : "memory" + ); + + return ret; +} + +static inline unsigned int kvm_arch_para_features(void) +{ + return 0; +} + +static inline unsigned int kvm_arch_para_hints(void) +{ + return 0; +} + +static inline bool kvm_check_and_clear_guest_paused(void) +{ + return false; +} + +#endif /* _ASM_LOONGARCH_KVM_PARA_H */ diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h index 0cb4fdb8a9b5..590a92cb5416 100644 --- a/arch/loongarch/include/asm/kvm_vcpu.h +++ b/arch/loongarch/include/asm/kvm_vcpu.h @@ -81,6 +81,7 @@ void kvm_save_timer(struct kvm_vcpu *vcpu); void kvm_restore_timer(struct kvm_vcpu *vcpu); int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); +struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid); /* * Loongarch KVM guest interrupt handling @@ -109,4 +110,14 @@ static inline int kvm_queue_exception(struct kvm_vcpu *vcpu, return -1; } +static inline unsigned long kvm_read_reg(struct kvm_vcpu *vcpu, int num) +{ + return vcpu->arch.gprs[num]; +} + +static inline void kvm_write_reg(struct kvm_vcpu *vcpu, int num, unsigned long val) +{ + vcpu->arch.gprs[num] = val; +} + #endif /* __ASM_LOONGARCH_KVM_VCPU_H__ */ diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index 46366e783c84..eb09adda54b7 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -158,6 +158,18 @@ #define CPUCFG48_VFPU_CG BIT(2) #define CPUCFG48_RAM_CG BIT(3) +/* + * CPUCFG index area: 0x40000000 -- 0x400000ff + * SW emulation for KVM hypervirsor + */ +#define CPUCFG_KVM_BASE 0x40000000 +#define CPUCFG_KVM_SIZE 0x100 + +#define CPUCFG_KVM_SIG (CPUCFG_KVM_BASE + 0) +#define KVM_SIGNATURE "KVM\0" +#define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4) +#define KVM_FEATURE_IPI BIT(1) + #ifndef __ASSEMBLY__ /* CSR */ diff --git a/arch/loongarch/include/asm/paravirt.h b/arch/loongarch/include/asm/paravirt.h new file mode 100644 index 000000000000..0965710f47f2 --- /dev/null +++ b/arch/loongarch/include/asm/paravirt.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_LOONGARCH_PARAVIRT_H +#define _ASM_LOONGARCH_PARAVIRT_H + +#ifdef CONFIG_PARAVIRT + +#include <linux/static_call_types.h> +struct static_key; +extern struct static_key paravirt_steal_enabled; +extern struct static_key paravirt_steal_rq_enabled; + +u64 dummy_steal_clock(int cpu); +DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock); + +static inline u64 paravirt_steal_clock(int cpu) +{ + return static_call(pv_steal_clock)(cpu); +} + +int __init pv_ipi_init(void); + +#else + +static inline int pv_ipi_init(void) +{ + return 0; +} + +#endif // CONFIG_PARAVIRT +#endif diff --git a/arch/loongarch/include/asm/paravirt_api_clock.h b/arch/loongarch/include/asm/paravirt_api_clock.h new file mode 100644 index 000000000000..65ac7cee0dad --- /dev/null +++ b/arch/loongarch/include/asm/paravirt_api_clock.h @@ -0,0 +1 @@ +#include <asm/paravirt.h> diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h index f81e5f01d619..1c51bdf3516a 100644 --- a/arch/loongarch/include/asm/smp.h +++ b/arch/loongarch/include/asm/smp.h @@ -12,6 +12,13 @@ #include <linux/threads.h> #include <linux/cpumask.h> +struct smp_ops { + void (*init_ipi)(void); + void (*send_ipi_single)(int cpu, unsigned int action); + void (*send_ipi_mask)(const struct cpumask *mask, unsigned int action); +}; +extern struct smp_ops mp_ops; + extern int smp_num_siblings; extern int num_processors; extern int disabled_cpus; @@ -24,8 +31,6 @@ void loongson_prepare_cpus(unsigned int max_cpus); void loongson_boot_secondary(int cpu, struct task_struct *idle); void loongson_init_secondary(void); void loongson_smp_finish(void); -void loongson_send_ipi_single(int cpu, unsigned int action); -void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action); #ifdef CONFIG_HOTPLUG_CPU int loongson_cpu_disable(void); void loongson_cpu_die(unsigned int cpu); @@ -59,9 +64,12 @@ extern int __cpu_logical_map[NR_CPUS]; #define cpu_physical_id(cpu) cpu_logical_map(cpu) -#define SMP_BOOT_CPU 0x1 -#define SMP_RESCHEDULE 0x2 -#define SMP_CALL_FUNCTION 0x4 +#define ACTION_BOOT_CPU 0 +#define ACTION_RESCHEDULE 1 +#define ACTION_CALL_FUNCTION 2 +#define SMP_BOOT_CPU BIT(ACTION_BOOT_CPU) +#define SMP_RESCHEDULE BIT(ACTION_RESCHEDULE) +#define SMP_CALL_FUNCTION BIT(ACTION_CALL_FUNCTION) struct secondary_data { unsigned long stack; @@ -81,12 +89,12 @@ extern void show_ipi_list(struct seq_file *p, int prec); static inline void arch_send_call_function_single_ipi(int cpu) { - loongson_send_ipi_single(cpu, SMP_CALL_FUNCTION); + mp_ops.send_ipi_single(cpu, ACTION_CALL_FUNCTION); } static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) { - loongson_send_ipi_mask(mask, SMP_CALL_FUNCTION); + mp_ops.send_ipi_mask(mask, ACTION_CALL_FUNCTION); } #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h index 109785922cf9..f9abef382317 100644 --- a/arch/loongarch/include/uapi/asm/kvm.h +++ b/arch/loongarch/include/uapi/asm/kvm.h @@ -17,6 +17,8 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_DIRTY_LOG_PAGE_OFFSET 64 +#define KVM_GUESTDBG_USE_SW_BP 0x00010000 + /* * for KVM_GET_REGS and KVM_SET_REGS */ @@ -72,6 +74,8 @@ struct kvm_fpu { #define KVM_REG_LOONGARCH_COUNTER (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 1) #define KVM_REG_LOONGARCH_VCPU_RESET (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 2) +/* Debugging: Special instruction for software breakpoint */ +#define KVM_REG_LOONGARCH_DEBUG_INST (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 3) #define LOONGARCH_REG_SHIFT 3 #define LOONGARCH_REG_64(TYPE, REG) (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT)) diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 3a7620b66bc6..c9bfeda89e40 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -51,6 +51,7 @@ obj-$(CONFIG_MODULES) += module.o module-sections.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_PROC_FS) += proc.o +obj-$(CONFIG_PARAVIRT) += paravirt.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c index 73858c9029cc..bff058317062 100644 --- a/arch/loongarch/kernel/ftrace_dyn.c +++ b/arch/loongarch/kernel/ftrace_dyn.c @@ -287,6 +287,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct kprobe *p; struct kprobe_ctlblk *kcb; + if (unlikely(kprobe_ftrace_disabled)) + return; + bit = ftrace_test_recursion_trylock(ip, parent_ip); if (bit < 0) return; diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c index 883e5066ae44..f4991c03514f 100644 --- a/arch/loongarch/kernel/irq.c +++ b/arch/loongarch/kernel/irq.c @@ -87,23 +87,9 @@ static void __init init_vec_parent_group(void) acpi_table_parse(ACPI_SIG_MCFG, early_pci_mcfg_parse); } -static int __init get_ipi_irq(void) -{ - struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY); - - if (d) - return irq_create_mapping(d, INT_IPI); - - return -EINVAL; -} - void __init init_IRQ(void) { int i; -#ifdef CONFIG_SMP - int r, ipi_irq; - static int ipi_dummy_dev; -#endif unsigned int order = get_order(IRQ_STACK_SIZE); struct page *page; @@ -113,13 +99,7 @@ void __init init_IRQ(void) init_vec_parent_group(); irqchip_init(); #ifdef CONFIG_SMP - ipi_irq = get_ipi_irq(); - if (ipi_irq < 0) - panic("IPI IRQ mapping failed\n"); - irq_set_percpu_devid(ipi_irq); - r = request_percpu_irq(ipi_irq, loongson_ipi_interrupt, "IPI", &ipi_dummy_dev); - if (r < 0) - panic("IPI IRQ request failed\n"); + mp_ops.init_ipi(); #endif for (i = 0; i < NR_IRQS; i++) @@ -133,5 +113,5 @@ void __init init_IRQ(void) per_cpu(irq_stack, i), per_cpu(irq_stack, i) + IRQ_STACK_SIZE); } - set_csr_ecfg(ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC); + set_csr_ecfg(ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC); } diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c index c7d0338d12c1..36d6d9eeb7c7 100644 --- a/arch/loongarch/kernel/module.c +++ b/arch/loongarch/kernel/module.c @@ -490,12 +490,6 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, return 0; } -void *module_alloc(unsigned long size) -{ - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); -} - static void module_init_ftrace_plt(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) { diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c new file mode 100644 index 000000000000..1633ed4f692f --- /dev/null +++ b/arch/loongarch/kernel/paravirt.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/export.h> +#include <linux/types.h> +#include <linux/interrupt.h> +#include <linux/jump_label.h> +#include <linux/kvm_para.h> +#include <linux/static_call.h> +#include <asm/paravirt.h> + +struct static_key paravirt_steal_enabled; +struct static_key paravirt_steal_rq_enabled; + +static u64 native_steal_clock(int cpu) +{ + return 0; +} + +DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock); + +#ifdef CONFIG_SMP +static void pv_send_ipi_single(int cpu, unsigned int action) +{ + int min, old; + irq_cpustat_t *info = &per_cpu(irq_stat, cpu); + + old = atomic_fetch_or(BIT(action), &info->message); + if (old) + return; + + min = cpu_logical_map(cpu); + kvm_hypercall3(KVM_HCALL_FUNC_IPI, 1, 0, min); +} + +#define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG) + +static void pv_send_ipi_mask(const struct cpumask *mask, unsigned int action) +{ + int i, cpu, min = 0, max = 0, old; + __uint128_t bitmap = 0; + irq_cpustat_t *info; + + if (cpumask_empty(mask)) + return; + + action = BIT(action); + for_each_cpu(i, mask) { + info = &per_cpu(irq_stat, i); + old = atomic_fetch_or(action, &info->message); + if (old) + continue; + + cpu = cpu_logical_map(i); + if (!bitmap) { + min = max = cpu; + } else if (cpu < min && cpu > (max - KVM_IPI_CLUSTER_SIZE)) { + /* cpu < min, and bitmap still enough */ + bitmap <<= min - cpu; + min = cpu; + } else if (cpu > min && cpu < (min + KVM_IPI_CLUSTER_SIZE)) { + /* cpu > min, and bitmap still enough */ + max = cpu > max ? cpu : max; + } else { + /* + * With cpu, bitmap will exceed KVM_IPI_CLUSTER_SIZE, + * send IPI here directly and skip the remaining CPUs. + */ + kvm_hypercall3(KVM_HCALL_FUNC_IPI, (unsigned long)bitmap, + (unsigned long)(bitmap >> BITS_PER_LONG), min); + min = max = cpu; + bitmap = 0; + } + __set_bit(cpu - min, (unsigned long *)&bitmap); + } + + if (bitmap) + kvm_hypercall3(KVM_HCALL_FUNC_IPI, (unsigned long)bitmap, + (unsigned long)(bitmap >> BITS_PER_LONG), min); +} + +static irqreturn_t pv_ipi_interrupt(int irq, void *dev) +{ + u32 action; + irq_cpustat_t *info; + + /* Clear SWI interrupt */ + clear_csr_estat(1 << INT_SWI0); + info = this_cpu_ptr(&irq_stat); + action = atomic_xchg(&info->message, 0); + + if (action & SMP_RESCHEDULE) { + scheduler_ipi(); + info->ipi_irqs[IPI_RESCHEDULE]++; + } + + if (action & SMP_CALL_FUNCTION) { + generic_smp_call_function_interrupt(); + info->ipi_irqs[IPI_CALL_FUNCTION]++; + } + + return IRQ_HANDLED; +} + +static void pv_init_ipi(void) +{ + int r, swi; + + swi = get_percpu_irq(INT_SWI0); + if (swi < 0) + panic("SWI0 IRQ mapping failed\n"); + irq_set_percpu_devid(swi); + r = request_percpu_irq(swi, pv_ipi_interrupt, "SWI0-IPI", &irq_stat); + if (r < 0) + panic("SWI0 IRQ request failed\n"); +} +#endif + +static bool kvm_para_available(void) +{ + int config; + static int hypervisor_type; + + if (!hypervisor_type) { + config = read_cpucfg(CPUCFG_KVM_SIG); + if (!memcmp(&config, KVM_SIGNATURE, 4)) + hypervisor_type = HYPERVISOR_KVM; + } + + return hypervisor_type == HYPERVISOR_KVM; +} + +int __init pv_ipi_init(void) +{ + int feature; + + if (!cpu_has_hypervisor) + return 0; + if (!kvm_para_available()) + return 0; + + feature = read_cpucfg(CPUCFG_KVM_FEATURE); + if (!(feature & KVM_FEATURE_IPI)) + return 0; + +#ifdef CONFIG_SMP + mp_ops.init_ipi = pv_init_ipi; + mp_ops.send_ipi_single = pv_send_ipi_single; + mp_ops.send_ipi_mask = pv_send_ipi_mask; +#endif + + return 0; +} diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c index cac7cba81b65..f86a4b838dd7 100644 --- a/arch/loongarch/kernel/perf_event.c +++ b/arch/loongarch/kernel/perf_event.c @@ -456,16 +456,6 @@ static void loongarch_pmu_disable(struct pmu *pmu) static DEFINE_MUTEX(pmu_reserve_mutex); static atomic_t active_events = ATOMIC_INIT(0); -static int get_pmc_irq(void) -{ - struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY); - - if (d) - return irq_create_mapping(d, INT_PCOV); - - return -EINVAL; -} - static void reset_counters(void *arg); static int __hw_perf_event_init(struct perf_event *event); @@ -473,7 +463,7 @@ static void hw_perf_event_destroy(struct perf_event *event) { if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) { on_each_cpu(reset_counters, NULL, 1); - free_irq(get_pmc_irq(), &loongarch_pmu); + free_irq(get_percpu_irq(INT_PCOV), &loongarch_pmu); mutex_unlock(&pmu_reserve_mutex); } } @@ -562,7 +552,7 @@ static int loongarch_pmu_event_init(struct perf_event *event) if (event->cpu >= 0 && !cpu_online(event->cpu)) return -ENODEV; - irq = get_pmc_irq(); + irq = get_percpu_irq(INT_PCOV); flags = IRQF_PERCPU | IRQF_NOBALANCING | IRQF_NO_THREAD | IRQF_NO_SUSPEND | IRQF_SHARED; if (!atomic_inc_not_zero(&active_events)) { mutex_lock(&pmu_reserve_mutex); diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index aabee0b280fe..0dfe2388ef41 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -29,6 +29,7 @@ #include <asm/loongson.h> #include <asm/mmu_context.h> #include <asm/numa.h> +#include <asm/paravirt.h> #include <asm/processor.h> #include <asm/setup.h> #include <asm/time.h> @@ -66,11 +67,6 @@ static cpumask_t cpu_core_setup_map; struct secondary_data cpuboot_data; static DEFINE_PER_CPU(int, cpu_state); -enum ipi_msg_type { - IPI_RESCHEDULE, - IPI_CALL_FUNCTION, -}; - static const char *ipi_types[NR_IPI] __tracepoint_string = { [IPI_RESCHEDULE] = "Rescheduling interrupts", [IPI_CALL_FUNCTION] = "Function call interrupts", @@ -190,24 +186,19 @@ static u32 ipi_read_clear(int cpu) static void ipi_write_action(int cpu, u32 action) { - unsigned int irq = 0; - - while ((irq = ffs(action))) { - uint32_t val = IOCSR_IPI_SEND_BLOCKING; + uint32_t val; - val |= (irq - 1); - val |= (cpu << IOCSR_IPI_SEND_CPU_SHIFT); - iocsr_write32(val, LOONGARCH_IOCSR_IPI_SEND); - action &= ~BIT(irq - 1); - } + val = IOCSR_IPI_SEND_BLOCKING | action; + val |= (cpu << IOCSR_IPI_SEND_CPU_SHIFT); + iocsr_write32(val, LOONGARCH_IOCSR_IPI_SEND); } -void loongson_send_ipi_single(int cpu, unsigned int action) +static void loongson_send_ipi_single(int cpu, unsigned int action) { ipi_write_action(cpu_logical_map(cpu), (u32)action); } -void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action) +static void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action) { unsigned int i; @@ -222,11 +213,11 @@ void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action) */ void arch_smp_send_reschedule(int cpu) { - loongson_send_ipi_single(cpu, SMP_RESCHEDULE); + mp_ops.send_ipi_single(cpu, ACTION_RESCHEDULE); } EXPORT_SYMBOL_GPL(arch_smp_send_reschedule); -irqreturn_t loongson_ipi_interrupt(int irq, void *dev) +static irqreturn_t loongson_ipi_interrupt(int irq, void *dev) { unsigned int action; unsigned int cpu = smp_processor_id(); @@ -246,6 +237,26 @@ irqreturn_t loongson_ipi_interrupt(int irq, void *dev) return IRQ_HANDLED; } +static void loongson_init_ipi(void) +{ + int r, ipi_irq; + + ipi_irq = get_percpu_irq(INT_IPI); + if (ipi_irq < 0) + panic("IPI IRQ mapping failed\n"); + + irq_set_percpu_devid(ipi_irq); + r = request_percpu_irq(ipi_irq, loongson_ipi_interrupt, "IPI", &irq_stat); + if (r < 0) + panic("IPI IRQ request failed\n"); +} + +struct smp_ops mp_ops = { + .init_ipi = loongson_init_ipi, + .send_ipi_single = loongson_send_ipi_single, + .send_ipi_mask = loongson_send_ipi_mask, +}; + static void __init fdt_smp_setup(void) { #ifdef CONFIG_OF @@ -289,6 +300,7 @@ void __init loongson_smp_setup(void) cpu_data[0].core = cpu_logical_map(0) % loongson_sysconf.cores_per_package; cpu_data[0].package = cpu_logical_map(0) / loongson_sysconf.cores_per_package; + pv_ipi_init(); iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_EN); pr_info("Detected %i available CPU(s)\n", loongson_sysconf.nr_cpus); } @@ -323,7 +335,7 @@ void loongson_boot_secondary(int cpu, struct task_struct *idle) csr_mail_send(entry, cpu_logical_map(cpu), 0); - loongson_send_ipi_single(cpu, SMP_BOOT_CPU); + loongson_send_ipi_single(cpu, ACTION_BOOT_CPU); } /* @@ -333,7 +345,7 @@ void loongson_init_secondary(void) { unsigned int cpu = smp_processor_id(); unsigned int imask = ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | - ECFGF_IPI | ECFGF_PMC | ECFGF_TIMER; + ECFGF_IPI | ECFGF_PMC | ECFGF_TIMER | ECFGF_SIP0; change_csr_ecfg(ECFG0_IM, imask); diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index e7015f7b70e3..fd5354f9be7c 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -123,16 +123,6 @@ void sync_counter(void) csr_write64(init_offset, LOONGARCH_CSR_CNTC); } -static int get_timer_irq(void) -{ - struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY); - - if (d) - return irq_create_mapping(d, INT_TI); - - return -EINVAL; -} - int constant_clockevent_init(void) { unsigned int cpu = smp_processor_id(); @@ -142,7 +132,7 @@ int constant_clockevent_init(void) static int irq = 0, timer_irq_installed = 0; if (!timer_irq_installed) { - irq = get_timer_irq(); + irq = get_percpu_irq(INT_TI); if (irq < 0) pr_err("Failed to map irq %d (timer)\n", irq); } diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index ed1d89d53e2e..c86e099af5ca 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -9,6 +9,7 @@ #include <linux/module.h> #include <linux/preempt.h> #include <linux/vmalloc.h> +#include <trace/events/kvm.h> #include <asm/fpu.h> #include <asm/inst.h> #include <asm/loongarch.h> @@ -20,6 +21,46 @@ #include <asm/kvm_vcpu.h> #include "trace.h" +static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst) +{ + int rd, rj; + unsigned int index; + + if (inst.reg2_format.opcode != cpucfg_op) + return EMULATE_FAIL; + + rd = inst.reg2_format.rd; + rj = inst.reg2_format.rj; + ++vcpu->stat.cpucfg_exits; + index = vcpu->arch.gprs[rj]; + + /* + * By LoongArch Reference Manual 2.2.10.5 + * Return value is 0 for undefined CPUCFG index + * + * Disable preemption since hw gcsr is accessed + */ + preempt_disable(); + switch (index) { + case 0 ... (KVM_MAX_CPUCFG_REGS - 1): + vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index]; + break; + case CPUCFG_KVM_SIG: + /* CPUCFG emulation between 0x40000000 -- 0x400000ff */ + vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE; + break; + case CPUCFG_KVM_FEATURE: + vcpu->arch.gprs[rd] = KVM_FEATURE_IPI; + break; + default: + vcpu->arch.gprs[rd] = 0; + break; + } + preempt_enable(); + + return EMULATE_DONE; +} + static unsigned long kvm_emu_read_csr(struct kvm_vcpu *vcpu, int csrid) { unsigned long val = 0; @@ -208,8 +249,6 @@ int kvm_emu_idle(struct kvm_vcpu *vcpu) static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu) { - int rd, rj; - unsigned int index; unsigned long curr_pc; larch_inst inst; enum emulation_result er = EMULATE_DONE; @@ -224,21 +263,7 @@ static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu) er = EMULATE_FAIL; switch (((inst.word >> 24) & 0xff)) { case 0x0: /* CPUCFG GSPR */ - if (inst.reg2_format.opcode == 0x1B) { - rd = inst.reg2_format.rd; - rj = inst.reg2_format.rj; - ++vcpu->stat.cpucfg_exits; - index = vcpu->arch.gprs[rj]; - er = EMULATE_DONE; - /* - * By LoongArch Reference Manual 2.2.10.5 - * return value is 0 for undefined cpucfg index - */ - if (index < KVM_MAX_CPUCFG_REGS) - vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index]; - else - vcpu->arch.gprs[rd] = 0; - } + er = kvm_emu_cpucfg(vcpu, inst); break; case 0x4: /* CSR{RD,WR,XCHG} GSPR */ er = kvm_handle_csr(vcpu, inst); @@ -417,6 +442,8 @@ int kvm_emu_mmio_read(struct kvm_vcpu *vcpu, larch_inst inst) vcpu->arch.io_gpr = rd; run->mmio.is_write = 0; vcpu->mmio_is_write = 0; + trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, run->mmio.len, + run->mmio.phys_addr, NULL); } else { kvm_err("Read not supported Inst=0x%08x @%lx BadVaddr:%#lx\n", inst.word, vcpu->arch.pc, vcpu->arch.badv); @@ -463,6 +490,9 @@ int kvm_complete_mmio_read(struct kvm_vcpu *vcpu, struct kvm_run *run) break; } + trace_kvm_mmio(KVM_TRACE_MMIO_READ, run->mmio.len, + run->mmio.phys_addr, run->mmio.data); + return er; } @@ -564,6 +594,8 @@ int kvm_emu_mmio_write(struct kvm_vcpu *vcpu, larch_inst inst) run->mmio.is_write = 1; vcpu->mmio_needed = 1; vcpu->mmio_is_write = 1; + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, run->mmio.len, + run->mmio.phys_addr, data); } else { vcpu->arch.pc = curr_pc; kvm_err("Write not supported Inst=0x%08x @%lx BadVaddr:%#lx\n", @@ -685,6 +717,90 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu) return RESUME_GUEST; } +static int kvm_send_pv_ipi(struct kvm_vcpu *vcpu) +{ + unsigned int min, cpu, i; + unsigned long ipi_bitmap; + struct kvm_vcpu *dest; + + min = kvm_read_reg(vcpu, LOONGARCH_GPR_A3); + for (i = 0; i < 2; i++, min += BITS_PER_LONG) { + ipi_bitmap = kvm_read_reg(vcpu, LOONGARCH_GPR_A1 + i); + if (!ipi_bitmap) + continue; + + cpu = find_first_bit((void *)&ipi_bitmap, BITS_PER_LONG); + while (cpu < BITS_PER_LONG) { + dest = kvm_get_vcpu_by_cpuid(vcpu->kvm, cpu + min); + cpu = find_next_bit((void *)&ipi_bitmap, BITS_PER_LONG, cpu + 1); + if (!dest) + continue; + + /* Send SWI0 to dest vcpu to emulate IPI interrupt */ + kvm_queue_irq(dest, INT_SWI0); + kvm_vcpu_kick(dest); + } + } + + return 0; +} + +/* + * Hypercall emulation always return to guest, Caller should check retval. + */ +static void kvm_handle_service(struct kvm_vcpu *vcpu) +{ + unsigned long func = kvm_read_reg(vcpu, LOONGARCH_GPR_A0); + long ret; + + switch (func) { + case KVM_HCALL_FUNC_IPI: + kvm_send_pv_ipi(vcpu); + ret = KVM_HCALL_SUCCESS; + break; + default: + ret = KVM_HCALL_INVALID_CODE; + break; + }; + + kvm_write_reg(vcpu, LOONGARCH_GPR_A0, ret); +} + +static int kvm_handle_hypercall(struct kvm_vcpu *vcpu) +{ + int ret; + larch_inst inst; + unsigned int code; + + inst.word = vcpu->arch.badi; + code = inst.reg0i15_format.immediate; + ret = RESUME_GUEST; + + switch (code) { + case KVM_HCALL_SERVICE: + vcpu->stat.hypercall_exits++; + kvm_handle_service(vcpu); + break; + case KVM_HCALL_SWDBG: + /* KVM_HCALL_SWDBG only in effective when SW_BP is enabled */ + if (vcpu->guest_debug & KVM_GUESTDBG_SW_BP_MASK) { + vcpu->run->exit_reason = KVM_EXIT_DEBUG; + ret = RESUME_HOST; + break; + } + fallthrough; + default: + /* Treat it as noop intruction, only set return value */ + kvm_write_reg(vcpu, LOONGARCH_GPR_A0, KVM_HCALL_INVALID_CODE); + break; + } + + if (ret == RESUME_GUEST) + update_pc(&vcpu->arch); + + return ret; +} + /* * LoongArch KVM callback handling for unimplemented guest exiting */ @@ -716,6 +832,7 @@ static exit_handle_fn kvm_fault_tables[EXCCODE_INT_START] = { [EXCCODE_LSXDIS] = kvm_handle_lsx_disabled, [EXCCODE_LASXDIS] = kvm_handle_lasx_disabled, [EXCCODE_GSPR] = kvm_handle_gspr, + [EXCCODE_HVC] = kvm_handle_hypercall, }; int kvm_handle_fault(struct kvm_vcpu *vcpu, int fault) diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index a556cff35740..98883aa23ab8 100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c @@ -494,38 +494,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) range->end << PAGE_SHIFT, &ctx); } -bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) -{ - unsigned long prot_bits; - kvm_pte_t *ptep; - kvm_pfn_t pfn = pte_pfn(range->arg.pte); - gpa_t gpa = range->start << PAGE_SHIFT; - - ptep = kvm_populate_gpa(kvm, NULL, gpa, 0); - if (!ptep) - return false; - - /* Replacing an absent or old page doesn't need flushes */ - if (!kvm_pte_present(NULL, ptep) || !kvm_pte_young(*ptep)) { - kvm_set_pte(ptep, 0); - return false; - } - - /* Fill new pte if write protected or page migrated */ - prot_bits = _PAGE_PRESENT | __READABLE; - prot_bits |= _CACHE_MASK & pte_val(range->arg.pte); - - /* - * Set _PAGE_WRITE or _PAGE_DIRTY iff old and new pte both support - * _PAGE_WRITE for map_page_fast if next page write fault - * _PAGE_DIRTY since gpa has already recorded as dirty page - */ - prot_bits |= __WRITEABLE & *ptep & pte_val(range->arg.pte); - kvm_set_pte(ptep, kvm_pfn_pte(pfn, __pgprot(prot_bits))); - - return true; -} - bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { kvm_ptw_ctx ctx; diff --git a/arch/loongarch/kvm/trace.h b/arch/loongarch/kvm/trace.h index c2484ad4cffa..1783397b1bc8 100644 --- a/arch/loongarch/kvm/trace.h +++ b/arch/loongarch/kvm/trace.h @@ -19,14 +19,16 @@ DECLARE_EVENT_CLASS(kvm_transition, TP_PROTO(struct kvm_vcpu *vcpu), TP_ARGS(vcpu), TP_STRUCT__entry( + __field(unsigned int, vcpu_id) __field(unsigned long, pc) ), TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; __entry->pc = vcpu->arch.pc; ), - TP_printk("PC: 0x%08lx", __entry->pc) + TP_printk("vcpu %u PC: 0x%08lx", __entry->vcpu_id, __entry->pc) ); DEFINE_EVENT(kvm_transition, kvm_enter, @@ -54,19 +56,22 @@ DECLARE_EVENT_CLASS(kvm_exit, TP_PROTO(struct kvm_vcpu *vcpu, unsigned int reason), TP_ARGS(vcpu, reason), TP_STRUCT__entry( + __field(unsigned int, vcpu_id) __field(unsigned long, pc) __field(unsigned int, reason) ), TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; __entry->pc = vcpu->arch.pc; __entry->reason = reason; ), - TP_printk("[%s]PC: 0x%08lx", - __print_symbolic(__entry->reason, - kvm_trace_symbol_exit_types), - __entry->pc) + TP_printk("vcpu %u [%s] PC: 0x%08lx", + __entry->vcpu_id, + __print_symbolic(__entry->reason, + kvm_trace_symbol_exit_types), + __entry->pc) ); DEFINE_EVENT(kvm_exit, kvm_exit_idle, @@ -85,14 +90,17 @@ TRACE_EVENT(kvm_exit_gspr, TP_PROTO(struct kvm_vcpu *vcpu, unsigned int inst_word), TP_ARGS(vcpu, inst_word), TP_STRUCT__entry( + __field(unsigned int, vcpu_id) __field(unsigned int, inst_word) ), TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; __entry->inst_word = inst_word; ), - TP_printk("Inst word: 0x%08x", __entry->inst_word) + TP_printk("vcpu %u Inst word: 0x%08x", __entry->vcpu_id, + __entry->inst_word) ); #define KVM_TRACE_AUX_SAVE 0 diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 3a8779065f73..9e8030d45129 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -19,6 +19,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, idle_exits), STATS_DESC_COUNTER(VCPU, cpucfg_exits), STATS_DESC_COUNTER(VCPU, signal_exits), + STATS_DESC_COUNTER(VCPU, hypercall_exits) }; const struct kvm_stats_header kvm_vcpu_stats_header = { @@ -247,7 +248,101 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - return -EINVAL; + if (dbg->control & ~KVM_GUESTDBG_VALID_MASK) + return -EINVAL; + + if (dbg->control & KVM_GUESTDBG_ENABLE) + vcpu->guest_debug = dbg->control; + else + vcpu->guest_debug = 0; + + return 0; +} + +static inline int kvm_set_cpuid(struct kvm_vcpu *vcpu, u64 val) +{ + int cpuid; + struct kvm_phyid_map *map; + struct loongarch_csrs *csr = vcpu->arch.csr; + + if (val >= KVM_MAX_PHYID) + return -EINVAL; + + map = vcpu->kvm->arch.phyid_map; + cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_CPUID); + + spin_lock(&vcpu->kvm->arch.phyid_map_lock); + if ((cpuid < KVM_MAX_PHYID) && map->phys_map[cpuid].enabled) { + /* Discard duplicated CPUID set operation */ + if (cpuid == val) { + spin_unlock(&vcpu->kvm->arch.phyid_map_lock); + return 0; + } + + /* + * CPUID is already set before + * Forbid changing to a different CPUID at runtime + */ + spin_unlock(&vcpu->kvm->arch.phyid_map_lock); + return -EINVAL; + } + + if (map->phys_map[val].enabled) { + /* Discard duplicated CPUID set operation */ + if (vcpu == map->phys_map[val].vcpu) { + spin_unlock(&vcpu->kvm->arch.phyid_map_lock); + return 0; + } + + /* + * New CPUID is already set with other vcpu + * Forbid sharing the same CPUID between different vcpus + */ + spin_unlock(&vcpu->kvm->arch.phyid_map_lock); + return -EINVAL; + } + + kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, val); + map->phys_map[val].enabled = true; + map->phys_map[val].vcpu = vcpu; + spin_unlock(&vcpu->kvm->arch.phyid_map_lock); + + return 0; +} + +static inline void kvm_drop_cpuid(struct kvm_vcpu *vcpu) +{ + int cpuid; + struct kvm_phyid_map *map; + struct loongarch_csrs *csr = vcpu->arch.csr; + + map = vcpu->kvm->arch.phyid_map; + cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_CPUID); + + if (cpuid >= KVM_MAX_PHYID) + return; + + spin_lock(&vcpu->kvm->arch.phyid_map_lock); + if (map->phys_map[cpuid].enabled) { + map->phys_map[cpuid].vcpu = NULL; + map->phys_map[cpuid].enabled = false; + kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, KVM_MAX_PHYID); + } + spin_unlock(&vcpu->kvm->arch.phyid_map_lock); +} + +struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid) +{ + struct kvm_phyid_map *map; + + if (cpuid >= KVM_MAX_PHYID) + return NULL; + + map = kvm->arch.phyid_map; + if (!map->phys_map[cpuid].enabled) + return NULL; + + return map->phys_map[cpuid].vcpu; } static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val) @@ -282,6 +377,9 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val) if (get_gcsr_flag(id) & INVALID_GCSR) return -EINVAL; + if (id == LOONGARCH_CSR_CPUID) + return kvm_set_cpuid(vcpu, val); + if (id == LOONGARCH_CSR_ESTAT) { /* ESTAT IP0~IP7 inject through GINTC */ gintc = (val >> 2) & 0xff; @@ -409,6 +507,9 @@ static int kvm_get_one_reg(struct kvm_vcpu *vcpu, case KVM_REG_LOONGARCH_COUNTER: *v = drdtime() + vcpu->kvm->arch.time_offset; break; + case KVM_REG_LOONGARCH_DEBUG_INST: + *v = INSN_HVCL | KVM_HCALL_SWDBG; + break; default: ret = -EINVAL; break; @@ -924,6 +1025,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) /* Set cpuid */ kvm_write_sw_gcsr(csr, LOONGARCH_CSR_TMID, vcpu->vcpu_id); + kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, KVM_MAX_PHYID); /* Start with no pending virtual guest interrupts */ csr->csrs[LOONGARCH_CSR_GINTC] = 0; @@ -942,6 +1044,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) hrtimer_cancel(&vcpu->arch.swtimer); kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); + kvm_drop_cpuid(vcpu); kfree(vcpu->arch.csr); /* diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c index 0a37f6fa8f2d..6b2e4f66ad26 100644 --- a/arch/loongarch/kvm/vm.c +++ b/arch/loongarch/kvm/vm.c @@ -30,6 +30,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (!kvm->arch.pgd) return -ENOMEM; + kvm->arch.phyid_map = kvzalloc(sizeof(struct kvm_phyid_map), GFP_KERNEL_ACCOUNT); + if (!kvm->arch.phyid_map) { + free_page((unsigned long)kvm->arch.pgd); + kvm->arch.pgd = NULL; + return -ENOMEM; + } + spin_lock_init(&kvm->arch.phyid_map_lock); + kvm_init_vmcs(kvm); kvm->arch.gpa_size = BIT(cpu_vabits - 1); kvm->arch.root_level = CONFIG_PGTABLE_LEVELS - 1; @@ -52,6 +60,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_destroy_vcpus(kvm); free_page((unsigned long)kvm->arch.pgd); kvm->arch.pgd = NULL; + kvfree(kvm->arch.phyid_map); + kvm->arch.phyid_map = NULL; } int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) @@ -66,6 +76,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_IMMEDIATE_EXIT: case KVM_CAP_IOEVENTFD: case KVM_CAP_MP_STATE: + case KVM_CAP_SET_GUEST_DEBUG: r = 1; break; case KVM_CAP_NR_VCPUS: diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index 4dd53427f657..bf789d114c2d 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -24,6 +24,7 @@ #include <linux/gfp.h> #include <linux/hugetlb.h> #include <linux/mmzone.h> +#include <linux/execmem.h> #include <asm/asm-offsets.h> #include <asm/bootinfo.h> @@ -248,3 +249,23 @@ EXPORT_SYMBOL(invalid_pmd_table); #endif pte_t invalid_pte_table[PTRS_PER_PTE] __page_aligned_bss; EXPORT_SYMBOL(invalid_pte_table); + +#ifdef CONFIG_EXECMEM +static struct execmem_info execmem_info __ro_after_init; + +struct execmem_info __init *execmem_arch_setup(void) +{ + execmem_info = (struct execmem_info){ + .ranges = { + [EXECMEM_DEFAULT] = { + .start = MODULES_VADDR, + .end = MODULES_END, + .pgprot = PAGE_KERNEL, + .alignment = 1, + }, + }, + }; + + return &execmem_info; +} +#endif /* CONFIG_EXECMEM */ diff --git a/arch/m68k/include/asm/pgtable.h b/arch/m68k/include/asm/pgtable.h index 27525c6a12fd..49fcfd734860 100644 --- a/arch/m68k/include/asm/pgtable.h +++ b/arch/m68k/include/asm/pgtable.h @@ -2,6 +2,8 @@ #ifndef __M68K_PGTABLE_H #define __M68K_PGTABLE_H +#include <asm/page.h> + #ifdef __uClinux__ #include <asm/pgtable_no.h> #else diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig index 4da7bc4ac4a3..176314f3c9aa 100644 --- a/arch/microblaze/configs/mmu_defconfig +++ b/arch/microblaze/configs/mmu_defconfig @@ -4,7 +4,7 @@ CONFIG_AUDIT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_EXPERT=y -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y CONFIG_KALLSYMS_ALL=y CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR=1 CONFIG_XILINX_MICROBLAZE0_USE_PCMP_INSTR=1 diff --git a/arch/mips/configs/rs90_defconfig b/arch/mips/configs/rs90_defconfig index 4b9e36d6400e..a53dd66e9b86 100644 --- a/arch/mips/configs/rs90_defconfig +++ b/arch/mips/configs/rs90_defconfig @@ -9,7 +9,7 @@ CONFIG_LD_DEAD_CODE_DATA_ELIMINATION=y # CONFIG_SGETMASK_SYSCALL is not set # CONFIG_SYSFS_SYSCALL is not set # CONFIG_ELF_CORE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_TIMERFD is not set # CONFIG_AIO is not set # CONFIG_IO_URING is not set diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h index 20ca48c1b606..c0109aff223b 100644 --- a/arch/mips/include/asm/pgtable-64.h +++ b/arch/mips/include/asm/pgtable-64.h @@ -147,8 +147,8 @@ #if defined(CONFIG_MODULES) && defined(KBUILD_64BIT_SYM32) && \ VMALLOC_START != CKSSEG /* Load modules into 32bit-compatible segment. */ -#define MODULE_START CKSSEG -#define MODULE_END (FIXADDR_START-2*PAGE_SIZE) +#define MODULES_VADDR CKSSEG +#define MODULES_END (FIXADDR_START-2*PAGE_SIZE) #endif #define pte_ERROR(e) \ diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c index 7b2fbaa9cac5..ba0f62d8eff5 100644 --- a/arch/mips/kernel/module.c +++ b/arch/mips/kernel/module.c @@ -13,7 +13,6 @@ #include <linux/elf.h> #include <linux/mm.h> #include <linux/numa.h> -#include <linux/vmalloc.h> #include <linux/slab.h> #include <linux/fs.h> #include <linux/string.h> @@ -31,15 +30,6 @@ struct mips_hi16 { static LIST_HEAD(dbe_list); static DEFINE_SPINLOCK(dbe_lock); -#ifdef MODULE_START -void *module_alloc(unsigned long size) -{ - return __vmalloc_node_range(size, 1, MODULE_START, MODULE_END, - GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); -} -#endif - static void apply_r_mips_32(u32 *location, u32 base, Elf_Addr v) { *location = base + v; diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index 467ee6b95ae1..c17157e700c0 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -444,36 +444,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) return true; } -bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) -{ - gpa_t gpa = range->start << PAGE_SHIFT; - pte_t hva_pte = range->arg.pte; - pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa); - pte_t old_pte; - - if (!gpa_pte) - return false; - - /* Mapping may need adjusting depending on memslot flags */ - old_pte = *gpa_pte; - if (range->slot->flags & KVM_MEM_LOG_DIRTY_PAGES && !pte_dirty(old_pte)) - hva_pte = pte_mkclean(hva_pte); - else if (range->slot->flags & KVM_MEM_READONLY) - hva_pte = pte_wrprotect(hva_pte); - - set_pte(gpa_pte, hva_pte); - - /* Replacing an absent or old page doesn't need flushes */ - if (!pte_present(old_pte) || !pte_young(old_pte)) - return false; - - /* Pages swapped, aged, moved, or cleaned require flushes */ - return !pte_present(hva_pte) || - !pte_young(hva_pte) || - pte_pfn(old_pte) != pte_pfn(hva_pte) || - (pte_dirty(old_pte) && !pte_dirty(hva_pte)); -} - bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { return kvm_mips_mkold_gpa_pt(kvm, range->start, range->end); diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index aaa9a242ebba..37fedeaca2e9 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -83,8 +83,8 @@ static void __do_page_fault(struct pt_regs *regs, unsigned long write, if (unlikely(address >= VMALLOC_START && address <= VMALLOC_END)) goto VMALLOC_FAULT_TARGET; -#ifdef MODULE_START - if (unlikely(address >= MODULE_START && address < MODULE_END)) +#ifdef MODULES_VADDR + if (unlikely(address >= MODULES_VADDR && address < MODULES_END)) goto VMALLOC_FAULT_TARGET; #endif diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 39f129205b0c..4583d1a2a73e 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -31,6 +31,7 @@ #include <linux/gfp.h> #include <linux/kcore.h> #include <linux/initrd.h> +#include <linux/execmem.h> #include <asm/bootinfo.h> #include <asm/cachectl.h> @@ -576,3 +577,25 @@ EXPORT_SYMBOL_GPL(invalid_pmd_table); #endif pte_t invalid_pte_table[PTRS_PER_PTE] __page_aligned_bss; EXPORT_SYMBOL(invalid_pte_table); + +#ifdef CONFIG_EXECMEM +#ifdef MODULES_VADDR +static struct execmem_info execmem_info __ro_after_init; + +struct execmem_info __init *execmem_arch_setup(void) +{ + execmem_info = (struct execmem_info){ + .ranges = { + [EXECMEM_DEFAULT] = { + .start = MODULES_VADDR, + .end = MODULES_END, + .pgprot = PAGE_KERNEL, + .alignment = 1, + }, + }, + }; + + return &execmem_info; +} +#endif +#endif /* CONFIG_EXECMEM */ diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h index d052dfcbe8d3..eab87c6beacb 100644 --- a/arch/nios2/include/asm/pgtable.h +++ b/arch/nios2/include/asm/pgtable.h @@ -25,7 +25,10 @@ #include <asm-generic/pgtable-nopmd.h> #define VMALLOC_START CONFIG_NIOS2_KERNEL_MMU_REGION_BASE -#define VMALLOC_END (CONFIG_NIOS2_KERNEL_REGION_BASE - 1) +#define VMALLOC_END (CONFIG_NIOS2_KERNEL_REGION_BASE - SZ_32M - 1) + +#define MODULES_VADDR (CONFIG_NIOS2_KERNEL_REGION_BASE - SZ_32M) +#define MODULES_END (CONFIG_NIOS2_KERNEL_REGION_BASE - 1) struct mm_struct; diff --git a/arch/nios2/kernel/module.c b/arch/nios2/kernel/module.c index 76e0a42d6e36..f4483243578d 100644 --- a/arch/nios2/kernel/module.c +++ b/arch/nios2/kernel/module.c @@ -13,7 +13,6 @@ #include <linux/moduleloader.h> #include <linux/elf.h> #include <linux/mm.h> -#include <linux/vmalloc.h> #include <linux/slab.h> #include <linux/fs.h> #include <linux/string.h> @@ -21,25 +20,6 @@ #include <asm/cacheflush.h> -/* - * Modules should NOT be allocated with kmalloc for (obvious) reasons. - * But we do it for now to avoid relocation issues. CALL26/PCREL26 cannot reach - * from 0x80000000 (vmalloc area) to 0xc00000000 (kernel) (kmalloc returns - * addresses in 0xc0000000) - */ -void *module_alloc(unsigned long size) -{ - if (size == 0) - return NULL; - return kmalloc(size, GFP_KERNEL); -} - -/* Free memory returned from module_alloc */ -void module_memfree(void *module_region) -{ - kfree(module_region); -} - int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, unsigned int relsec, struct module *mod) diff --git a/arch/nios2/mm/init.c b/arch/nios2/mm/init.c index 7bc82ee889c9..3459df28afee 100644 --- a/arch/nios2/mm/init.c +++ b/arch/nios2/mm/init.c @@ -26,6 +26,7 @@ #include <linux/memblock.h> #include <linux/slab.h> #include <linux/binfmts.h> +#include <linux/execmem.h> #include <asm/setup.h> #include <asm/page.h> @@ -143,3 +144,23 @@ static const pgprot_t protection_map[16] = { [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = MKP(1, 1, 1) }; DECLARE_VM_GET_PAGE_PROT + +#ifdef CONFIG_EXECMEM +static struct execmem_info execmem_info __ro_after_init; + +struct execmem_info __init *execmem_arch_setup(void) +{ + execmem_info = (struct execmem_info){ + .ranges = { + [EXECMEM_DEFAULT] = { + .start = MODULES_VADDR, + .end = MODULES_END, + .pgprot = PAGE_KERNEL_EXEC, + .alignment = 1, + }, + }, + }; + + return &execmem_info; +} +#endif /* CONFIG_EXECMEM */ diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig index ee4febb30386..5ce258f3fffa 100644 --- a/arch/parisc/configs/generic-32bit_defconfig +++ b/arch/parisc/configs/generic-32bit_defconfig @@ -131,7 +131,7 @@ CONFIG_PPDEV=m CONFIG_I2C=y CONFIG_HWMON=m CONFIG_DRM=m -CONFIG_DRM_DP_CEC=y +CONFIG_DRM_DISPLAY_DP_AUX_CEC=y # CONFIG_DRM_I2C_CH7006 is not set # CONFIG_DRM_I2C_SIL164 is not set CONFIG_DRM_RADEON=m diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h index ad4e15d12ed1..4bea2e95798f 100644 --- a/arch/parisc/include/asm/page.h +++ b/arch/parisc/include/asm/page.h @@ -8,6 +8,7 @@ #define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) +#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA #ifndef __ASSEMBLY__ diff --git a/arch/parisc/include/asm/signal.h b/arch/parisc/include/asm/signal.h index 715c96ba2ec8..e84883c6b4c7 100644 --- a/arch/parisc/include/asm/signal.h +++ b/arch/parisc/include/asm/signal.h @@ -4,23 +4,11 @@ #include <uapi/asm/signal.h> -#define _NSIG 64 -/* bits-per-word, where word apparently means 'long' not 'int' */ -#define _NSIG_BPW BITS_PER_LONG -#define _NSIG_WORDS (_NSIG / _NSIG_BPW) - # ifndef __ASSEMBLY__ /* Most things should be clean enough to redefine this at will, if care is taken to make libc match. */ -typedef unsigned long old_sigset_t; /* at least 32 bits */ - -typedef struct { - /* next_signal() assumes this is a long - no choice */ - unsigned long sig[_NSIG_WORDS]; -} sigset_t; - #include <asm/sigcontext.h> #endif /* !__ASSEMBLY */ diff --git a/arch/parisc/include/uapi/asm/signal.h b/arch/parisc/include/uapi/asm/signal.h index 8e4895c5ea5d..40d7a574c5dd 100644 --- a/arch/parisc/include/uapi/asm/signal.h +++ b/arch/parisc/include/uapi/asm/signal.h @@ -57,10 +57,20 @@ #include <asm-generic/signal-defs.h> +#define _NSIG 64 +#define _NSIG_BPW (sizeof(unsigned long) * 8) +#define _NSIG_WORDS (_NSIG / _NSIG_BPW) + # ifndef __ASSEMBLY__ # include <linux/types.h> +typedef unsigned long old_sigset_t; /* at least 32 bits */ + +typedef struct { + unsigned long sig[_NSIG_WORDS]; +} sigset_t; + /* Avoid too many header ordering problems. */ struct siginfo; diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index 621a4b386ae4..c91f9c2e61ed 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -206,6 +206,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct kprobe *p; int bit; + if (unlikely(kprobe_ftrace_disabled)) + return; + bit = ftrace_test_recursion_trylock(ip, parent_ip); if (bit < 0) return; diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index d214bbe3c2af..4e5d991b2b65 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -41,7 +41,6 @@ #include <linux/moduleloader.h> #include <linux/elf.h> -#include <linux/vmalloc.h> #include <linux/fs.h> #include <linux/ftrace.h> #include <linux/string.h> @@ -173,17 +172,6 @@ static inline int reassemble_22(int as22) ((as22 & 0x0003ff) << 3)); } -void *module_alloc(unsigned long size) -{ - /* using RWX means less protection for modules, but it's - * easier than trying to map the text, data, init_text and - * init_data correctly */ - return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, - GFP_KERNEL, - PAGE_KERNEL_RWX, 0, NUMA_NO_NODE, - __builtin_return_address(0)); -} - #ifndef CONFIG_64BIT static inline unsigned long count_gots(const Elf_Rela *rela, unsigned long n) { diff --git a/arch/parisc/math-emu/driver.c b/arch/parisc/math-emu/driver.c index 6ce427b58836..34495446e051 100644 --- a/arch/parisc/math-emu/driver.c +++ b/arch/parisc/math-emu/driver.c @@ -26,12 +26,6 @@ #define FPUDEBUG 0 -/* Format of the floating-point exception registers. */ -struct exc_reg { - unsigned int exception : 6; - unsigned int ei : 26; -}; - /* Macros for grabbing bits of the instruction format from the 'ei' field above. */ /* Major opcode 0c and 0e */ diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index f876af56e13f..34d91cb8b259 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -24,6 +24,7 @@ #include <linux/nodemask.h> /* for node_online_map */ #include <linux/pagemap.h> /* for release_pages */ #include <linux/compat.h> +#include <linux/execmem.h> #include <asm/pgalloc.h> #include <asm/tlb.h> @@ -481,7 +482,7 @@ void free_initmem(void) /* finally dump all the instructions which were cached, since the * pages are no-longer executable */ flush_icache_range(init_begin, init_end); - + free_initmem_default(POISON_FREE_INITMEM); /* set up a new led state on systems shipped LED State panel */ @@ -992,3 +993,23 @@ static const pgprot_t protection_map[16] = { [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_RWX }; DECLARE_VM_GET_PAGE_PROT + +#ifdef CONFIG_EXECMEM +static struct execmem_info execmem_info __ro_after_init; + +struct execmem_info __init *execmem_arch_setup(void) +{ + execmem_info = (struct execmem_info){ + .ranges = { + [EXECMEM_DEFAULT] = { + .start = VMALLOC_START, + .end = VMALLOC_END, + .pgprot = PAGE_KERNEL_RWX, + .alignment = 1, + }, + }, + }; + + return &execmem_info; +} +#endif /* CONFIG_EXECMEM */ diff --git a/arch/powerpc/Kbuild b/arch/powerpc/Kbuild index 22cd0d55a892..571f260b0842 100644 --- a/arch/powerpc/Kbuild +++ b/arch/powerpc/Kbuild @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror +subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror -Wa,--fatal-warnings +subdir-asflags-$(CONFIG_PPC_WERROR) := -Wa,--fatal-warnings obj-y += kernel/ obj-y += mm/ diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c45fa9d7fb76..70e118f140eb 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -286,7 +286,7 @@ config PPC select IOMMU_HELPER if PPC64 select IRQ_DOMAIN select IRQ_FORCED_THREADING - select KASAN_VMALLOC if KASAN && MODULES + select KASAN_VMALLOC if KASAN && EXECMEM select LOCK_MM_AND_FIND_VMA select MMU_GATHER_PAGE_SIZE select MMU_GATHER_RCU_TABLE_FREE @@ -687,6 +687,10 @@ config ARCH_SELECTS_CRASH_DUMP depends on CRASH_DUMP select RELOCATABLE if PPC64 || 44x || PPC_85xx +config ARCH_SUPPORTS_CRASH_HOTPLUG + def_bool y + depends on PPC64 + config FA_DUMP bool "Firmware-assisted dump" depends on CRASH_DUMP && PPC64 && (PPC_RTAS || PPC_POWERNV) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 65261cbe5bfd..0a0c57aee1ae 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -114,7 +114,6 @@ LDFLAGS_vmlinux := $(LDFLAGS_vmlinux-y) ifdef CONFIG_PPC64 ifndef CONFIG_PPC_KERNEL_PCREL -ifeq ($(call cc-option-yn,-mcmodel=medium),y) # -mcmodel=medium breaks modules because it uses 32bit offsets from # the TOC pointer to create pointers where possible. Pointers into the # percpu data area are created by this method. @@ -124,9 +123,6 @@ ifeq ($(call cc-option-yn,-mcmodel=medium),y) # kernel percpu data space (starting with 0xc...). We need a full # 64bit relocation for this to work, hence -mcmodel=large. KBUILD_CFLAGS_MODULE += -mcmodel=large -else - export NO_MINIMAL_TOC := -mno-minimal-toc -endif endif endif @@ -139,7 +135,7 @@ CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcall-aixdesc) endif endif -CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc)) +CFLAGS-$(CONFIG_PPC64) += -mcmodel=medium CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mlong-double-128) diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 968aee2025b8..9c2b6e527ed1 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -108,8 +108,8 @@ DTC_FLAGS ?= -p 1024 # these files into the build dir, fix up any includes and ensure that dependent # files are copied in the right order. -# these need to be seperate variables because they are copied out of different -# directories in the kernel tree. Sure you COULd merge them, but it's a +# these need to be separate variables because they are copied out of different +# directories in the kernel tree. Sure you COULD merge them, but it's a # cure-is-worse-than-disease situation. zlib-decomp-$(CONFIG_KERNEL_GZIP) := decompress_inflate.c zlib-$(CONFIG_KERNEL_GZIP) := inffast.c inflate.c inftrees.c diff --git a/arch/powerpc/boot/decompress.c b/arch/powerpc/boot/decompress.c index 977eb15a6d17..6835cb53f034 100644 --- a/arch/powerpc/boot/decompress.c +++ b/arch/powerpc/boot/decompress.c @@ -101,7 +101,7 @@ static void print_err(char *s) * @input_size: length of the input buffer * @outbuf: output buffer * @output_size: length of the output buffer - * @skip number of output bytes to ignore + * @_skip: number of output bytes to ignore * * This function takes compressed data from inbuf, decompresses and write it to * outbuf. Once output_size bytes are written to the output buffer, or the diff --git a/arch/powerpc/boot/dts/acadia.dts b/arch/powerpc/boot/dts/acadia.dts index deb52e41ab84..5fedda811378 100644 --- a/arch/powerpc/boot/dts/acadia.dts +++ b/arch/powerpc/boot/dts/acadia.dts @@ -172,7 +172,7 @@ reg = <0xef602800 0x60>; interrupt-parent = <&UIC0>; interrupts = <0x4 0x4>; - /* This thing is a bit weird. It has it's own UIC + /* This thing is a bit weird. It has its own UIC * that it uses to generate snapshot triggers. We * don't really support this device yet, and it needs * work to figure this out. diff --git a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi index 4f044b41a776..fb3200b006ad 100644 --- a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi @@ -50,7 +50,7 @@ &ifc { #address-cells = <2>; #size-cells = <1>; - compatible = "fsl,ifc", "simple-bus"; + compatible = "fsl,ifc"; interrupts = <25 2 0 0>; }; diff --git a/arch/powerpc/boot/dts/fsl/bsc9131rdb.dts b/arch/powerpc/boot/dts/fsl/bsc9131rdb.dts index 8da984251abc..0ba86a6dce1b 100644 --- a/arch/powerpc/boot/dts/fsl/bsc9131rdb.dts +++ b/arch/powerpc/boot/dts/fsl/bsc9131rdb.dts @@ -15,7 +15,7 @@ device_type = "memory"; }; - board_ifc: ifc: ifc@ff71e000 { + board_ifc: ifc: memory-controller@ff71e000 { /* NAND Flash on board */ ranges = <0x0 0x0 0x0 0xff800000 0x00004000>; reg = <0x0 0xff71e000 0x0 0x2000>; diff --git a/arch/powerpc/boot/dts/fsl/bsc9131si-post.dtsi b/arch/powerpc/boot/dts/fsl/bsc9131si-post.dtsi index 2a677fd323eb..5c53cee8755f 100644 --- a/arch/powerpc/boot/dts/fsl/bsc9131si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/bsc9131si-post.dtsi @@ -35,7 +35,7 @@ &ifc { #address-cells = <2>; #size-cells = <1>; - compatible = "fsl,ifc", "simple-bus"; + compatible = "fsl,ifc"; interrupts = <16 2 0 0 20 2 0 0>; }; diff --git a/arch/powerpc/boot/dts/fsl/bsc9132qds.dts b/arch/powerpc/boot/dts/fsl/bsc9132qds.dts index 7cb2158dfe58..ce642e879a1b 100644 --- a/arch/powerpc/boot/dts/fsl/bsc9132qds.dts +++ b/arch/powerpc/boot/dts/fsl/bsc9132qds.dts @@ -15,7 +15,7 @@ device_type = "memory"; }; - ifc: ifc@ff71e000 { + ifc: memory-controller@ff71e000 { /* NOR, NAND Flash on board */ ranges = <0x0 0x0 0x0 0x88000000 0x08000000 0x1 0x0 0x0 0xff800000 0x00010000>; diff --git a/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi b/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi index b8e0edd1ac69..4da451e000d9 100644 --- a/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi @@ -35,7 +35,7 @@ &ifc { #address-cells = <2>; #size-cells = <1>; - compatible = "fsl,ifc", "simple-bus"; + compatible = "fsl,ifc"; /* FIXME: Test whether interrupts are split */ interrupts = <16 2 0 0 20 2 0 0>; }; diff --git a/arch/powerpc/boot/dts/fsl/c293pcie.dts b/arch/powerpc/boot/dts/fsl/c293pcie.dts index 5e905e0857cf..e2fdac2ed420 100644 --- a/arch/powerpc/boot/dts/fsl/c293pcie.dts +++ b/arch/powerpc/boot/dts/fsl/c293pcie.dts @@ -42,7 +42,7 @@ device_type = "memory"; }; - ifc: ifc@fffe1e000 { + ifc: memory-controller@fffe1e000 { reg = <0xf 0xffe1e000 0 0x2000>; ranges = <0x0 0x0 0xf 0xec000000 0x04000000 0x1 0x0 0xf 0xff800000 0x00010000 diff --git a/arch/powerpc/boot/dts/fsl/c293si-post.dtsi b/arch/powerpc/boot/dts/fsl/c293si-post.dtsi index f208fb8f64b3..2d443d519274 100644 --- a/arch/powerpc/boot/dts/fsl/c293si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/c293si-post.dtsi @@ -35,7 +35,7 @@ &ifc { #address-cells = <2>; #size-cells = <1>; - compatible = "fsl,ifc", "simple-bus"; + compatible = "fsl,ifc"; interrupts = <19 2 0 0>; }; diff --git a/arch/powerpc/boot/dts/fsl/mpc8536si-post.dtsi b/arch/powerpc/boot/dts/fsl/mpc8536si-post.dtsi index 41935709ebe8..fba40a1bccc0 100644 --- a/arch/powerpc/boot/dts/fsl/mpc8536si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/mpc8536si-post.dtsi @@ -199,6 +199,10 @@ /include/ "pq3-dma-0.dtsi" /include/ "pq3-etsec1-0.dtsi" + enet0: ethernet@24000 { + fsl,wake-on-filer; + fsl,pmc-handle = <&etsec1_clk>; + }; /include/ "pq3-etsec1-timer-0.dtsi" usb@22000 { @@ -222,9 +226,10 @@ }; /include/ "pq3-etsec1-2.dtsi" - - ethernet@26000 { + enet2: ethernet@26000 { cell-index = <1>; + fsl,wake-on-filer; + fsl,pmc-handle = <&etsec3_clk>; }; usb@2b000 { @@ -249,4 +254,9 @@ reg = <0xe0000 0x1000>; fsl,has-rstcr; }; + +/include/ "pq3-power.dtsi" + power@e0070 { + compatible = "fsl,mpc8536-pmc", "fsl,mpc8548-pmc"; + }; }; diff --git a/arch/powerpc/boot/dts/fsl/mpc8544si-post.dtsi b/arch/powerpc/boot/dts/fsl/mpc8544si-post.dtsi index b68eb119faef..ea7416af7ee3 100644 --- a/arch/powerpc/boot/dts/fsl/mpc8544si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/mpc8544si-post.dtsi @@ -188,4 +188,6 @@ reg = <0xe0000 0x1000>; fsl,has-rstcr; }; + +/include/ "pq3-power.dtsi" }; diff --git a/arch/powerpc/boot/dts/fsl/mpc8548si-post.dtsi b/arch/powerpc/boot/dts/fsl/mpc8548si-post.dtsi index 579d76cb8e32..dddb7374508d 100644 --- a/arch/powerpc/boot/dts/fsl/mpc8548si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/mpc8548si-post.dtsi @@ -156,4 +156,6 @@ reg = <0xe0000 0x1000>; fsl,has-rstcr; }; + +/include/ "pq3-power.dtsi" }; diff --git a/arch/powerpc/boot/dts/fsl/mpc8572si-post.dtsi b/arch/powerpc/boot/dts/fsl/mpc8572si-post.dtsi index 49294cf36b4e..40a6cff77032 100644 --- a/arch/powerpc/boot/dts/fsl/mpc8572si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/mpc8572si-post.dtsi @@ -193,4 +193,6 @@ reg = <0xe0000 0x1000>; fsl,has-rstcr; }; + +/include/ "pq3-power.dtsi" }; diff --git a/arch/powerpc/boot/dts/fsl/p1010rdb-pb.dts b/arch/powerpc/boot/dts/fsl/p1010rdb-pb.dts index 3a94acbb3c03..ce3346d77858 100644 --- a/arch/powerpc/boot/dts/fsl/p1010rdb-pb.dts +++ b/arch/powerpc/boot/dts/fsl/p1010rdb-pb.dts @@ -29,3 +29,19 @@ }; /include/ "p1010si-post.dtsi" + +&pci0 { + pcie@0 { + interrupt-map = < + /* IDSEL 0x0 */ + /* + *irq[4:5] are active-high + *irq[6:7] are active-low + */ + 0000 0x0 0x0 0x1 &mpic 0x4 0x2 0x0 0x0 + 0000 0x0 0x0 0x2 &mpic 0x5 0x2 0x0 0x0 + 0000 0x0 0x0 0x3 &mpic 0x6 0x1 0x0 0x0 + 0000 0x0 0x0 0x4 &mpic 0x7 0x1 0x0 0x0 + >; + }; +}; diff --git a/arch/powerpc/boot/dts/fsl/p1010rdb-pb_36b.dts b/arch/powerpc/boot/dts/fsl/p1010rdb-pb_36b.dts index 4cf255fedc96..83590354f9a0 100644 --- a/arch/powerpc/boot/dts/fsl/p1010rdb-pb_36b.dts +++ b/arch/powerpc/boot/dts/fsl/p1010rdb-pb_36b.dts @@ -56,3 +56,19 @@ }; /include/ "p1010si-post.dtsi" + +&pci0 { + pcie@0 { + interrupt-map = < + /* IDSEL 0x0 */ + /* + *irq[4:5] are active-high + *irq[6:7] are active-low + */ + 0000 0x0 0x0 0x1 &mpic 0x4 0x2 0x0 0x0 + 0000 0x0 0x0 0x2 &mpic 0x5 0x2 0x0 0x0 + 0000 0x0 0x0 0x3 &mpic 0x6 0x1 0x0 0x0 + 0000 0x0 0x0 0x4 &mpic 0x7 0x1 0x0 0x0 + >; + }; +}; diff --git a/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi b/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi index 2ca9cee2ddeb..ef49a7d6c69d 100644 --- a/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi @@ -215,19 +215,3 @@ phy-connection-type = "sgmii"; }; }; - -&pci0 { - pcie@0 { - interrupt-map = < - /* IDSEL 0x0 */ - /* - *irq[4:5] are active-high - *irq[6:7] are active-low - */ - 0000 0x0 0x0 0x1 &mpic 0x4 0x2 0x0 0x0 - 0000 0x0 0x0 0x2 &mpic 0x5 0x2 0x0 0x0 - 0000 0x0 0x0 0x3 &mpic 0x6 0x1 0x0 0x0 - 0000 0x0 0x0 0x4 &mpic 0x7 0x1 0x0 0x0 - >; - }; -}; diff --git a/arch/powerpc/boot/dts/fsl/p1010rdb_32b.dtsi b/arch/powerpc/boot/dts/fsl/p1010rdb_32b.dtsi index fdc19aab2f70..583a6cd05079 100644 --- a/arch/powerpc/boot/dts/fsl/p1010rdb_32b.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1010rdb_32b.dtsi @@ -36,7 +36,7 @@ memory { device_type = "memory"; }; -board_ifc: ifc: ifc@ffe1e000 { +board_ifc: ifc: memory-controller@ffe1e000 { /* NOR, NAND Flashes and CPLD on board */ ranges = <0x0 0x0 0x0 0xee000000 0x02000000 0x1 0x0 0x0 0xff800000 0x00010000 diff --git a/arch/powerpc/boot/dts/fsl/p1010rdb_36b.dtsi b/arch/powerpc/boot/dts/fsl/p1010rdb_36b.dtsi index de2fceed4f79..4d41efe0038f 100644 --- a/arch/powerpc/boot/dts/fsl/p1010rdb_36b.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1010rdb_36b.dtsi @@ -36,7 +36,7 @@ memory { device_type = "memory"; }; -board_ifc: ifc: ifc@fffe1e000 { +board_ifc: ifc: memory-controller@fffe1e000 { /* NOR, NAND Flashes and CPLD on board */ ranges = <0x0 0x0 0xf 0xee000000 0x02000000 0x1 0x0 0xf 0xff800000 0x00010000 diff --git a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi index ccda0a91abf0..2d2550729dcc 100644 --- a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi @@ -35,7 +35,7 @@ &ifc { #address-cells = <2>; #size-cells = <1>; - compatible = "fsl,ifc", "simple-bus"; + compatible = "fsl,ifc"; interrupts = <16 2 0 0 19 2 0 0>; }; @@ -183,9 +183,23 @@ /include/ "pq3-etsec2-1.dtsi" /include/ "pq3-etsec2-2.dtsi" + enet0: ethernet@b0000 { + fsl,pmc-handle = <&etsec1_clk>; + }; + + enet1: ethernet@b1000 { + fsl,pmc-handle = <&etsec2_clk>; + }; + + enet2: ethernet@b2000 { + fsl,pmc-handle = <&etsec3_clk>; + }; + global-utilities@e0000 { compatible = "fsl,p1010-guts"; reg = <0xe0000 0x1000>; fsl,has-rstcr; }; + +/include/ "pq3-power.dtsi" }; diff --git a/arch/powerpc/boot/dts/fsl/p1020si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1020si-post.dtsi index 642dc3a83d0e..cc4c7461003b 100644 --- a/arch/powerpc/boot/dts/fsl/p1020si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1020si-post.dtsi @@ -163,14 +163,17 @@ /include/ "pq3-etsec2-0.dtsi" enet0: enet0_grp2: ethernet@b0000 { + fsl,pmc-handle = <&etsec1_clk>; }; /include/ "pq3-etsec2-1.dtsi" enet1: enet1_grp2: ethernet@b1000 { + fsl,pmc-handle = <&etsec2_clk>; }; /include/ "pq3-etsec2-2.dtsi" enet2: enet2_grp2: ethernet@b2000 { + fsl,pmc-handle = <&etsec3_clk>; }; global-utilities@e0000 { @@ -178,6 +181,8 @@ reg = <0xe0000 0x1000>; fsl,has-rstcr; }; + +/include/ "pq3-power.dtsi" }; /include/ "pq3-etsec2-grp2-0.dtsi" diff --git a/arch/powerpc/boot/dts/fsl/p1021si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1021si-post.dtsi index 407cb5fd0f5b..378195db9fca 100644 --- a/arch/powerpc/boot/dts/fsl/p1021si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1021si-post.dtsi @@ -159,14 +159,17 @@ /include/ "pq3-etsec2-0.dtsi" enet0: enet0_grp2: ethernet@b0000 { + fsl,pmc-handle = <&etsec1_clk>; }; /include/ "pq3-etsec2-1.dtsi" enet1: enet1_grp2: ethernet@b1000 { + fsl,pmc-handle = <&etsec2_clk>; }; /include/ "pq3-etsec2-2.dtsi" enet2: enet2_grp2: ethernet@b2000 { + fsl,pmc-handle = <&etsec3_clk>; }; global-utilities@e0000 { @@ -174,6 +177,8 @@ reg = <0xe0000 0x1000>; fsl,has-rstcr; }; + +/include/ "pq3-power.dtsi" }; &qe { diff --git a/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi index 093e4e3ed368..6ac21e81344a 100644 --- a/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi @@ -225,11 +225,13 @@ /include/ "pq3-etsec2-0.dtsi" enet0: enet0_grp2: ethernet@b0000 { fsl,wake-on-filer; + fsl,pmc-handle = <&etsec1_clk>; }; /include/ "pq3-etsec2-1.dtsi" enet1: enet1_grp2: ethernet@b1000 { fsl,wake-on-filer; + fsl,pmc-handle = <&etsec2_clk>; }; global-utilities@e0000 { @@ -238,9 +240,10 @@ fsl,has-rstcr; }; +/include/ "pq3-power.dtsi" power@e0070 { - compatible = "fsl,mpc8536-pmc", "fsl,mpc8548-pmc"; - reg = <0xe0070 0x20>; + compatible = "fsl,p1022-pmc", "fsl,mpc8536-pmc", + "fsl,mpc8548-pmc"; }; }; diff --git a/arch/powerpc/boot/dts/fsl/p2020si-post.dtsi b/arch/powerpc/boot/dts/fsl/p2020si-post.dtsi index 81b9ab2119be..d410082d21c0 100644 --- a/arch/powerpc/boot/dts/fsl/p2020si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p2020si-post.dtsi @@ -178,6 +178,10 @@ compatible = "fsl-usb2-dr-v1.6", "fsl-usb2-dr"; }; /include/ "pq3-etsec1-0.dtsi" + enet0: ethernet@24000 { + fsl,pmc-handle = <&etsec1_clk>; + + }; /include/ "pq3-etsec1-timer-0.dtsi" ptp_clock@24e00 { @@ -186,7 +190,15 @@ /include/ "pq3-etsec1-1.dtsi" + enet1: ethernet@25000 { + fsl,pmc-handle = <&etsec2_clk>; + }; + /include/ "pq3-etsec1-2.dtsi" + enet2: ethernet@26000 { + fsl,pmc-handle = <&etsec3_clk>; + }; + /include/ "pq3-esdhc-0.dtsi" sdhc@2e000 { compatible = "fsl,p2020-esdhc", "fsl,esdhc"; @@ -202,8 +214,5 @@ fsl,has-rstcr; }; - pmc: power@e0070 { - compatible = "fsl,mpc8548-pmc"; - reg = <0xe0070 0x20>; - }; +/include/ "pq3-power.dtsi" }; diff --git a/arch/powerpc/boot/dts/fsl/pq3-power.dtsi b/arch/powerpc/boot/dts/fsl/pq3-power.dtsi new file mode 100644 index 000000000000..6af12401004d --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/pq3-power.dtsi @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: (GPL-2.0+) +/* + * Copyright 2024 NXP + */ + +power@e0070 { + compatible = "fsl,mpc8548-pmc"; + reg = <0xe0070 0x20>; + + etsec1_clk: soc-clk@24 { + fsl,pmcdr-mask = <0x00000080>; + }; + etsec2_clk: soc-clk@25 { + fsl,pmcdr-mask = <0x00000040>; + }; + etsec3_clk: soc-clk@26 { + fsl,pmcdr-mask = <0x00000020>; + }; +}; diff --git a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi index aa5152ca8120..8ef0c020206b 100644 --- a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi @@ -52,7 +52,7 @@ &ifc { #address-cells = <2>; #size-cells = <1>; - compatible = "fsl,ifc", "simple-bus"; + compatible = "fsl,ifc"; interrupts = <25 2 0 0>; }; diff --git a/arch/powerpc/boot/dts/fsl/t1024rdb.dts b/arch/powerpc/boot/dts/fsl/t1024rdb.dts index 270aaf631f2a..7d003e07a9fb 100644 --- a/arch/powerpc/boot/dts/fsl/t1024rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1024rdb.dts @@ -91,7 +91,7 @@ board-control@2,0 { #address-cells = <1>; #size-cells = <1>; - compatible = "fsl,t1024-cpld"; + compatible = "fsl,t1024-cpld", "fsl,deepsleep-cpld"; reg = <3 0 0x300>; ranges = <0 3 0 0x300>; bank-width = <1>; diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb.dts b/arch/powerpc/boot/dts/fsl/t1040rdb.dts index dd3aab81e9de..4347924e9aa7 100644 --- a/arch/powerpc/boot/dts/fsl/t1040rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1040rdb.dts @@ -104,7 +104,7 @@ ifc: localbus@ffe124000 { cpld@3,0 { - compatible = "fsl,t1040rdb-cpld"; + compatible = "fsl,t104xrdb-cpld", "fsl,deepsleep-cpld"; }; }; }; diff --git a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi index 776788623204..c9542b73bd7f 100644 --- a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi @@ -52,7 +52,7 @@ &ifc { #address-cells = <2>; #size-cells = <1>; - compatible = "fsl,ifc", "simple-bus"; + compatible = "fsl,ifc"; interrupts = <25 2 0 0>; }; diff --git a/arch/powerpc/boot/dts/fsl/t1042rdb.dts b/arch/powerpc/boot/dts/fsl/t1042rdb.dts index 3ebb712224cb..099764322b33 100644 --- a/arch/powerpc/boot/dts/fsl/t1042rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1042rdb.dts @@ -68,7 +68,7 @@ ifc: localbus@ffe124000 { cpld@3,0 { - compatible = "fsl,t1042rdb-cpld"; + compatible = "fsl,t104xrdb-cpld", "fsl,deepsleep-cpld"; }; }; }; diff --git a/arch/powerpc/boot/dts/fsl/t1042rdb_pi.dts b/arch/powerpc/boot/dts/fsl/t1042rdb_pi.dts index 8ec3ff45e6fc..b10cab1a347b 100644 --- a/arch/powerpc/boot/dts/fsl/t1042rdb_pi.dts +++ b/arch/powerpc/boot/dts/fsl/t1042rdb_pi.dts @@ -41,7 +41,7 @@ ifc: localbus@ffe124000 { cpld@3,0 { - compatible = "fsl,t1042rdb_pi-cpld"; + compatible = "fsl,t104xrdb-cpld", "fsl,deepsleep-cpld"; }; }; diff --git a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi index 27714dc2f04a..6bb95878d39d 100644 --- a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi @@ -50,7 +50,7 @@ &ifc { #address-cells = <2>; #size-cells = <1>; - compatible = "fsl,ifc", "simple-bus"; + compatible = "fsl,ifc"; interrupts = <25 2 0 0>; }; diff --git a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi index fcac73486d48..65f3e17c0d41 100644 --- a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi @@ -50,7 +50,7 @@ &ifc { #address-cells = <2>; #size-cells = <1>; - compatible = "fsl,ifc", "simple-bus"; + compatible = "fsl,ifc"; interrupts = <25 2 0 0>; }; diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c index cae31a6e8f02..2c0e2a1cab01 100644 --- a/arch/powerpc/boot/main.c +++ b/arch/powerpc/boot/main.c @@ -188,7 +188,7 @@ static inline void prep_esm_blob(struct addr_range vmlinux, void *chosen) { } /* A buffer that may be edited by tools operating on a zImage binary so as to * edit the command line passed to vmlinux (by setting /chosen/bootargs). - * The buffer is put in it's own section so that tools may locate it easier. + * The buffer is put in its own section so that tools may locate it easier. */ static char cmdline[BOOT_COMMAND_LINE_SIZE] __attribute__((__section__("__builtin_cmdline"))); diff --git a/arch/powerpc/boot/ps3.c b/arch/powerpc/boot/ps3.c index f157717ae814..89ff46b8b225 100644 --- a/arch/powerpc/boot/ps3.c +++ b/arch/powerpc/boot/ps3.c @@ -25,7 +25,7 @@ BSS_STACK(4096); /* A buffer that may be edited by tools operating on a zImage binary so as to * edit the command line passed to vmlinux (by setting /chosen/bootargs). - * The buffer is put in it's own section so that tools may locate it easier. + * The buffer is put in its own section so that tools may locate it easier. */ static char cmdline[BOOT_COMMAND_LINE_SIZE] diff --git a/arch/powerpc/configs/adder875_defconfig b/arch/powerpc/configs/adder875_defconfig index 7f35d5bc1229..97f4d4851735 100644 --- a/arch/powerpc/configs/adder875_defconfig +++ b/arch/powerpc/configs/adder875_defconfig @@ -4,7 +4,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_EXPERT=y # CONFIG_ELF_CORE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_FUTEX is not set # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/powerpc/configs/ep88xc_defconfig b/arch/powerpc/configs/ep88xc_defconfig index a98ef6a4abef..50cc59eb36cf 100644 --- a/arch/powerpc/configs/ep88xc_defconfig +++ b/arch/powerpc/configs/ep88xc_defconfig @@ -6,7 +6,7 @@ CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_EXPERT=y # CONFIG_ELF_CORE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_FUTEX is not set # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/powerpc/configs/mpc866_ads_defconfig b/arch/powerpc/configs/mpc866_ads_defconfig index 5c56d36cdfc5..6f449411abf7 100644 --- a/arch/powerpc/configs/mpc866_ads_defconfig +++ b/arch/powerpc/configs/mpc866_ads_defconfig @@ -6,7 +6,7 @@ CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_EXPERT=y # CONFIG_BUG is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_EPOLL is not set # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig index 56b876e418e9..77306be62e9e 100644 --- a/arch/powerpc/configs/mpc885_ads_defconfig +++ b/arch/powerpc/configs/mpc885_ads_defconfig @@ -7,7 +7,7 @@ CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_EXPERT=y # CONFIG_ELF_CORE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_FUTEX is not set CONFIG_PERF_EVENTS=y # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/powerpc/configs/tqm8xx_defconfig b/arch/powerpc/configs/tqm8xx_defconfig index 083c2e57520a..383c0966e92f 100644 --- a/arch/powerpc/configs/tqm8xx_defconfig +++ b/arch/powerpc/configs/tqm8xx_defconfig @@ -6,7 +6,7 @@ CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_EXPERT=y # CONFIG_ELF_CORE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_FUTEX is not set # CONFIG_VM_EVENT_COUNTERS is not set CONFIG_MODULES=y diff --git a/arch/powerpc/include/asm/cpu_has_feature.h b/arch/powerpc/include/asm/cpu_has_feature.h index 727d4b321937..0efabccd820c 100644 --- a/arch/powerpc/include/asm/cpu_has_feature.h +++ b/arch/powerpc/include/asm/cpu_has_feature.h @@ -29,7 +29,7 @@ static __always_inline bool cpu_has_feature(unsigned long feature) #endif #ifdef CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG - if (!static_key_initialized) { + if (!static_key_feature_checks_initialized) { printk("Warning! cpu_has_feature() used prior to jump label init!\n"); dump_stack(); return early_cpu_has_feature(feature); diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 514dd056c2c8..91a9fd53254f 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -82,7 +82,7 @@ struct eeh_pe { int false_positives; /* Times of reported #ff's */ atomic_t pass_dev_cnt; /* Count of passed through devs */ struct eeh_pe *parent; /* Parent PE */ - void *data; /* PE auxillary data */ + void *data; /* PE auxiliary data */ struct list_head child_list; /* List of PEs below this PE */ struct list_head child; /* Memb. child_list/eeh_phb_pe */ struct list_head edevs; /* List of eeh_dev in this PE */ diff --git a/arch/powerpc/include/asm/fadump-internal.h b/arch/powerpc/include/asm/fadump-internal.h index 27f9e11eda28..e83869a4eb6a 100644 --- a/arch/powerpc/include/asm/fadump-internal.h +++ b/arch/powerpc/include/asm/fadump-internal.h @@ -42,13 +42,38 @@ static inline u64 fadump_str_to_u64(const char *str) #define FADUMP_CPU_UNKNOWN (~((u32)0)) -#define FADUMP_CRASH_INFO_MAGIC fadump_str_to_u64("FADMPINF") +/* + * The introduction of new fields in the fadump crash info header has + * led to a change in the magic key from `FADMPINF` to `FADMPSIG` for + * identifying a kernel crash from an old kernel. + * + * To prevent the need for further changes to the magic number in the + * event of future modifications to the fadump crash info header, a + * version field has been introduced to track the fadump crash info + * header version. + * + * Consider a few points before adding new members to the fadump crash info + * header structure: + * + * - Append new members; avoid adding them in between. + * - Non-primitive members should have a size member as well. + * - For every change in the fadump header, increment the + * fadump header version. This helps the updated kernel decide how to + * handle kernel dumps from older kernels. + */ +#define FADUMP_CRASH_INFO_MAGIC_OLD fadump_str_to_u64("FADMPINF") +#define FADUMP_CRASH_INFO_MAGIC fadump_str_to_u64("FADMPSIG") +#define FADUMP_HEADER_VERSION 1 /* fadump crash info structure */ struct fadump_crash_info_header { u64 magic_number; - u64 elfcorehdr_addr; + u32 version; u32 crashing_cpu; + u64 vmcoreinfo_raddr; + u64 vmcoreinfo_size; + u32 pt_regs_sz; + u32 cpu_mask_sz; struct pt_regs regs; struct cpumask cpu_mask; }; @@ -94,9 +119,13 @@ struct fw_dump { u64 boot_mem_regs_cnt; unsigned long fadumphdr_addr; + u64 elfcorehdr_addr; + u64 elfcorehdr_size; unsigned long cpu_notes_buf_vaddr; unsigned long cpu_notes_buf_size; + unsigned long param_area; + /* * Maximum size supported by firmware to copy from source to * destination address per entry. @@ -111,6 +140,7 @@ struct fw_dump { unsigned long dump_active:1; unsigned long dump_registered:1; unsigned long nocma:1; + unsigned long param_area_supported:1; struct fadump_ops *ops; }; @@ -129,6 +159,7 @@ struct fadump_ops { struct seq_file *m); void (*fadump_trigger)(struct fadump_crash_info_header *fdh, const char *msg); + int (*fadump_max_boot_mem_rgns)(void); }; /* Helper functions */ @@ -136,7 +167,6 @@ s32 __init fadump_setup_cpu_notes_buf(u32 num_cpus); void fadump_free_cpu_notes_buf(void); u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs); void __init fadump_update_elfcore_header(char *bufp); -bool is_fadump_boot_mem_contiguous(void); bool is_fadump_reserved_mem_contiguous(void); #else /* !CONFIG_PRESERVE_FA_DUMP */ diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h index 526a6a647312..ef40c9b6972a 100644 --- a/arch/powerpc/include/asm/fadump.h +++ b/arch/powerpc/include/asm/fadump.h @@ -19,12 +19,14 @@ extern int is_fadump_active(void); extern int should_fadump_crash(void); extern void crash_fadump(struct pt_regs *, const char *); extern void fadump_cleanup(void); +extern void fadump_append_bootargs(void); #else /* CONFIG_FA_DUMP */ static inline int is_fadump_active(void) { return 0; } static inline int should_fadump_crash(void) { return 0; } static inline void crash_fadump(struct pt_regs *regs, const char *str) { } static inline void fadump_cleanup(void) { } +static inline void fadump_append_bootargs(void) { } #endif /* !CONFIG_FA_DUMP */ #if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP) diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 77824bd289a3..17d168dd8b49 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -291,6 +291,8 @@ extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup; extern long __start__btb_flush_fixup, __stop__btb_flush_fixup; +extern bool static_key_feature_checks_initialized; + void apply_feature_fixups(void); void update_mmu_feature_fixups(unsigned long mask); void setup_feature_keys(void); diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index a41e542ba94d..7a8495660c2f 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -524,7 +524,7 @@ long plpar_hcall_norets_notrace(unsigned long opcode, ...); * Used for all but the craziest of phyp interfaces (see plpar_hcall9) */ #define PLPAR_HCALL_BUFSIZE 4 -long plpar_hcall(unsigned long opcode, unsigned long *retbuf, ...); +long plpar_hcall(unsigned long opcode, unsigned long retbuf[static PLPAR_HCALL_BUFSIZE], ...); /** * plpar_hcall_raw: - Make a hypervisor call without calculating hcall stats @@ -538,7 +538,7 @@ long plpar_hcall(unsigned long opcode, unsigned long *retbuf, ...); * plpar_hcall, but plpar_hcall_raw works in real mode and does not * calculate hypervisor call statistics. */ -long plpar_hcall_raw(unsigned long opcode, unsigned long *retbuf, ...); +long plpar_hcall_raw(unsigned long opcode, unsigned long retbuf[static PLPAR_HCALL_BUFSIZE], ...); /** * plpar_hcall9: - Make a pseries hypervisor call with up to 9 return arguments @@ -549,8 +549,8 @@ long plpar_hcall_raw(unsigned long opcode, unsigned long *retbuf, ...); * PLPAR_HCALL9_BUFSIZE to size the return argument buffer. */ #define PLPAR_HCALL9_BUFSIZE 9 -long plpar_hcall9(unsigned long opcode, unsigned long *retbuf, ...); -long plpar_hcall9_raw(unsigned long opcode, unsigned long *retbuf, ...); +long plpar_hcall9(unsigned long opcode, unsigned long retbuf[static PLPAR_HCALL9_BUFSIZE], ...); +long plpar_hcall9_raw(unsigned long opcode, unsigned long retbuf[static PLPAR_HCALL9_BUFSIZE], ...); /* pseries hcall tracing */ extern struct static_key hcall_tracepoint_key; @@ -570,7 +570,7 @@ struct hvcall_mpp_data { unsigned long backing_mem; }; -int h_get_mpp(struct hvcall_mpp_data *); +long h_get_mpp(struct hvcall_mpp_data *mpp_data); struct hvcall_mpp_x_data { unsigned long coalesced_bytes; diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 7b610864b364..2d6c886b40f4 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -336,6 +336,14 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte if (IS_ENABLED(CONFIG_KASAN)) return; + /* + * Likewise, do not use it in real mode if percpu first chunk is not + * embedded. With CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK enabled there + * are chances where percpu allocation can come from vmalloc area. + */ + if (percpu_first_chunk_is_paged) + return; + /* Otherwise, it should be safe to call it */ nmi_enter(); } @@ -351,6 +359,8 @@ static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct inter // no nmi_exit for a pseries hash guest taking a real mode exception } else if (IS_ENABLED(CONFIG_KASAN)) { // no nmi_exit for KASAN in real mode + } else if (percpu_first_chunk_is_paged) { + // no nmi_exit if percpu first chunk is not embedded } else { nmi_exit(); } diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 08c550ed49be..52e1b1d15ff6 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -37,7 +37,7 @@ extern struct pci_dev *isa_bridge_pcidev; * define properly based on the platform */ #ifndef CONFIG_PCI -#define _IO_BASE 0 +#define _IO_BASE POISON_POINTER_DELTA #define _ISA_MEM_BASE 0 #define PCI_DRAM_OFFSET 0 #elif defined(CONFIG_PPC32) @@ -585,12 +585,12 @@ __do_out_asm(_rec_outl, "stwbrx") #define __do_inw(port) _rec_inw(port) #define __do_inl(port) _rec_inl(port) #else /* CONFIG_PPC32 */ -#define __do_outb(val, port) writeb(val,(PCI_IO_ADDR)_IO_BASE+port); -#define __do_outw(val, port) writew(val,(PCI_IO_ADDR)_IO_BASE+port); -#define __do_outl(val, port) writel(val,(PCI_IO_ADDR)_IO_BASE+port); -#define __do_inb(port) readb((PCI_IO_ADDR)_IO_BASE + port); -#define __do_inw(port) readw((PCI_IO_ADDR)_IO_BASE + port); -#define __do_inl(port) readl((PCI_IO_ADDR)_IO_BASE + port); +#define __do_outb(val, port) writeb(val,(PCI_IO_ADDR)(_IO_BASE+port)); +#define __do_outw(val, port) writew(val,(PCI_IO_ADDR)(_IO_BASE+port)); +#define __do_outl(val, port) writel(val,(PCI_IO_ADDR)(_IO_BASE+port)); +#define __do_inb(port) readb((PCI_IO_ADDR)(_IO_BASE + port)); +#define __do_inw(port) readw((PCI_IO_ADDR)(_IO_BASE + port)); +#define __do_inl(port) readl((PCI_IO_ADDR)(_IO_BASE + port)); #endif /* !CONFIG_PPC32 */ #ifdef CONFIG_EEH @@ -606,12 +606,12 @@ __do_out_asm(_rec_outl, "stwbrx") #define __do_writesw(a, b, n) _outsw(PCI_FIX_ADDR(a),(b),(n)) #define __do_writesl(a, b, n) _outsl(PCI_FIX_ADDR(a),(b),(n)) -#define __do_insb(p, b, n) readsb((PCI_IO_ADDR)_IO_BASE+(p), (b), (n)) -#define __do_insw(p, b, n) readsw((PCI_IO_ADDR)_IO_BASE+(p), (b), (n)) -#define __do_insl(p, b, n) readsl((PCI_IO_ADDR)_IO_BASE+(p), (b), (n)) -#define __do_outsb(p, b, n) writesb((PCI_IO_ADDR)_IO_BASE+(p),(b),(n)) -#define __do_outsw(p, b, n) writesw((PCI_IO_ADDR)_IO_BASE+(p),(b),(n)) -#define __do_outsl(p, b, n) writesl((PCI_IO_ADDR)_IO_BASE+(p),(b),(n)) +#define __do_insb(p, b, n) readsb((PCI_IO_ADDR)(_IO_BASE+(p)), (b), (n)) +#define __do_insw(p, b, n) readsw((PCI_IO_ADDR)(_IO_BASE+(p)), (b), (n)) +#define __do_insl(p, b, n) readsl((PCI_IO_ADDR)(_IO_BASE+(p)), (b), (n)) +#define __do_outsb(p, b, n) writesb((PCI_IO_ADDR)(_IO_BASE+(p)),(b),(n)) +#define __do_outsw(p, b, n) writesw((PCI_IO_ADDR)(_IO_BASE+(p)),(b),(n)) +#define __do_outsl(p, b, n) writesl((PCI_IO_ADDR)(_IO_BASE+(p)),(b),(n)) #define __do_memset_io(addr, c, n) \ _memset_io(PCI_FIX_ADDR(addr), c, n) @@ -982,7 +982,7 @@ static inline phys_addr_t page_to_phys(struct page *page) } /* - * 32 bits still uses virt_to_bus() for it's implementation of DMA + * 32 bits still uses virt_to_bus() for its implementation of DMA * mappings se we have to keep it defined here. We also have some old * drivers (shame shame shame) that use bus_to_virt() and haven't been * fixed yet so I need to define it here. diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h index 365d2720097c..b5bbb94c51f6 100644 --- a/arch/powerpc/include/asm/kasan.h +++ b/arch/powerpc/include/asm/kasan.h @@ -19,7 +19,7 @@ #define KASAN_SHADOW_SCALE_SHIFT 3 -#if defined(CONFIG_MODULES) && defined(CONFIG_PPC32) +#if defined(CONFIG_EXECMEM) && defined(CONFIG_PPC32) #define KASAN_KERN_START ALIGN_DOWN(PAGE_OFFSET - SZ_256M, SZ_256M) #else #define KASAN_KERN_START PAGE_OFFSET diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index fdb90e24dc74..95a98b390d62 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -135,6 +135,17 @@ static inline void crash_setup_regs(struct pt_regs *newregs, ppc_save_regs(newregs); } +#ifdef CONFIG_CRASH_HOTPLUG +void arch_crash_handle_hotplug_event(struct kimage *image, void *arg); +#define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event + +int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags); +#define arch_crash_hotplug_support arch_crash_hotplug_support + +unsigned int arch_crash_get_elfcorehdr_size(void); +#define crash_get_elfcorehdr_size arch_crash_get_elfcorehdr_size +#endif /* CONFIG_CRASH_HOTPLUG */ + extern int crashing_cpu; extern void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)); extern void crash_ipi_callback(struct pt_regs *regs); @@ -185,6 +196,10 @@ static inline void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) #endif /* CONFIG_CRASH_DUMP */ +#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP) +int update_cpus_node(void *fdt); +#endif + #ifdef CONFIG_PPC_BOOK3S_64 #include <asm/book3s/64/kexec.h> #endif diff --git a/arch/powerpc/include/asm/kexec_ranges.h b/arch/powerpc/include/asm/kexec_ranges.h index f83866a19e87..14055896cbcb 100644 --- a/arch/powerpc/include/asm/kexec_ranges.h +++ b/arch/powerpc/include/asm/kexec_ranges.h @@ -7,19 +7,9 @@ void sort_memory_ranges(struct crash_mem *mrngs, bool merge); struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges); int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size); -int add_tce_mem_ranges(struct crash_mem **mem_ranges); -int add_initrd_mem_range(struct crash_mem **mem_ranges); -#ifdef CONFIG_PPC_64S_HASH_MMU -int add_htab_mem_range(struct crash_mem **mem_ranges); -#else -static inline int add_htab_mem_range(struct crash_mem **mem_ranges) -{ - return 0; -} -#endif -int add_kernel_mem_range(struct crash_mem **mem_ranges); -int add_rtas_mem_range(struct crash_mem **mem_ranges); -int add_opal_mem_range(struct crash_mem **mem_ranges); -int add_reserved_mem_ranges(struct crash_mem **mem_ranges); - +int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size); +int get_exclude_memory_ranges(struct crash_mem **mem_ranges); +int get_reserved_memory_ranges(struct crash_mem **mem_ranges); +int get_crash_memory_ranges(struct crash_mem **mem_ranges); +int get_usable_memory_ranges(struct crash_mem **mem_ranges); #endif /* _ASM_POWERPC_KEXEC_RANGES_H */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 3281215097cc..ca3829d47ab7 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -287,7 +287,6 @@ struct kvmppc_ops { bool (*unmap_gfn_range)(struct kvm *kvm, struct kvm_gfn_range *range); bool (*age_gfn)(struct kvm *kvm, struct kvm_gfn_range *range); bool (*test_age_gfn)(struct kvm *kvm, struct kvm_gfn_range *range); - bool (*set_spte_gfn)(struct kvm *kvm, struct kvm_gfn_range *range); void (*free_memslot)(struct kvm_memory_slot *slot); int (*init_vm)(struct kvm *kvm); void (*destroy_vm)(struct kvm *kvm); diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 3b72c7ed24cf..24f830cf9bb4 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -251,7 +251,7 @@ static __always_inline bool mmu_has_feature(unsigned long feature) #endif #ifdef CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG - if (!static_key_initialized) { + if (!static_key_feature_checks_initialized) { printk("Warning! mmu_has_feature() used prior to jump label init!\n"); dump_stack(); return early_mmu_has_feature(feature); diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index a8e2e8339fb7..300c777cc307 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -48,11 +48,6 @@ struct mod_arch_specific { unsigned long tramp; unsigned long tramp_regs; #endif - - /* List of BUG addresses, source line numbers and filenames */ - struct list_head bug_list; - struct bug_entry *bug_table; - unsigned int num_bugs; }; /* diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index a2bc4b95e703..8c9d4b26bf57 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -1027,10 +1027,10 @@ struct opal_i2c_request { * The host will pass on OPAL, a buffer of length OPAL_SYSEPOW_MAX * with individual elements being 16 bits wide to fetch the system * wide EPOW status. Each element in the buffer will contain the - * EPOW status in it's bit representation for a particular EPOW sub + * EPOW status in its bit representation for a particular EPOW sub * class as defined here. So multiple detailed EPOW status bits * specific for any sub class can be represented in a single buffer - * element as it's bit representation. + * element as its bit representation. */ /* System EPOW type */ diff --git a/arch/powerpc/include/asm/percpu.h b/arch/powerpc/include/asm/percpu.h index 8e5b7d0b851c..634970ce13c6 100644 --- a/arch/powerpc/include/asm/percpu.h +++ b/arch/powerpc/include/asm/percpu.h @@ -15,6 +15,16 @@ #endif /* CONFIG_SMP */ #endif /* __powerpc64__ */ +#if defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK) && defined(CONFIG_SMP) +#include <linux/jump_label.h> +DECLARE_STATIC_KEY_FALSE(__percpu_first_chunk_is_paged); + +#define percpu_first_chunk_is_paged \ + (static_key_enabled(&__percpu_first_chunk_is_paged.key)) +#else +#define percpu_first_chunk_is_paged false +#endif /* CONFIG_PPC64 && CONFIG_SMP */ + #include <asm-generic/percpu.h> #include <asm/paca.h> diff --git a/arch/powerpc/include/asm/pmac_feature.h b/arch/powerpc/include/asm/pmac_feature.h index 2495866f2e97..420e2878ae67 100644 --- a/arch/powerpc/include/asm/pmac_feature.h +++ b/arch/powerpc/include/asm/pmac_feature.h @@ -192,7 +192,7 @@ static inline long pmac_call_feature(int selector, struct device_node* node, /* PMAC_FTR_BMAC_ENABLE (struct device_node* node, 0, int value) * enable/disable the bmac (ethernet) cell of a mac-io ASIC, also drive - * it's reset line + * its reset line */ #define PMAC_FTR_BMAC_ENABLE PMAC_FTR_DEF(6) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 005601243dda..076ae60b4a55 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -510,6 +510,7 @@ #define PPC_RAW_STB(r, base, i) (0x98000000 | ___PPC_RS(r) | ___PPC_RA(base) | IMM_L(i)) #define PPC_RAW_LBZ(r, base, i) (0x88000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) #define PPC_RAW_LDX(r, base, b) (0x7c00002a | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) +#define PPC_RAW_LHA(r, base, i) (0xa8000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) #define PPC_RAW_LHZ(r, base, i) (0xa0000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) #define PPC_RAW_LHBRX(r, base, b) (0x7c00062c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) #define PPC_RAW_LWBRX(r, base, b) (0x7c00042c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) @@ -532,6 +533,7 @@ #define PPC_RAW_MULW(d, a, b) (0x7c0001d6 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_MULHWU(d, a, b) (0x7c000016 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_MULI(d, a, i) (0x1c000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_DIVW(d, a, b) (0x7c0003d6 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_DIVWU(d, a, b) (0x7c000396 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_DIVDU(d, a, b) (0x7c000392 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_DIVDE(t, a, b) (0x7c000352 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) @@ -550,6 +552,8 @@ #define PPC_RAW_XOR(d, a, b) (0x7c000278 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) #define PPC_RAW_XORI(d, a, i) (0x68000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) #define PPC_RAW_XORIS(d, a, i) (0x6c000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) +#define PPC_RAW_EXTSB(d, a) (0x7c000774 | ___PPC_RA(d) | ___PPC_RS(a)) +#define PPC_RAW_EXTSH(d, a) (0x7c000734 | ___PPC_RA(d) | ___PPC_RS(a)) #define PPC_RAW_EXTSW(d, a) (0x7c0007b4 | ___PPC_RA(d) | ___PPC_RS(a)) #define PPC_RAW_SLW(d, a, s) (0x7c000030 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s)) #define PPC_RAW_SLD(d, a, s) (0x7c000036 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s)) diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index b2c51d337e60..e44cac0da346 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -260,7 +260,8 @@ struct thread_struct { unsigned long sier2; unsigned long sier3; unsigned long hashkeyr; - + unsigned long dexcr; + unsigned long dexcr_onexec; /* Reset value to load on exec */ #endif }; @@ -333,6 +334,16 @@ extern int set_endian(struct task_struct *tsk, unsigned int val); extern int get_unalign_ctl(struct task_struct *tsk, unsigned long adr); extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val); +#ifdef CONFIG_PPC_BOOK3S_64 + +#define PPC_GET_DEXCR_ASPECT(tsk, asp) get_dexcr_prctl((tsk), (asp)) +#define PPC_SET_DEXCR_ASPECT(tsk, asp, val) set_dexcr_prctl((tsk), (asp), (val)) + +int get_dexcr_prctl(struct task_struct *tsk, unsigned long asp); +int set_dexcr_prctl(struct task_struct *tsk, unsigned long asp, unsigned long val); + +#endif + extern void load_fp_state(struct thread_fp_state *fp); extern void store_fp_state(struct thread_fp_state *fp); extern void load_vr_state(struct thread_vr_state *vr); diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index d3d1aea009b4..eed33cb916d0 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -615,7 +615,7 @@ #define HID1_ABE (1<<10) /* 7450 Address Broadcast Enable */ #define HID1_PS (1<<16) /* 750FX PLL selection */ #endif -#define SPRN_HID2 0x3F8 /* Hardware Implementation Register 2 */ +#define SPRN_HID2_750FX 0x3F8 /* IBM 750FX HID2 Register */ #define SPRN_HID2_GEKKO 0x398 /* Gekko HID2 Register */ #define SPRN_HID2_G2_LE 0x3F3 /* G2_LE HID2 Register */ #define HID2_G2_LE_HBE (1<<18) /* High BAT Enable (G2_LE) */ diff --git a/arch/powerpc/include/asm/uninorth.h b/arch/powerpc/include/asm/uninorth.h index e278299b9b37..6949b5daa37d 100644 --- a/arch/powerpc/include/asm/uninorth.h +++ b/arch/powerpc/include/asm/uninorth.h @@ -144,7 +144,7 @@ #define UNI_N_HWINIT_STATE_SLEEPING 0x01 #define UNI_N_HWINIT_STATE_RUNNING 0x02 /* This last bit appear to be used by the bootROM to know the second - * CPU has started and will enter it's sleep loop with IP=0 + * CPU has started and will enter its sleep loop with IP=0 */ #define UNI_N_HWINIT_STATE_CPU1_FLAG 0x10000000 diff --git a/arch/powerpc/include/uapi/asm/bootx.h b/arch/powerpc/include/uapi/asm/bootx.h index 6728c7e24e58..1b8c121071d9 100644 --- a/arch/powerpc/include/uapi/asm/bootx.h +++ b/arch/powerpc/include/uapi/asm/bootx.h @@ -108,7 +108,7 @@ typedef struct boot_infos /* ALL BELOW NEW (vers. 4) */ /* This defines the physical memory. Valid with BOOT_ARCH_NUBUS flag - (non-PCI) only. On PCI, memory is contiguous and it's size is in the + (non-PCI) only. On PCI, memory is contiguous and its size is in the device-tree. */ boot_info_map_entry_t physMemoryMap[MAX_MEM_MAP_SIZE]; /* Where the phys memory is */ diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h deleted file mode 100644 index 17439925045c..000000000000 --- a/arch/powerpc/include/uapi/asm/papr_pdsm.h +++ /dev/null @@ -1,165 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * PAPR nvDimm Specific Methods (PDSM) and structs for libndctl - * - * (C) Copyright IBM 2020 - * - * Author: Vaibhav Jain <vaibhav at linux.ibm.com> - */ - -#ifndef _UAPI_ASM_POWERPC_PAPR_PDSM_H_ -#define _UAPI_ASM_POWERPC_PAPR_PDSM_H_ - -#include <linux/types.h> -#include <linux/ndctl.h> - -/* - * PDSM Envelope: - * - * The ioctl ND_CMD_CALL exchange data between user-space and kernel via - * envelope which consists of 2 headers sections and payload sections as - * illustrated below: - * +-----------------+---------------+---------------------------+ - * | 64-Bytes | 8-Bytes | Max 184-Bytes | - * +-----------------+---------------+---------------------------+ - * | ND-HEADER | PDSM-HEADER | PDSM-PAYLOAD | - * +-----------------+---------------+---------------------------+ - * | nd_family | | | - * | nd_size_out | cmd_status | | - * | nd_size_in | reserved | nd_pdsm_payload | - * | nd_command | payload --> | | - * | nd_fw_size | | | - * | nd_payload ---> | | | - * +---------------+-----------------+---------------------------+ - * - * ND Header: - * This is the generic libnvdimm header described as 'struct nd_cmd_pkg' - * which is interpreted by libnvdimm before passed on to papr_scm. Important - * member fields used are: - * 'nd_family' : (In) NVDIMM_FAMILY_PAPR_SCM - * 'nd_size_in' : (In) PDSM-HEADER + PDSM-IN-PAYLOAD (usually 0) - * 'nd_size_out' : (In) PDSM-HEADER + PDSM-RETURN-PAYLOAD - * 'nd_command' : (In) One of PAPR_PDSM_XXX - * 'nd_fw_size' : (Out) PDSM-HEADER + size of actual payload returned - * - * PDSM Header: - * This is papr-scm specific header that precedes the payload. This is defined - * as nd_cmd_pdsm_pkg. Following fields aare available in this header: - * - * 'cmd_status' : (Out) Errors if any encountered while servicing PDSM. - * 'reserved' : Not used, reserved for future and should be set to 0. - * 'payload' : A union of all the possible payload structs - * - * PDSM Payload: - * - * The layout of the PDSM Payload is defined by various structs shared between - * papr_scm and libndctl so that contents of payload can be interpreted. As such - * its defined as a union of all possible payload structs as - * 'union nd_pdsm_payload'. Based on the value of 'nd_cmd_pkg.nd_command' - * appropriate member of the union is accessed. - */ - -/* Max payload size that we can handle */ -#define ND_PDSM_PAYLOAD_MAX_SIZE 184 - -/* Max payload size that we can handle */ -#define ND_PDSM_HDR_SIZE \ - (sizeof(struct nd_pkg_pdsm) - ND_PDSM_PAYLOAD_MAX_SIZE) - -/* Various nvdimm health indicators */ -#define PAPR_PDSM_DIMM_HEALTHY 0 -#define PAPR_PDSM_DIMM_UNHEALTHY 1 -#define PAPR_PDSM_DIMM_CRITICAL 2 -#define PAPR_PDSM_DIMM_FATAL 3 - -/* struct nd_papr_pdsm_health.extension_flags field flags */ - -/* Indicate that the 'dimm_fuel_gauge' field is valid */ -#define PDSM_DIMM_HEALTH_RUN_GAUGE_VALID 1 - -/* Indicate that the 'dimm_dsc' field is valid */ -#define PDSM_DIMM_DSC_VALID 2 - -/* - * Struct exchanged between kernel & ndctl in for PAPR_PDSM_HEALTH - * Various flags indicate the health status of the dimm. - * - * extension_flags : Any extension fields present in the struct. - * dimm_unarmed : Dimm not armed. So contents wont persist. - * dimm_bad_shutdown : Previous shutdown did not persist contents. - * dimm_bad_restore : Contents from previous shutdown werent restored. - * dimm_scrubbed : Contents of the dimm have been scrubbed. - * dimm_locked : Contents of the dimm cant be modified until CEC reboot - * dimm_encrypted : Contents of dimm are encrypted. - * dimm_health : Dimm health indicator. One of PAPR_PDSM_DIMM_XXXX - * dimm_fuel_gauge : Life remaining of DIMM as a percentage from 0-100 - */ -struct nd_papr_pdsm_health { - union { - struct { - __u32 extension_flags; - __u8 dimm_unarmed; - __u8 dimm_bad_shutdown; - __u8 dimm_bad_restore; - __u8 dimm_scrubbed; - __u8 dimm_locked; - __u8 dimm_encrypted; - __u16 dimm_health; - - /* Extension flag PDSM_DIMM_HEALTH_RUN_GAUGE_VALID */ - __u16 dimm_fuel_gauge; - - /* Extension flag PDSM_DIMM_DSC_VALID */ - __u64 dimm_dsc; - }; - __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; - }; -}; - -/* Flags for injecting specific smart errors */ -#define PDSM_SMART_INJECT_HEALTH_FATAL (1 << 0) -#define PDSM_SMART_INJECT_BAD_SHUTDOWN (1 << 1) - -struct nd_papr_pdsm_smart_inject { - union { - struct { - /* One or more of PDSM_SMART_INJECT_ */ - __u32 flags; - __u8 fatal_enable; - __u8 unsafe_shutdown_enable; - }; - __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; - }; -}; - -/* - * Methods to be embedded in ND_CMD_CALL request. These are sent to the kernel - * via 'nd_cmd_pkg.nd_command' member of the ioctl struct - */ -enum papr_pdsm { - PAPR_PDSM_MIN = 0x0, - PAPR_PDSM_HEALTH, - PAPR_PDSM_SMART_INJECT, - PAPR_PDSM_MAX, -}; - -/* Maximal union that can hold all possible payload types */ -union nd_pdsm_payload { - struct nd_papr_pdsm_health health; - struct nd_papr_pdsm_smart_inject smart_inject; - __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; -} __packed; - -/* - * PDSM-header + payload expected with ND_CMD_CALL ioctl from libnvdimm - * Valid member of union 'payload' is identified via 'nd_cmd_pkg.nd_command' - * that should always precede this struct when sent to papr_scm via CMD_CALL - * interface. - */ -struct nd_pkg_pdsm { - __s32 cmd_status; /* Out: Sub-cmd status returned back */ - __u16 reserved[2]; /* Ignored and to be set as '0' */ - union nd_pdsm_payload payload; -} __packed; - -#endif /* _UAPI_ASM_POWERPC_PAPR_PDSM_H_ */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index d3282fbea4f2..8585d03c02d3 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -3,9 +3,6 @@ # Makefile for the linux kernel. # -ifdef CONFIG_PPC64 -CFLAGS_prom_init.o += $(NO_MINIMAL_TOC) -endif ifdef CONFIG_PPC32 CFLAGS_prom_init.o += -fPIC CFLAGS_btext.o += -fPIC @@ -87,6 +84,7 @@ obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_DAWR) += dawr.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o +obj-$(CONFIG_PPC_BOOK3S_64) += dexcr.o obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_64e.o obj-$(CONFIG_PPC_BARRIER_NOSPEC) += security.o @@ -190,9 +188,6 @@ GCOV_PROFILE_kprobes-ftrace.o := n KCOV_INSTRUMENT_kprobes-ftrace.o := n KCSAN_SANITIZE_kprobes-ftrace.o := n UBSAN_SANITIZE_kprobes-ftrace.o := n -GCOV_PROFILE_syscall_64.o := n -KCOV_INSTRUMENT_syscall_64.o := n -UBSAN_SANITIZE_syscall_64.o := n UBSAN_SANITIZE_vdso.o := n # Necessary for booting with kcov enabled on book3e machines diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S index bfd3f442e5eb..ab3ca74e6730 100644 --- a/arch/powerpc/kernel/cpu_setup_6xx.S +++ b/arch/powerpc/kernel/cpu_setup_6xx.S @@ -401,7 +401,7 @@ _GLOBAL(__save_cpu_setup) andi. r3,r3,0xff00 cmpwi cr0,r3,0x0200 bne 1f - mfspr r4,SPRN_HID2 + mfspr r4,SPRN_HID2_750FX stw r4,CS_HID2(r5) 1: mtcr r7 @@ -496,7 +496,7 @@ _GLOBAL(__restore_cpu_setup) bne 4f lwz r4,CS_HID2(r5) rlwinm r4,r4,0,19,17 - mtspr SPRN_HID2,r4 + mtspr SPRN_HID2_750FX,r4 sync 4: lwz r4,CS_HID1(r5) diff --git a/arch/powerpc/kernel/dexcr.c b/arch/powerpc/kernel/dexcr.c new file mode 100644 index 000000000000..3a0358e91c60 --- /dev/null +++ b/arch/powerpc/kernel/dexcr.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <linux/capability.h> +#include <linux/cpu.h> +#include <linux/init.h> +#include <linux/prctl.h> +#include <linux/sched.h> + +#include <asm/cpu_has_feature.h> +#include <asm/cputable.h> +#include <asm/processor.h> +#include <asm/reg.h> + +static int __init init_task_dexcr(void) +{ + if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) + return 0; + + current->thread.dexcr_onexec = mfspr(SPRN_DEXCR); + + return 0; +} +early_initcall(init_task_dexcr) + +/* Allow thread local configuration of these by default */ +#define DEXCR_PRCTL_EDITABLE ( \ + DEXCR_PR_IBRTPD | \ + DEXCR_PR_SRAPD | \ + DEXCR_PR_NPHIE) + +static int prctl_to_aspect(unsigned long which, unsigned int *aspect) +{ + switch (which) { + case PR_PPC_DEXCR_SBHE: + *aspect = DEXCR_PR_SBHE; + break; + case PR_PPC_DEXCR_IBRTPD: + *aspect = DEXCR_PR_IBRTPD; + break; + case PR_PPC_DEXCR_SRAPD: + *aspect = DEXCR_PR_SRAPD; + break; + case PR_PPC_DEXCR_NPHIE: + *aspect = DEXCR_PR_NPHIE; + break; + default: + return -ENODEV; + } + + return 0; +} + +int get_dexcr_prctl(struct task_struct *task, unsigned long which) +{ + unsigned int aspect; + int ret; + + ret = prctl_to_aspect(which, &aspect); + if (ret) + return ret; + + if (aspect & DEXCR_PRCTL_EDITABLE) + ret |= PR_PPC_DEXCR_CTRL_EDITABLE; + + if (aspect & mfspr(SPRN_DEXCR)) + ret |= PR_PPC_DEXCR_CTRL_SET; + else + ret |= PR_PPC_DEXCR_CTRL_CLEAR; + + if (aspect & task->thread.dexcr_onexec) + ret |= PR_PPC_DEXCR_CTRL_SET_ONEXEC; + else + ret |= PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC; + + return ret; +} + +int set_dexcr_prctl(struct task_struct *task, unsigned long which, unsigned long ctrl) +{ + unsigned long dexcr; + unsigned int aspect; + int err = 0; + + err = prctl_to_aspect(which, &aspect); + if (err) + return err; + + if (!(aspect & DEXCR_PRCTL_EDITABLE)) + return -EPERM; + + if (ctrl & ~PR_PPC_DEXCR_CTRL_MASK) + return -EINVAL; + + if (ctrl & PR_PPC_DEXCR_CTRL_SET && ctrl & PR_PPC_DEXCR_CTRL_CLEAR) + return -EINVAL; + + if (ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC && ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC) + return -EINVAL; + + /* + * We do not want an unprivileged process being able to disable + * a setuid process's hash check instructions + */ + if (aspect == DEXCR_PR_NPHIE && + ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC && + !capable(CAP_SYS_ADMIN)) + return -EPERM; + + dexcr = mfspr(SPRN_DEXCR); + + if (ctrl & PR_PPC_DEXCR_CTRL_SET) + dexcr |= aspect; + else if (ctrl & PR_PPC_DEXCR_CTRL_CLEAR) + dexcr &= ~aspect; + + if (ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC) + task->thread.dexcr_onexec |= aspect; + else if (ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC) + task->thread.dexcr_onexec &= ~aspect; + + mtspr(SPRN_DEXCR, dexcr); + + return 0; +} diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index ab316e155ea9..6670063a7a6c 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -506,9 +506,18 @@ int eeh_dev_check_failure(struct eeh_dev *edev) * We will punt with the following conditions: Failure to get * PE's state, EEH not support and Permanently unavailable * state, PE is in good state. + * + * On the pSeries, after reaching the threshold, get_state might + * return EEH_STATE_NOT_SUPPORT. However, it's possible that the + * device state remains uncleared if the device is not marked + * pci_channel_io_perm_failure. Therefore, consider logging the + * event to let device removal happen. + * */ if ((ret < 0) || - (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) { + (ret == EEH_STATE_NOT_SUPPORT && + dev->error_state == pci_channel_io_perm_failure) || + eeh_state_active(ret)) { eeh_stats.false_positives++; pe->false_positives++; rc = 0; diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 48773d2d9be3..7efe04c68f0f 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -865,9 +865,18 @@ void eeh_handle_normal_event(struct eeh_pe *pe) devices++; if (!devices) { - pr_debug("EEH: Frozen PHB#%x-PE#%x is empty!\n", + pr_warn("EEH: Frozen PHB#%x-PE#%x is empty!\n", pe->phb->global_number, pe->addr); - goto out; /* nothing to recover */ + /* + * The device is removed, tear down its state, on powernv + * hotplug driver would take care of it but not on pseries, + * permanently disable the card as it is hot removed. + * + * In the case of powernv, note that the removal of device + * is covered by pci rescan lock, so no problem even if hotplug + * driver attempts to remove the device. + */ + goto recover_failed; } /* Log the event */ diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index e0ce81279624..d1030bc52564 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -24,10 +24,10 @@ static int eeh_pe_aux_size = 0; static LIST_HEAD(eeh_phb_pe); /** - * eeh_set_pe_aux_size - Set PE auxillary data size - * @size: PE auxillary data size + * eeh_set_pe_aux_size - Set PE auxiliary data size + * @size: PE auxiliary data size in bytes * - * Set PE auxillary data size + * Set PE auxiliary data size. */ void eeh_set_pe_aux_size(int size) { @@ -527,7 +527,7 @@ EXPORT_SYMBOL_GPL(eeh_pe_state_mark); * eeh_pe_mark_isolated * @pe: EEH PE * - * Record that a PE has been isolated by marking the PE and it's children as + * Record that a PE has been isolated by marking the PE and its children as * EEH_PE_ISOLATED (and EEH_PE_CFG_BLOCKED, if required) and their PCI devices * as pci_channel_io_frozen. */ diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index d14eda1e8589..60f974775fc8 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -53,8 +53,6 @@ static struct kobject *fadump_kobj; static atomic_t cpus_in_fadump; static DEFINE_MUTEX(fadump_mutex); -static struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false }; - #define RESERVED_RNGS_SZ 16384 /* 16K - 128 entries */ #define RESERVED_RNGS_CNT (RESERVED_RNGS_SZ / \ sizeof(struct fadump_memory_range)) @@ -133,6 +131,41 @@ static int __init fadump_cma_init(void) static int __init fadump_cma_init(void) { return 1; } #endif /* CONFIG_CMA */ +/* + * Additional parameters meant for capture kernel are placed in a dedicated area. + * If this is capture kernel boot, append these parameters to bootargs. + */ +void __init fadump_append_bootargs(void) +{ + char *append_args; + size_t len; + + if (!fw_dump.dump_active || !fw_dump.param_area_supported || !fw_dump.param_area) + return; + + if (fw_dump.param_area >= fw_dump.boot_mem_top) { + if (memblock_reserve(fw_dump.param_area, COMMAND_LINE_SIZE)) { + pr_warn("WARNING: Can't use additional parameters area!\n"); + fw_dump.param_area = 0; + return; + } + } + + append_args = (char *)fw_dump.param_area; + len = strlen(boot_command_line); + + /* + * Too late to fail even if cmdline size exceeds. Truncate additional parameters + * to cmdline size and proceed anyway. + */ + if (len + strlen(append_args) >= COMMAND_LINE_SIZE - 1) + pr_warn("WARNING: Appending parameters exceeds cmdline size. Truncating!\n"); + + pr_debug("Cmdline: %s\n", boot_command_line); + snprintf(boot_command_line + len, COMMAND_LINE_SIZE - len, " %s", append_args); + pr_info("Updated cmdline: %s\n", boot_command_line); +} + /* Scan the Firmware Assisted dump configuration details. */ int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname, int depth, void *data) @@ -223,28 +256,6 @@ static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end) } /* - * Returns true, if there are no holes in boot memory area, - * false otherwise. - */ -bool is_fadump_boot_mem_contiguous(void) -{ - unsigned long d_start, d_end; - bool ret = false; - int i; - - for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) { - d_start = fw_dump.boot_mem_addr[i]; - d_end = d_start + fw_dump.boot_mem_sz[i]; - - ret = is_fadump_mem_area_contiguous(d_start, d_end); - if (!ret) - break; - } - - return ret; -} - -/* * Returns true, if there are no holes in reserved memory area, * false otherwise. */ @@ -373,12 +384,6 @@ static unsigned long __init get_fadump_area_size(void) size = PAGE_ALIGN(size); size += fw_dump.boot_memory_size; size += sizeof(struct fadump_crash_info_header); - size += sizeof(struct elfhdr); /* ELF core header.*/ - size += sizeof(struct elf_phdr); /* place holder for cpu notes */ - /* Program headers for crash memory regions. */ - size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2); - - size = PAGE_ALIGN(size); /* This is to hold kernel metadata on platforms that support it */ size += (fw_dump.ops->fadump_get_metadata_size ? @@ -389,10 +394,11 @@ static unsigned long __init get_fadump_area_size(void) static int __init add_boot_mem_region(unsigned long rstart, unsigned long rsize) { + int max_boot_mem_rgns = fw_dump.ops->fadump_max_boot_mem_rgns(); int i = fw_dump.boot_mem_regs_cnt++; - if (fw_dump.boot_mem_regs_cnt > FADUMP_MAX_MEM_REGS) { - fw_dump.boot_mem_regs_cnt = FADUMP_MAX_MEM_REGS; + if (fw_dump.boot_mem_regs_cnt > max_boot_mem_rgns) { + fw_dump.boot_mem_regs_cnt = max_boot_mem_rgns; return 0; } @@ -573,22 +579,6 @@ int __init fadump_reserve_mem(void) } } - /* - * Calculate the memory boundary. - * If memory_limit is less than actual memory boundary then reserve - * the memory for fadump beyond the memory_limit and adjust the - * memory_limit accordingly, so that the running kernel can run with - * specified memory_limit. - */ - if (memory_limit && memory_limit < memblock_end_of_DRAM()) { - size = get_fadump_area_size(); - if ((memory_limit + size) < memblock_end_of_DRAM()) - memory_limit += size; - else - memory_limit = memblock_end_of_DRAM(); - printk(KERN_INFO "Adjusted memory_limit for firmware-assisted" - " dump, now %#016llx\n", memory_limit); - } if (memory_limit) mem_boundary = memory_limit; else @@ -705,7 +695,7 @@ void crash_fadump(struct pt_regs *regs, const char *str) * old_cpu == -1 means this is the first CPU which has come here, * go ahead and trigger fadump. * - * old_cpu != -1 means some other CPU has already on it's way + * old_cpu != -1 means some other CPU has already on its way * to trigger fadump, just keep looping here. */ this_cpu = smp_processor_id(); @@ -931,36 +921,6 @@ static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info, return 0; } -static int fadump_exclude_reserved_area(u64 start, u64 end) -{ - u64 ra_start, ra_end; - int ret = 0; - - ra_start = fw_dump.reserve_dump_area_start; - ra_end = ra_start + fw_dump.reserve_dump_area_size; - - if ((ra_start < end) && (ra_end > start)) { - if ((start < ra_start) && (end > ra_end)) { - ret = fadump_add_mem_range(&crash_mrange_info, - start, ra_start); - if (ret) - return ret; - - ret = fadump_add_mem_range(&crash_mrange_info, - ra_end, end); - } else if (start < ra_start) { - ret = fadump_add_mem_range(&crash_mrange_info, - start, ra_start); - } else if (ra_end < end) { - ret = fadump_add_mem_range(&crash_mrange_info, - ra_end, end); - } - } else - ret = fadump_add_mem_range(&crash_mrange_info, start, end); - - return ret; -} - static int fadump_init_elfcore_header(char *bufp) { struct elfhdr *elf; @@ -998,52 +958,6 @@ static int fadump_init_elfcore_header(char *bufp) } /* - * Traverse through memblock structure and setup crash memory ranges. These - * ranges will be used create PT_LOAD program headers in elfcore header. - */ -static int fadump_setup_crash_memory_ranges(void) -{ - u64 i, start, end; - int ret; - - pr_debug("Setup crash memory ranges.\n"); - crash_mrange_info.mem_range_cnt = 0; - - /* - * Boot memory region(s) registered with firmware are moved to - * different location at the time of crash. Create separate program - * header(s) for this memory chunk(s) with the correct offset. - */ - for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) { - start = fw_dump.boot_mem_addr[i]; - end = start + fw_dump.boot_mem_sz[i]; - ret = fadump_add_mem_range(&crash_mrange_info, start, end); - if (ret) - return ret; - } - - for_each_mem_range(i, &start, &end) { - /* - * skip the memory chunk that is already added - * (0 through boot_memory_top). - */ - if (start < fw_dump.boot_mem_top) { - if (end > fw_dump.boot_mem_top) - start = fw_dump.boot_mem_top; - else - continue; - } - - /* add this range excluding the reserved dump area. */ - ret = fadump_exclude_reserved_area(start, end); - if (ret) - return ret; - } - - return 0; -} - -/* * If the given physical address falls within the boot memory region then * return the relocated address that points to the dump region reserved * for saving initial boot memory contents. @@ -1073,36 +987,50 @@ static inline unsigned long fadump_relocate(unsigned long paddr) return raddr; } -static int fadump_create_elfcore_headers(char *bufp) +static void __init populate_elf_pt_load(struct elf_phdr *phdr, u64 start, + u64 size, unsigned long long offset) { - unsigned long long raddr, offset; - struct elf_phdr *phdr; + phdr->p_align = 0; + phdr->p_memsz = size; + phdr->p_filesz = size; + phdr->p_paddr = start; + phdr->p_offset = offset; + phdr->p_type = PT_LOAD; + phdr->p_flags = PF_R|PF_W|PF_X; + phdr->p_vaddr = (unsigned long)__va(start); +} + +static void __init fadump_populate_elfcorehdr(struct fadump_crash_info_header *fdh) +{ + char *bufp; struct elfhdr *elf; - int i, j; + struct elf_phdr *phdr; + u64 boot_mem_dest_offset; + unsigned long long i, ra_start, ra_end, ra_size, mstart, mend; + bufp = (char *) fw_dump.elfcorehdr_addr; fadump_init_elfcore_header(bufp); elf = (struct elfhdr *)bufp; bufp += sizeof(struct elfhdr); /* - * setup ELF PT_NOTE, place holder for cpu notes info. The notes info - * will be populated during second kernel boot after crash. Hence - * this PT_NOTE will always be the first elf note. + * Set up ELF PT_NOTE, a placeholder for CPU notes information. + * The notes info will be populated later by platform-specific code. + * Hence, this PT_NOTE will always be the first ELF note. * * NOTE: Any new ELF note addition should be placed after this note. */ phdr = (struct elf_phdr *)bufp; bufp += sizeof(struct elf_phdr); phdr->p_type = PT_NOTE; - phdr->p_flags = 0; - phdr->p_vaddr = 0; - phdr->p_align = 0; - - phdr->p_offset = 0; - phdr->p_paddr = 0; - phdr->p_filesz = 0; - phdr->p_memsz = 0; - + phdr->p_flags = 0; + phdr->p_vaddr = 0; + phdr->p_align = 0; + phdr->p_offset = 0; + phdr->p_paddr = 0; + phdr->p_filesz = 0; + phdr->p_memsz = 0; + /* Increment number of program headers. */ (elf->e_phnum)++; /* setup ELF PT_NOTE for vmcoreinfo */ @@ -1112,55 +1040,66 @@ static int fadump_create_elfcore_headers(char *bufp) phdr->p_flags = 0; phdr->p_vaddr = 0; phdr->p_align = 0; - - phdr->p_paddr = fadump_relocate(paddr_vmcoreinfo_note()); - phdr->p_offset = phdr->p_paddr; - phdr->p_memsz = phdr->p_filesz = VMCOREINFO_NOTE_SIZE; - + phdr->p_paddr = phdr->p_offset = fdh->vmcoreinfo_raddr; + phdr->p_memsz = phdr->p_filesz = fdh->vmcoreinfo_size; /* Increment number of program headers. */ (elf->e_phnum)++; - /* setup PT_LOAD sections. */ - j = 0; - offset = 0; - raddr = fw_dump.boot_mem_addr[0]; - for (i = 0; i < crash_mrange_info.mem_range_cnt; i++) { - u64 mbase, msize; - - mbase = crash_mrange_info.mem_ranges[i].base; - msize = crash_mrange_info.mem_ranges[i].size; - if (!msize) - continue; - + /* + * Setup PT_LOAD sections. first include boot memory regions + * and then add rest of the memory regions. + */ + boot_mem_dest_offset = fw_dump.boot_mem_dest_addr; + for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) { phdr = (struct elf_phdr *)bufp; bufp += sizeof(struct elf_phdr); - phdr->p_type = PT_LOAD; - phdr->p_flags = PF_R|PF_W|PF_X; - phdr->p_offset = mbase; - - if (mbase == raddr) { - /* - * The entire real memory region will be moved by - * firmware to the specified destination_address. - * Hence set the correct offset. - */ - phdr->p_offset = fw_dump.boot_mem_dest_addr + offset; - if (j < (fw_dump.boot_mem_regs_cnt - 1)) { - offset += fw_dump.boot_mem_sz[j]; - raddr = fw_dump.boot_mem_addr[++j]; - } + populate_elf_pt_load(phdr, fw_dump.boot_mem_addr[i], + fw_dump.boot_mem_sz[i], + boot_mem_dest_offset); + /* Increment number of program headers. */ + (elf->e_phnum)++; + boot_mem_dest_offset += fw_dump.boot_mem_sz[i]; + } + + /* Memory reserved for fadump in first kernel */ + ra_start = fw_dump.reserve_dump_area_start; + ra_size = get_fadump_area_size(); + ra_end = ra_start + ra_size; + + phdr = (struct elf_phdr *)bufp; + for_each_mem_range(i, &mstart, &mend) { + /* Boot memory regions already added, skip them now */ + if (mstart < fw_dump.boot_mem_top) { + if (mend > fw_dump.boot_mem_top) + mstart = fw_dump.boot_mem_top; + else + continue; } - phdr->p_paddr = mbase; - phdr->p_vaddr = (unsigned long)__va(mbase); - phdr->p_filesz = msize; - phdr->p_memsz = msize; - phdr->p_align = 0; + /* Handle memblock regions overlaps with fadump reserved area */ + if ((ra_start < mend) && (ra_end > mstart)) { + if ((mstart < ra_start) && (mend > ra_end)) { + populate_elf_pt_load(phdr, mstart, ra_start - mstart, mstart); + /* Increment number of program headers. */ + (elf->e_phnum)++; + bufp += sizeof(struct elf_phdr); + phdr = (struct elf_phdr *)bufp; + populate_elf_pt_load(phdr, ra_end, mend - ra_end, ra_end); + } else if (mstart < ra_start) { + populate_elf_pt_load(phdr, mstart, ra_start - mstart, mstart); + } else if (ra_end < mend) { + populate_elf_pt_load(phdr, ra_end, mend - ra_end, ra_end); + } + } else { + /* No overlap with fadump reserved memory region */ + populate_elf_pt_load(phdr, mstart, mend - mstart, mstart); + } /* Increment number of program headers. */ (elf->e_phnum)++; + bufp += sizeof(struct elf_phdr); + phdr = (struct elf_phdr *) bufp; } - return 0; } static unsigned long init_fadump_header(unsigned long addr) @@ -1175,14 +1114,25 @@ static unsigned long init_fadump_header(unsigned long addr) memset(fdh, 0, sizeof(struct fadump_crash_info_header)); fdh->magic_number = FADUMP_CRASH_INFO_MAGIC; - fdh->elfcorehdr_addr = addr; + fdh->version = FADUMP_HEADER_VERSION; /* We will set the crashing cpu id in crash_fadump() during crash. */ fdh->crashing_cpu = FADUMP_CPU_UNKNOWN; + + /* + * The physical address and size of vmcoreinfo are required in the + * second kernel to prepare elfcorehdr. + */ + fdh->vmcoreinfo_raddr = fadump_relocate(paddr_vmcoreinfo_note()); + fdh->vmcoreinfo_size = VMCOREINFO_NOTE_SIZE; + + + fdh->pt_regs_sz = sizeof(struct pt_regs); /* * When LPAR is terminated by PYHP, ensure all possible CPUs' * register data is processed while exporting the vmcore. */ fdh->cpu_mask = *cpu_possible_mask; + fdh->cpu_mask_sz = sizeof(struct cpumask); return addr; } @@ -1190,8 +1140,6 @@ static unsigned long init_fadump_header(unsigned long addr) static int register_fadump(void) { unsigned long addr; - void *vaddr; - int ret; /* * If no memory is reserved then we can not register for firmware- @@ -1200,18 +1148,10 @@ static int register_fadump(void) if (!fw_dump.reserve_dump_area_size) return -ENODEV; - ret = fadump_setup_crash_memory_ranges(); - if (ret) - return ret; - addr = fw_dump.fadumphdr_addr; /* Initialize fadump crash info header. */ addr = init_fadump_header(addr); - vaddr = __va(addr); - - pr_debug("Creating ELF core headers at %#016lx\n", addr); - fadump_create_elfcore_headers(vaddr); /* register the future kernel dump with firmware. */ pr_debug("Registering for firmware-assisted kernel dump...\n"); @@ -1230,7 +1170,6 @@ void fadump_cleanup(void) } else if (fw_dump.dump_registered) { /* Un-register Firmware-assisted dump if it was registered. */ fw_dump.ops->fadump_unregister(&fw_dump); - fadump_free_mem_ranges(&crash_mrange_info); } if (fw_dump.ops->fadump_cleanup) @@ -1416,6 +1355,22 @@ static void fadump_release_memory(u64 begin, u64 end) fadump_release_reserved_area(tstart, end); } +static void fadump_free_elfcorehdr_buf(void) +{ + if (fw_dump.elfcorehdr_addr == 0 || fw_dump.elfcorehdr_size == 0) + return; + + /* + * Before freeing the memory of `elfcorehdr`, reset the global + * `elfcorehdr_addr` to prevent modules like `vmcore` from accessing + * invalid memory. + */ + elfcorehdr_addr = ELFCORE_ADDR_ERR; + fadump_free_buffer(fw_dump.elfcorehdr_addr, fw_dump.elfcorehdr_size); + fw_dump.elfcorehdr_addr = 0; + fw_dump.elfcorehdr_size = 0; +} + static void fadump_invalidate_release_mem(void) { mutex_lock(&fadump_mutex); @@ -1427,6 +1382,7 @@ static void fadump_invalidate_release_mem(void) fadump_cleanup(); mutex_unlock(&fadump_mutex); + fadump_free_elfcorehdr_buf(); fadump_release_memory(fw_dump.boot_mem_top, memblock_end_of_DRAM()); fadump_free_cpu_notes_buf(); @@ -1484,6 +1440,18 @@ static ssize_t enabled_show(struct kobject *kobj, return sprintf(buf, "%d\n", fw_dump.fadump_enabled); } +/* + * /sys/kernel/fadump/hotplug_ready sysfs node returns 1, which inidcates + * to usersapce that fadump re-registration is not required on memory + * hotplug events. + */ +static ssize_t hotplug_ready_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", 1); +} + static ssize_t mem_reserved_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -1498,6 +1466,43 @@ static ssize_t registered_show(struct kobject *kobj, return sprintf(buf, "%d\n", fw_dump.dump_registered); } +static ssize_t bootargs_append_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%s\n", (char *)__va(fw_dump.param_area)); +} + +static ssize_t bootargs_append_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + char *params; + + if (!fw_dump.fadump_enabled || fw_dump.dump_active) + return -EPERM; + + if (count >= COMMAND_LINE_SIZE) + return -EINVAL; + + /* + * Fail here instead of handling this scenario with + * some silly workaround in capture kernel. + */ + if (saved_command_line_len + count >= COMMAND_LINE_SIZE) { + pr_err("Appending parameters exceeds cmdline size!\n"); + return -ENOSPC; + } + + params = __va(fw_dump.param_area); + strscpy_pad(params, buf, COMMAND_LINE_SIZE); + /* Remove newline character at the end. */ + if (params[count-1] == '\n') + params[count-1] = '\0'; + + return count; +} + static ssize_t registered_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) @@ -1556,11 +1561,14 @@ static struct kobj_attribute release_attr = __ATTR_WO(release_mem); static struct kobj_attribute enable_attr = __ATTR_RO(enabled); static struct kobj_attribute register_attr = __ATTR_RW(registered); static struct kobj_attribute mem_reserved_attr = __ATTR_RO(mem_reserved); +static struct kobj_attribute hotplug_ready_attr = __ATTR_RO(hotplug_ready); +static struct kobj_attribute bootargs_append_attr = __ATTR_RW(bootargs_append); static struct attribute *fadump_attrs[] = { &enable_attr.attr, ®ister_attr.attr, &mem_reserved_attr.attr, + &hotplug_ready_attr.attr, NULL, }; @@ -1632,6 +1640,150 @@ static void __init fadump_init_files(void) return; } +static int __init fadump_setup_elfcorehdr_buf(void) +{ + int elf_phdr_cnt; + unsigned long elfcorehdr_size; + + /* + * Program header for CPU notes comes first, followed by one for + * vmcoreinfo, and the remaining program headers correspond to + * memory regions. + */ + elf_phdr_cnt = 2 + fw_dump.boot_mem_regs_cnt + memblock_num_regions(memory); + elfcorehdr_size = sizeof(struct elfhdr) + (elf_phdr_cnt * sizeof(struct elf_phdr)); + elfcorehdr_size = PAGE_ALIGN(elfcorehdr_size); + + fw_dump.elfcorehdr_addr = (u64)fadump_alloc_buffer(elfcorehdr_size); + if (!fw_dump.elfcorehdr_addr) { + pr_err("Failed to allocate %lu bytes for elfcorehdr\n", + elfcorehdr_size); + return -ENOMEM; + } + fw_dump.elfcorehdr_size = elfcorehdr_size; + return 0; +} + +/* + * Check if the fadump header of crashed kernel is compatible with fadump kernel. + * + * It checks the magic number, endianness, and size of non-primitive type + * members of fadump header to ensure safe dump collection. + */ +static bool __init is_fadump_header_compatible(struct fadump_crash_info_header *fdh) +{ + if (fdh->magic_number == FADUMP_CRASH_INFO_MAGIC_OLD) { + pr_err("Old magic number, can't process the dump.\n"); + return false; + } + + if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) { + if (fdh->magic_number == swab64(FADUMP_CRASH_INFO_MAGIC)) + pr_err("Endianness mismatch between the crashed and fadump kernels.\n"); + else + pr_err("Fadump header is corrupted.\n"); + + return false; + } + + /* + * Dump collection is not safe if the size of non-primitive type members + * of the fadump header do not match between crashed and fadump kernel. + */ + if (fdh->pt_regs_sz != sizeof(struct pt_regs) || + fdh->cpu_mask_sz != sizeof(struct cpumask)) { + pr_err("Fadump header size mismatch.\n"); + return false; + } + + return true; +} + +static void __init fadump_process(void) +{ + struct fadump_crash_info_header *fdh; + + fdh = (struct fadump_crash_info_header *) __va(fw_dump.fadumphdr_addr); + if (!fdh) { + pr_err("Crash info header is empty.\n"); + goto err_out; + } + + /* Avoid processing the dump if fadump header isn't compatible */ + if (!is_fadump_header_compatible(fdh)) + goto err_out; + + /* Allocate buffer for elfcorehdr */ + if (fadump_setup_elfcorehdr_buf()) + goto err_out; + + fadump_populate_elfcorehdr(fdh); + + /* Let platform update the CPU notes in elfcorehdr */ + if (fw_dump.ops->fadump_process(&fw_dump) < 0) + goto err_out; + + /* + * elfcorehdr is now ready to be exported. + * + * set elfcorehdr_addr so that vmcore module will export the + * elfcorehdr through '/proc/vmcore'. + */ + elfcorehdr_addr = virt_to_phys((void *)fw_dump.elfcorehdr_addr); + return; + +err_out: + fadump_invalidate_release_mem(); +} + +/* + * Reserve memory to store additional parameters to be passed + * for fadump/capture kernel. + */ +static void __init fadump_setup_param_area(void) +{ + phys_addr_t range_start, range_end; + + if (!fw_dump.param_area_supported || fw_dump.dump_active) + return; + + /* This memory can't be used by PFW or bootloader as it is shared across kernels */ + if (radix_enabled()) { + /* + * Anywhere in the upper half should be good enough as all memory + * is accessible in real mode. + */ + range_start = memblock_end_of_DRAM() / 2; + range_end = memblock_end_of_DRAM(); + } else { + /* + * Passing additional parameters is supported for hash MMU only + * if the first memory block size is 768MB or higher. + */ + if (ppc64_rma_size < 0x30000000) + return; + + /* + * 640 MB to 768 MB is not used by PFW/bootloader. So, try reserving + * memory for passing additional parameters in this range to avoid + * being stomped on by PFW/bootloader. + */ + range_start = 0x2A000000; + range_end = range_start + 0x4000000; + } + + fw_dump.param_area = memblock_phys_alloc_range(COMMAND_LINE_SIZE, + COMMAND_LINE_SIZE, + range_start, + range_end); + if (!fw_dump.param_area || sysfs_create_file(fadump_kobj, &bootargs_append_attr.attr)) { + pr_warn("WARNING: Could not setup area to pass additional parameters!\n"); + return; + } + + memset(phys_to_virt(fw_dump.param_area), 0, COMMAND_LINE_SIZE); +} + /* * Prepare for firmware-assisted dump. */ @@ -1651,15 +1803,11 @@ int __init setup_fadump(void) * saving it to the disk. */ if (fw_dump.dump_active) { - /* - * if dump process fails then invalidate the registration - * and release memory before proceeding for re-registration. - */ - if (fw_dump.ops->fadump_process(&fw_dump) < 0) - fadump_invalidate_release_mem(); + fadump_process(); } /* Initialize the kernel dump memory structure and register with f/w */ else if (fw_dump.reserve_dump_area_size) { + fadump_setup_param_area(); fw_dump.ops->fadump_init_mem_struct(&fw_dump); register_fadump(); } diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 647b0b445e89..edc479a7c2bc 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -199,12 +199,12 @@ instruction_counter: mfspr r10, SPRN_SRR0 /* Get effective address of fault */ INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11) mtspr SPRN_MD_EPN, r10 -#ifdef CONFIG_MODULES +#ifdef CONFIG_EXECMEM mfcr r11 compare_to_kernel_boundary r10, r10 #endif mfspr r10, SPRN_M_TWB /* Get level 1 table */ -#ifdef CONFIG_MODULES +#ifdef CONFIG_EXECMEM blt+ 3f rlwinm r10, r10, 0, 20, 31 oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index c1d89764dd22..57196883a00e 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -419,14 +419,14 @@ InstructionTLBMiss: */ /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_IMISS -#ifdef CONFIG_MODULES +#ifdef CONFIG_EXECMEM lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 #endif mfspr r2, SPRN_SDR1 li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC rlwinm r2, r2, 28, 0xfffff000 -#ifdef CONFIG_MODULES +#ifdef CONFIG_EXECMEM li r0, 3 bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ @@ -442,7 +442,7 @@ InstructionTLBMiss: andc. r1,r1,r2 /* check access & ~permission */ bne- InstructionAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ -#ifdef CONFIG_MODULES +#ifdef CONFIG_EXECMEM rlwimi r2, r0, 0, 31, 31 /* userspace ? -> PP lsb */ #endif ori r1, r1, 0xe06 /* clear out reserved bits */ diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c index 072ebe7f290b..f8208c027148 100644 --- a/arch/powerpc/kernel/kprobes-ftrace.c +++ b/arch/powerpc/kernel/kprobes-ftrace.c @@ -21,6 +21,9 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, struct pt_regs *regs; int bit; + if (unlikely(kprobe_ftrace_disabled)) + return; + bit = ftrace_test_recursion_trylock(nip, parent_nip); if (bit < 0) return; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index bbca90a5e2ec..14c5ddec3056 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -19,8 +19,8 @@ #include <linux/extable.h> #include <linux/kdebug.h> #include <linux/slab.h> -#include <linux/moduleloader.h> #include <linux/set_memory.h> +#include <linux/execmem.h> #include <asm/code-patching.h> #include <asm/cacheflush.h> #include <asm/sstep.h> @@ -126,26 +126,6 @@ kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offse return (kprobe_opcode_t *)(addr + offset); } -void *alloc_insn_page(void) -{ - void *page; - - page = module_alloc(PAGE_SIZE); - if (!page) - return NULL; - - if (strict_module_rwx_enabled()) { - int err = set_memory_rox((unsigned long)page, 1); - - if (err) - goto error; - } - return page; -error: - module_memfree(page); - return NULL; -} - int arch_prepare_kprobe(struct kprobe *p) { int ret = 0; diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 1a8cdafd68e8..91123e102db4 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -192,7 +192,7 @@ _GLOBAL(scom970_read) xori r0,r0,MSR_EE mtmsrd r0,1 - /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits + /* rotate 24 bits SCOM address 8 bits left and mask out its low 8 bits * (including parity). On current CPUs they must be 0'd, * and finally or in RW bit */ @@ -226,7 +226,7 @@ _GLOBAL(scom970_write) xori r0,r0,MSR_EE mtmsrd r0,1 - /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits + /* rotate 24 bits SCOM address 8 bits left and mask out its low 8 bits * (including parity). On current CPUs they must be 0'd. */ diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index f6d6ae0a1692..baeb24c102c8 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -7,7 +7,6 @@ #include <linux/elf.h> #include <linux/moduleloader.h> #include <linux/err.h> -#include <linux/vmalloc.h> #include <linux/mm.h> #include <linux/bug.h> #include <asm/module.h> @@ -17,8 +16,6 @@ #include <asm/setup.h> #include <asm/sections.h> -static LIST_HEAD(module_bug_list); - static const Elf_Shdr *find_section(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, const char *name) @@ -88,40 +85,3 @@ int module_finalize(const Elf_Ehdr *hdr, return 0; } - -static __always_inline void * -__module_alloc(unsigned long size, unsigned long start, unsigned long end, bool nowarn) -{ - pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : PAGE_KERNEL_EXEC; - gfp_t gfp = GFP_KERNEL | (nowarn ? __GFP_NOWARN : 0); - - /* - * Don't do huge page allocations for modules yet until more testing - * is done. STRICT_MODULE_RWX may require extra work to support this - * too. - */ - return __vmalloc_node_range(size, 1, start, end, gfp, prot, - VM_FLUSH_RESET_PERMS, - NUMA_NO_NODE, __builtin_return_address(0)); -} - -void *module_alloc(unsigned long size) -{ -#ifdef MODULES_VADDR - unsigned long limit = (unsigned long)_etext - SZ_32M; - void *ptr = NULL; - - BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR); - - /* First try within 32M limit from _etext to avoid branch trampolines */ - if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit) - ptr = __module_alloc(size, limit, MODULES_END, true); - - if (!ptr) - ptr = __module_alloc(size, MODULES_VADDR, MODULES_END, false); - - return ptr; -#else - return __module_alloc(size, VMALLOC_START, VMALLOC_END, false); -#endif -} diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 9452a54d356c..a7671786764b 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1185,6 +1185,9 @@ static inline void save_sprs(struct thread_struct *t) if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE)) t->hashkeyr = mfspr(SPRN_HASHKEYR); + + if (cpu_has_feature(CPU_FTR_ARCH_31)) + t->dexcr = mfspr(SPRN_DEXCR); #endif } @@ -1267,6 +1270,10 @@ static inline void restore_sprs(struct thread_struct *old_thread, if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE) && old_thread->hashkeyr != new_thread->hashkeyr) mtspr(SPRN_HASHKEYR, new_thread->hashkeyr); + + if (cpu_has_feature(CPU_FTR_ARCH_31) && + old_thread->dexcr != new_thread->dexcr) + mtspr(SPRN_DEXCR, new_thread->dexcr); #endif } @@ -1634,6 +1641,13 @@ void arch_setup_new_exec(void) current->thread.regs->amr = default_amr; current->thread.regs->iamr = default_iamr; #endif + +#ifdef CONFIG_PPC_BOOK3S_64 + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + current->thread.dexcr = current->thread.dexcr_onexec; + mtspr(SPRN_DEXCR, current->thread.dexcr); + } +#endif /* CONFIG_PPC_BOOK3S_64 */ } #ifdef CONFIG_PPC64 @@ -1647,7 +1661,7 @@ void arch_setup_new_exec(void) * cases will happen: * * 1. The correct thread is running, the wrong thread is not - * In this situation, the correct thread is woken and proceeds to pass it's + * In this situation, the correct thread is woken and proceeds to pass its * condition check. * * 2. Neither threads are running @@ -1657,15 +1671,15 @@ void arch_setup_new_exec(void) * for the wrong thread, or they will execute the condition check immediately. * * 3. The wrong thread is running, the correct thread is not - * The wrong thread will be woken, but will fail it's condition check and + * The wrong thread will be woken, but will fail its condition check and * re-execute wait. The correct thread, when scheduled, will execute either - * it's condition check (which will pass), or wait, which returns immediately - * when called the first time after the thread is scheduled, followed by it's + * its condition check (which will pass), or wait, which returns immediately + * when called the first time after the thread is scheduled, followed by its * condition check (which will pass). * * 4. Both threads are running - * Both threads will be woken. The wrong thread will fail it's condition check - * and execute another wait, while the correct thread will pass it's condition + * Both threads will be woken. The wrong thread will fail its condition check + * and execute another wait, while the correct thread will pass its condition * check. * * @t: the task to set the thread ID for @@ -1878,6 +1892,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) #ifdef CONFIG_PPC_BOOK3S_64 if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE)) p->thread.hashkeyr = current->thread.hashkeyr; + + if (cpu_has_feature(CPU_FTR_ARCH_31)) + p->thread.dexcr = mfspr(SPRN_DEXCR); #endif return 0; } diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index cd8d8883de90..60819751e55e 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -779,7 +779,7 @@ static inline void save_fscr_to_task(void) {} void __init early_init_devtree(void *params) { - phys_addr_t limit; + phys_addr_t int_vector_size; DBG(" -> early_init_devtree(%px)\n", params); @@ -813,6 +813,9 @@ void __init early_init_devtree(void *params) */ of_scan_flat_dt(early_init_dt_scan_chosen_ppc, boot_command_line); + /* Append additional parameters passed for fadump capture kernel */ + fadump_append_bootargs(); + /* Scan memory nodes and rebuild MEMBLOCKs */ early_init_dt_scan_root(); early_init_dt_scan_memory_ppc(); @@ -832,9 +835,16 @@ void __init early_init_devtree(void *params) setup_initial_memory_limit(memstart_addr, first_memblock_size); /* Reserve MEMBLOCK regions used by kernel, initrd, dt, etc... */ memblock_reserve(PHYSICAL_START, __pa(_end) - PHYSICAL_START); +#ifdef CONFIG_PPC64 + /* If relocatable, reserve at least 32k for interrupt vectors etc. */ + int_vector_size = __end_interrupts - _stext; + int_vector_size = max_t(phys_addr_t, SZ_32K, int_vector_size); +#else /* If relocatable, reserve first 32k for interrupt vectors etc. */ + int_vector_size = SZ_32K; +#endif if (PHYSICAL_START > MEMORY_START) - memblock_reserve(MEMORY_START, 0x8000); + memblock_reserve(MEMORY_START, int_vector_size); reserve_kdump_trampoline(); #if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP) /* @@ -846,9 +856,12 @@ void __init early_init_devtree(void *params) reserve_crashkernel(); early_reserve_mem(); - /* Ensure that total memory size is page-aligned. */ - limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE); - memblock_enforce_memory_limit(limit); + if (memory_limit > memblock_phys_mem_size()) + memory_limit = 0; + + /* Align down to 16 MB which is large page size with hash page translation */ + memory_limit = ALIGN_DOWN(memory_limit ?: memblock_phys_mem_size(), SZ_16M); + memblock_enforce_memory_limit(memory_limit); #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_4K_PAGES) if (!early_radix_enabled()) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 0ef358285337..fbb68fc28ed3 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -817,8 +817,8 @@ static void __init early_cmdline_parse(void) opt += 4; prom_memory_limit = prom_memparse(opt, (const char **)&opt); #ifdef CONFIG_PPC64 - /* Align to 16 MB == size of ppc64 large page */ - prom_memory_limit = ALIGN(prom_memory_limit, 0x1000000); + /* Align down to 16 MB which is large page size with hash page translation */ + prom_memory_limit = ALIGN_DOWN(prom_memory_limit, SZ_16M); #endif } diff --git a/arch/powerpc/kernel/ptrace/ptrace-tm.c b/arch/powerpc/kernel/ptrace/ptrace-tm.c index 210ea834e603..447bff87fd21 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-tm.c +++ b/arch/powerpc/kernel/ptrace/ptrace-tm.c @@ -12,7 +12,7 @@ void flush_tmregs_to_thread(struct task_struct *tsk) { /* * If task is not current, it will have been flushed already to - * it's thread_struct during __switch_to(). + * its thread_struct during __switch_to(). * * A reclaim flushes ALL the state or if not in TM save TM SPRs * in the appropriate thread structures from live. diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index 584cf5c3df50..c1819e0a6684 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -469,12 +469,7 @@ static int dexcr_get(struct task_struct *target, const struct user_regset *regse if (!cpu_has_feature(CPU_FTR_ARCH_31)) return -ENODEV; - /* - * The DEXCR is currently static across all CPUs, so we don't - * store the target's value anywhere, but the static value - * will also be correct. - */ - membuf_store(&to, (u64)lower_32_bits(DEXCR_INIT)); + membuf_store(&to, (u64)lower_32_bits(target->thread.dexcr)); /* * Technically the HDEXCR is per-cpu, but a hypervisor can't reasonably diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 01ed1263e1a9..4bd2f87616ba 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -405,7 +405,7 @@ static void __init cpu_init_thread_core_maps(int tpc) cpumask_set_cpu(i, &threads_core_mask); printk(KERN_INFO "CPU maps initialized for %d thread%s per core\n", - tpc, tpc > 1 ? "s" : ""); + tpc, str_plural(tpc)); printk(KERN_DEBUG " (thread shift is %d)\n", threads_shift); } diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 2f19d5e94485..ae36a129789f 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -834,6 +834,7 @@ static __init int pcpu_cpu_to_node(int cpu) unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); +DEFINE_STATIC_KEY_FALSE(__percpu_first_chunk_is_paged); void __init setup_per_cpu_areas(void) { @@ -876,6 +877,7 @@ void __init setup_per_cpu_areas(void) if (rc < 0) panic("cannot initialize percpu area (err=%d)", rc); + static_key_enable(&__percpu_first_chunk_is_paged.key); delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) { __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 12e53b3d7923..46e6d2cd7a2d 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1567,7 +1567,7 @@ static void add_cpu_to_masks(int cpu) /* * This CPU will not be in the online mask yet so we need to manually - * add it to it's own thread sibling mask. + * add it to its own thread sibling mask. */ map_cpu_to_node(cpu, cpu_to_node(cpu)); cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 0f39a6b84132..b842c83ab497 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -139,7 +139,7 @@ static unsigned long dscr_default; * @val: Returned cpu specific DSCR default value * * This function returns the per cpu DSCR default value - * for any cpu which is contained in it's PACA structure. + * for any cpu which is contained in its PACA structure. */ static void read_dscr(void *val) { @@ -152,7 +152,7 @@ static void read_dscr(void *val) * @val: New cpu specific DSCR default value to update * * This function updates the per cpu DSCR default value - * for any cpu which is contained in it's PACA structure. + * for any cpu which is contained in its PACA structure. */ static void write_dscr(void *val) { diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile index 8e469c4da3f8..470eb0453e17 100644 --- a/arch/powerpc/kexec/Makefile +++ b/arch/powerpc/kexec/Makefile @@ -3,11 +3,11 @@ # Makefile for the linux kernel. # -obj-y += core.o core_$(BITS).o +obj-y += core.o core_$(BITS).o ranges.o obj-$(CONFIG_PPC32) += relocate_32.o -obj-$(CONFIG_KEXEC_FILE) += file_load.o ranges.o file_load_$(BITS).o elf_$(BITS).o +obj-$(CONFIG_KEXEC_FILE) += file_load.o file_load_$(BITS).o elf_$(BITS).o obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o obj-$(CONFIG_CRASH_DUMP) += crash.o diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c index 762e4d09aacf..85050be08a23 100644 --- a/arch/powerpc/kexec/core_64.c +++ b/arch/powerpc/kexec/core_64.c @@ -17,6 +17,7 @@ #include <linux/cpu.h> #include <linux/hardirq.h> #include <linux/of.h> +#include <linux/libfdt.h> #include <asm/page.h> #include <asm/current.h> @@ -30,6 +31,7 @@ #include <asm/hw_breakpoint.h> #include <asm/svm.h> #include <asm/ultravisor.h> +#include <asm/crashdump-ppc64.h> int machine_kexec_prepare(struct kimage *image) { @@ -419,3 +421,92 @@ static int __init export_htab_values(void) } late_initcall(export_htab_values); #endif /* CONFIG_PPC_64S_HASH_MMU */ + +#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP) +/** + * add_node_props - Reads node properties from device node structure and add + * them to fdt. + * @fdt: Flattened device tree of the kernel + * @node_offset: offset of the node to add a property at + * @dn: device node pointer + * + * Returns 0 on success, negative errno on error. + */ +static int add_node_props(void *fdt, int node_offset, const struct device_node *dn) +{ + int ret = 0; + struct property *pp; + + if (!dn) + return -EINVAL; + + for_each_property_of_node(dn, pp) { + ret = fdt_setprop(fdt, node_offset, pp->name, pp->value, pp->length); + if (ret < 0) { + pr_err("Unable to add %s property: %s\n", pp->name, fdt_strerror(ret)); + return ret; + } + } + return ret; +} + +/** + * update_cpus_node - Update cpus node of flattened device tree using of_root + * device node. + * @fdt: Flattened device tree of the kernel. + * + * Returns 0 on success, negative errno on error. + */ +int update_cpus_node(void *fdt) +{ + struct device_node *cpus_node, *dn; + int cpus_offset, cpus_subnode_offset, ret = 0; + + cpus_offset = fdt_path_offset(fdt, "/cpus"); + if (cpus_offset < 0 && cpus_offset != -FDT_ERR_NOTFOUND) { + pr_err("Malformed device tree: error reading /cpus node: %s\n", + fdt_strerror(cpus_offset)); + return cpus_offset; + } + + if (cpus_offset > 0) { + ret = fdt_del_node(fdt, cpus_offset); + if (ret < 0) { + pr_err("Error deleting /cpus node: %s\n", fdt_strerror(ret)); + return -EINVAL; + } + } + + /* Add cpus node to fdt */ + cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), "cpus"); + if (cpus_offset < 0) { + pr_err("Error creating /cpus node: %s\n", fdt_strerror(cpus_offset)); + return -EINVAL; + } + + /* Add cpus node properties */ + cpus_node = of_find_node_by_path("/cpus"); + ret = add_node_props(fdt, cpus_offset, cpus_node); + of_node_put(cpus_node); + if (ret < 0) + return ret; + + /* Loop through all subnodes of cpus and add them to fdt */ + for_each_node_by_type(dn, "cpu") { + cpus_subnode_offset = fdt_add_subnode(fdt, cpus_offset, dn->full_name); + if (cpus_subnode_offset < 0) { + pr_err("Unable to add %s subnode: %s\n", dn->full_name, + fdt_strerror(cpus_subnode_offset)); + ret = cpus_subnode_offset; + goto out; + } + + ret = add_node_props(fdt, cpus_subnode_offset, dn); + if (ret < 0) + goto out; + } +out: + of_node_put(dn); + return ret; +} +#endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */ diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c index ef5c2d25ec39..9ac3266e4965 100644 --- a/arch/powerpc/kexec/crash.c +++ b/arch/powerpc/kexec/crash.c @@ -16,6 +16,8 @@ #include <linux/delay.h> #include <linux/irq.h> #include <linux/types.h> +#include <linux/libfdt.h> +#include <linux/memory.h> #include <asm/processor.h> #include <asm/machdep.h> @@ -24,6 +26,7 @@ #include <asm/setjmp.h> #include <asm/debug.h> #include <asm/interrupt.h> +#include <asm/kexec_ranges.h> /* * The primary CPU waits a while for all secondary CPUs to enter. This is to @@ -392,3 +395,195 @@ void default_machine_crash_shutdown(struct pt_regs *regs) if (ppc_md.kexec_cpu_down) ppc_md.kexec_cpu_down(1, 0); } + +#ifdef CONFIG_CRASH_HOTPLUG +#undef pr_fmt +#define pr_fmt(fmt) "crash hp: " fmt + +/* + * Advertise preferred elfcorehdr size to userspace via + * /sys/kernel/crash_elfcorehdr_size sysfs interface. + */ +unsigned int arch_crash_get_elfcorehdr_size(void) +{ + unsigned long phdr_cnt; + + /* A program header for possible CPUs + vmcoreinfo */ + phdr_cnt = num_possible_cpus() + 1; + if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) + phdr_cnt += CONFIG_CRASH_MAX_MEMORY_RANGES; + + return sizeof(struct elfhdr) + (phdr_cnt * sizeof(Elf64_Phdr)); +} + +/** + * update_crash_elfcorehdr() - Recreate the elfcorehdr and replace it with old + * elfcorehdr in the kexec segment array. + * @image: the active struct kimage + * @mn: struct memory_notify data handler + */ +static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify *mn) +{ + int ret; + struct crash_mem *cmem = NULL; + struct kexec_segment *ksegment; + void *ptr, *mem, *elfbuf = NULL; + unsigned long elfsz, memsz, base_addr, size; + + ksegment = &image->segment[image->elfcorehdr_index]; + mem = (void *) ksegment->mem; + memsz = ksegment->memsz; + + ret = get_crash_memory_ranges(&cmem); + if (ret) { + pr_err("Failed to get crash mem range\n"); + return; + } + + /* + * The hot unplugged memory is part of crash memory ranges, + * remove it here. + */ + if (image->hp_action == KEXEC_CRASH_HP_REMOVE_MEMORY) { + base_addr = PFN_PHYS(mn->start_pfn); + size = mn->nr_pages * PAGE_SIZE; + ret = remove_mem_range(&cmem, base_addr, size); + if (ret) { + pr_err("Failed to remove hot-unplugged memory from crash memory ranges\n"); + goto out; + } + } + + ret = crash_prepare_elf64_headers(cmem, false, &elfbuf, &elfsz); + if (ret) { + pr_err("Failed to prepare elf header\n"); + goto out; + } + + /* + * It is unlikely that kernel hit this because elfcorehdr kexec + * segment (memsz) is built with addition space to accommodate growing + * number of crash memory ranges while loading the kdump kernel. It is + * Just to avoid any unforeseen case. + */ + if (elfsz > memsz) { + pr_err("Updated crash elfcorehdr elfsz %lu > memsz %lu", elfsz, memsz); + goto out; + } + + ptr = __va(mem); + if (ptr) { + /* Temporarily invalidate the crash image while it is replaced */ + xchg(&kexec_crash_image, NULL); + + /* Replace the old elfcorehdr with newly prepared elfcorehdr */ + memcpy((void *)ptr, elfbuf, elfsz); + + /* The crash image is now valid once again */ + xchg(&kexec_crash_image, image); + } +out: + kvfree(cmem); + kvfree(elfbuf); +} + +/** + * get_fdt_index - Loop through the kexec segment array and find + * the index of the FDT segment. + * @image: a pointer to kexec_crash_image + * + * Returns the index of FDT segment in the kexec segment array + * if found; otherwise -1. + */ +static int get_fdt_index(struct kimage *image) +{ + void *ptr; + unsigned long mem; + int i, fdt_index = -1; + + /* Find the FDT segment index in kexec segment array. */ + for (i = 0; i < image->nr_segments; i++) { + mem = image->segment[i].mem; + ptr = __va(mem); + + if (ptr && fdt_magic(ptr) == FDT_MAGIC) { + fdt_index = i; + break; + } + } + + return fdt_index; +} + +/** + * update_crash_fdt - updates the cpus node of the crash FDT. + * + * @image: a pointer to kexec_crash_image + */ +static void update_crash_fdt(struct kimage *image) +{ + void *fdt; + int fdt_index; + + fdt_index = get_fdt_index(image); + if (fdt_index < 0) { + pr_err("Unable to locate FDT segment.\n"); + return; + } + + fdt = __va((void *)image->segment[fdt_index].mem); + + /* Temporarily invalidate the crash image while it is replaced */ + xchg(&kexec_crash_image, NULL); + + /* update FDT to reflect changes in CPU resources */ + if (update_cpus_node(fdt)) + pr_err("Failed to update crash FDT"); + + /* The crash image is now valid once again */ + xchg(&kexec_crash_image, image); +} + +int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags) +{ +#ifdef CONFIG_KEXEC_FILE + if (image->file_mode) + return 1; +#endif + return kexec_flags & KEXEC_CRASH_HOTPLUG_SUPPORT; +} + +/** + * arch_crash_handle_hotplug_event - Handle crash CPU/Memory hotplug events to update the + * necessary kexec segments based on the hotplug event. + * @image: a pointer to kexec_crash_image + * @arg: struct memory_notify handler for memory hotplug case and NULL for CPU hotplug case. + * + * Update the kdump image based on the type of hotplug event, represented by image->hp_action. + * CPU add: Update the FDT segment to include the newly added CPU. + * CPU remove: No action is needed, with the assumption that it's okay to have offline CPUs + * part of the FDT. + * Memory add/remove: No action is taken as this is not yet supported. + */ +void arch_crash_handle_hotplug_event(struct kimage *image, void *arg) +{ + struct memory_notify *mn; + + switch (image->hp_action) { + case KEXEC_CRASH_HP_REMOVE_CPU: + return; + + case KEXEC_CRASH_HP_ADD_CPU: + update_crash_fdt(image); + break; + + case KEXEC_CRASH_HP_REMOVE_MEMORY: + case KEXEC_CRASH_HP_ADD_MEMORY: + mn = (struct memory_notify *)arg; + update_crash_elfcorehdr(image, mn); + return; + default: + pr_warn_once("Unknown hotplug action\n"); + } +} +#endif /* CONFIG_CRASH_HOTPLUG */ diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c index 6d8951e8e966..214c071c58ed 100644 --- a/arch/powerpc/kexec/elf_64.c +++ b/arch/powerpc/kexec/elf_64.c @@ -116,7 +116,8 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, if (ret) goto out_free_fdt; - fdt_pack(fdt); + if (!IS_ENABLED(CONFIG_CRASH_HOTPLUG) || image->type != KEXEC_TYPE_CRASH) + fdt_pack(fdt); kbuf.buffer = fdt; kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt); diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index 1bc65de6174f..925a69ad2468 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -30,6 +30,7 @@ #include <asm/iommu.h> #include <asm/prom.h> #include <asm/plpks.h> +#include <asm/cputhreads.h> struct umem_info { __be64 *buf; /* data buffer for usable-memory property */ @@ -48,83 +49,6 @@ const struct kexec_file_ops * const kexec_file_loaders[] = { }; /** - * get_exclude_memory_ranges - Get exclude memory ranges. This list includes - * regions like opal/rtas, tce-table, initrd, - * kernel, htab which should be avoided while - * setting up kexec load segments. - * @mem_ranges: Range list to add the memory ranges to. - * - * Returns 0 on success, negative errno on error. - */ -static int get_exclude_memory_ranges(struct crash_mem **mem_ranges) -{ - int ret; - - ret = add_tce_mem_ranges(mem_ranges); - if (ret) - goto out; - - ret = add_initrd_mem_range(mem_ranges); - if (ret) - goto out; - - ret = add_htab_mem_range(mem_ranges); - if (ret) - goto out; - - ret = add_kernel_mem_range(mem_ranges); - if (ret) - goto out; - - ret = add_rtas_mem_range(mem_ranges); - if (ret) - goto out; - - ret = add_opal_mem_range(mem_ranges); - if (ret) - goto out; - - ret = add_reserved_mem_ranges(mem_ranges); - if (ret) - goto out; - - /* exclude memory ranges should be sorted for easy lookup */ - sort_memory_ranges(*mem_ranges, true); -out: - if (ret) - pr_err("Failed to setup exclude memory ranges\n"); - return ret; -} - -/** - * get_reserved_memory_ranges - Get reserve memory ranges. This list includes - * memory regions that should be added to the - * memory reserve map to ensure the region is - * protected from any mischief. - * @mem_ranges: Range list to add the memory ranges to. - * - * Returns 0 on success, negative errno on error. - */ -static int get_reserved_memory_ranges(struct crash_mem **mem_ranges) -{ - int ret; - - ret = add_rtas_mem_range(mem_ranges); - if (ret) - goto out; - - ret = add_tce_mem_ranges(mem_ranges); - if (ret) - goto out; - - ret = add_reserved_mem_ranges(mem_ranges); -out: - if (ret) - pr_err("Failed to setup reserved memory ranges\n"); - return ret; -} - -/** * __locate_mem_hole_top_down - Looks top down for a large enough memory hole * in the memory regions between buf_min & buf_max * for the buffer. If found, sets kbuf->mem. @@ -323,119 +247,6 @@ static int locate_mem_hole_bottom_up_ppc64(struct kexec_buf *kbuf, #ifdef CONFIG_CRASH_DUMP /** - * get_usable_memory_ranges - Get usable memory ranges. This list includes - * regions like crashkernel, opal/rtas & tce-table, - * that kdump kernel could use. - * @mem_ranges: Range list to add the memory ranges to. - * - * Returns 0 on success, negative errno on error. - */ -static int get_usable_memory_ranges(struct crash_mem **mem_ranges) -{ - int ret; - - /* - * Early boot failure observed on guests when low memory (first memory - * block?) is not added to usable memory. So, add [0, crashk_res.end] - * instead of [crashk_res.start, crashk_res.end] to workaround it. - * Also, crashed kernel's memory must be added to reserve map to - * avoid kdump kernel from using it. - */ - ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1); - if (ret) - goto out; - - ret = add_rtas_mem_range(mem_ranges); - if (ret) - goto out; - - ret = add_opal_mem_range(mem_ranges); - if (ret) - goto out; - - ret = add_tce_mem_ranges(mem_ranges); -out: - if (ret) - pr_err("Failed to setup usable memory ranges\n"); - return ret; -} - -/** - * get_crash_memory_ranges - Get crash memory ranges. This list includes - * first/crashing kernel's memory regions that - * would be exported via an elfcore. - * @mem_ranges: Range list to add the memory ranges to. - * - * Returns 0 on success, negative errno on error. - */ -static int get_crash_memory_ranges(struct crash_mem **mem_ranges) -{ - phys_addr_t base, end; - struct crash_mem *tmem; - u64 i; - int ret; - - for_each_mem_range(i, &base, &end) { - u64 size = end - base; - - /* Skip backup memory region, which needs a separate entry */ - if (base == BACKUP_SRC_START) { - if (size > BACKUP_SRC_SIZE) { - base = BACKUP_SRC_END + 1; - size -= BACKUP_SRC_SIZE; - } else - continue; - } - - ret = add_mem_range(mem_ranges, base, size); - if (ret) - goto out; - - /* Try merging adjacent ranges before reallocation attempt */ - if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges) - sort_memory_ranges(*mem_ranges, true); - } - - /* Reallocate memory ranges if there is no space to split ranges */ - tmem = *mem_ranges; - if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) { - tmem = realloc_mem_ranges(mem_ranges); - if (!tmem) - goto out; - } - - /* Exclude crashkernel region */ - ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end); - if (ret) - goto out; - - /* - * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL - * regions are exported to save their context at the time of - * crash, they should actually be backed up just like the - * first 64K bytes of memory. - */ - ret = add_rtas_mem_range(mem_ranges); - if (ret) - goto out; - - ret = add_opal_mem_range(mem_ranges); - if (ret) - goto out; - - /* create a separate program header for the backup region */ - ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE); - if (ret) - goto out; - - sort_memory_ranges(*mem_ranges, false); -out: - if (ret) - pr_err("Failed to setup crash memory ranges\n"); - return ret; -} - -/** * check_realloc_usable_mem - Reallocate buffer if it can't accommodate entries * @um_info: Usable memory buffer and ranges info. * @cnt: No. of entries to accommodate. @@ -784,6 +595,23 @@ static void update_backup_region_phdr(struct kimage *image, Elf64_Ehdr *ehdr) } } +static unsigned int kdump_extra_elfcorehdr_size(struct crash_mem *cmem) +{ +#if defined(CONFIG_CRASH_HOTPLUG) && defined(CONFIG_MEMORY_HOTPLUG) + unsigned int extra_sz = 0; + + if (CONFIG_CRASH_MAX_MEMORY_RANGES > (unsigned int)PN_XNUM) + pr_warn("Number of Phdrs %u exceeds max\n", CONFIG_CRASH_MAX_MEMORY_RANGES); + else if (cmem->nr_ranges >= CONFIG_CRASH_MAX_MEMORY_RANGES) + pr_warn("Configured crash mem ranges may not be enough\n"); + else + extra_sz = (CONFIG_CRASH_MAX_MEMORY_RANGES - cmem->nr_ranges) * sizeof(Elf64_Phdr); + + return extra_sz; +#endif + return 0; +} + /** * load_elfcorehdr_segment - Setup crash memory ranges and initialize elfcorehdr * segment needed to load kdump kernel. @@ -815,7 +643,8 @@ static int load_elfcorehdr_segment(struct kimage *image, struct kexec_buf *kbuf) kbuf->buffer = headers; kbuf->mem = KEXEC_BUF_MEM_UNKNOWN; - kbuf->bufsz = kbuf->memsz = headers_sz; + kbuf->bufsz = headers_sz; + kbuf->memsz = headers_sz + kdump_extra_elfcorehdr_size(cmem); kbuf->top_down = false; ret = kexec_add_buffer(kbuf); @@ -979,6 +808,9 @@ static unsigned int kdump_extra_fdt_size_ppc64(struct kimage *image) unsigned int cpu_nodes, extra_size = 0; struct device_node *dn; u64 usm_entries; +#ifdef CONFIG_CRASH_HOTPLUG + unsigned int possible_cpu_nodes; +#endif if (!IS_ENABLED(CONFIG_CRASH_DUMP) || image->type != KEXEC_TYPE_CRASH) return 0; @@ -1006,6 +838,19 @@ static unsigned int kdump_extra_fdt_size_ppc64(struct kimage *image) if (cpu_nodes > boot_cpu_node_count) extra_size += (cpu_nodes - boot_cpu_node_count) * cpu_node_size(); +#ifdef CONFIG_CRASH_HOTPLUG + /* + * Make sure enough space is reserved to accommodate possible CPU nodes + * in the crash FDT. This allows packing possible CPU nodes which are + * not yet present in the system without regenerating the entire FDT. + */ + if (image->type == KEXEC_TYPE_CRASH) { + possible_cpu_nodes = num_possible_cpus() / threads_per_core; + if (possible_cpu_nodes > cpu_nodes) + extra_size += (possible_cpu_nodes - cpu_nodes) * cpu_node_size(); + } +#endif + return extra_size; } @@ -1028,93 +873,6 @@ unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image) return extra_size + kdump_extra_fdt_size_ppc64(image); } -/** - * add_node_props - Reads node properties from device node structure and add - * them to fdt. - * @fdt: Flattened device tree of the kernel - * @node_offset: offset of the node to add a property at - * @dn: device node pointer - * - * Returns 0 on success, negative errno on error. - */ -static int add_node_props(void *fdt, int node_offset, const struct device_node *dn) -{ - int ret = 0; - struct property *pp; - - if (!dn) - return -EINVAL; - - for_each_property_of_node(dn, pp) { - ret = fdt_setprop(fdt, node_offset, pp->name, pp->value, pp->length); - if (ret < 0) { - pr_err("Unable to add %s property: %s\n", pp->name, fdt_strerror(ret)); - return ret; - } - } - return ret; -} - -/** - * update_cpus_node - Update cpus node of flattened device tree using of_root - * device node. - * @fdt: Flattened device tree of the kernel. - * - * Returns 0 on success, negative errno on error. - */ -static int update_cpus_node(void *fdt) -{ - struct device_node *cpus_node, *dn; - int cpus_offset, cpus_subnode_offset, ret = 0; - - cpus_offset = fdt_path_offset(fdt, "/cpus"); - if (cpus_offset < 0 && cpus_offset != -FDT_ERR_NOTFOUND) { - pr_err("Malformed device tree: error reading /cpus node: %s\n", - fdt_strerror(cpus_offset)); - return cpus_offset; - } - - if (cpus_offset > 0) { - ret = fdt_del_node(fdt, cpus_offset); - if (ret < 0) { - pr_err("Error deleting /cpus node: %s\n", fdt_strerror(ret)); - return -EINVAL; - } - } - - /* Add cpus node to fdt */ - cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), "cpus"); - if (cpus_offset < 0) { - pr_err("Error creating /cpus node: %s\n", fdt_strerror(cpus_offset)); - return -EINVAL; - } - - /* Add cpus node properties */ - cpus_node = of_find_node_by_path("/cpus"); - ret = add_node_props(fdt, cpus_offset, cpus_node); - of_node_put(cpus_node); - if (ret < 0) - return ret; - - /* Loop through all subnodes of cpus and add them to fdt */ - for_each_node_by_type(dn, "cpu") { - cpus_subnode_offset = fdt_add_subnode(fdt, cpus_offset, dn->full_name); - if (cpus_subnode_offset < 0) { - pr_err("Unable to add %s subnode: %s\n", dn->full_name, - fdt_strerror(cpus_subnode_offset)); - ret = cpus_subnode_offset; - goto out; - } - - ret = add_node_props(fdt, cpus_subnode_offset, dn); - if (ret < 0) - goto out; - } -out: - of_node_put(dn); - return ret; -} - static int copy_property(void *fdt, int node_offset, const struct device_node *dn, const char *propname) { diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c index 33b780049aaf..3702b0bdab14 100644 --- a/arch/powerpc/kexec/ranges.c +++ b/arch/powerpc/kexec/ranges.c @@ -20,9 +20,13 @@ #include <linux/kexec.h> #include <linux/of.h> #include <linux/slab.h> +#include <linux/memblock.h> +#include <linux/crash_core.h> #include <asm/sections.h> #include <asm/kexec_ranges.h> +#include <asm/crashdump-ppc64.h> +#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP) /** * get_max_nr_ranges - Get the max no. of ranges crash_mem structure * could hold, given the size allocated for it. @@ -234,13 +238,16 @@ int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size) return __add_mem_range(mem_ranges, base, size); } +#endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */ + +#ifdef CONFIG_KEXEC_FILE /** * add_tce_mem_ranges - Adds tce-table range to the given memory ranges list. * @mem_ranges: Range list to add the memory range(s) to. * * Returns 0 on success, negative errno on error. */ -int add_tce_mem_ranges(struct crash_mem **mem_ranges) +static int add_tce_mem_ranges(struct crash_mem **mem_ranges) { struct device_node *dn = NULL; int ret = 0; @@ -279,7 +286,7 @@ int add_tce_mem_ranges(struct crash_mem **mem_ranges) * * Returns 0 on success, negative errno on error. */ -int add_initrd_mem_range(struct crash_mem **mem_ranges) +static int add_initrd_mem_range(struct crash_mem **mem_ranges) { u64 base, end; int ret; @@ -296,7 +303,6 @@ int add_initrd_mem_range(struct crash_mem **mem_ranges) return ret; } -#ifdef CONFIG_PPC_64S_HASH_MMU /** * add_htab_mem_range - Adds htab range to the given memory ranges list, * if it exists @@ -304,14 +310,18 @@ int add_initrd_mem_range(struct crash_mem **mem_ranges) * * Returns 0 on success, negative errno on error. */ -int add_htab_mem_range(struct crash_mem **mem_ranges) +static int add_htab_mem_range(struct crash_mem **mem_ranges) { + +#ifdef CONFIG_PPC_64S_HASH_MMU if (!htab_address) return 0; return add_mem_range(mem_ranges, __pa(htab_address), htab_size_bytes); -} +#else + return 0; #endif +} /** * add_kernel_mem_range - Adds kernel text region to the given @@ -320,18 +330,20 @@ int add_htab_mem_range(struct crash_mem **mem_ranges) * * Returns 0 on success, negative errno on error. */ -int add_kernel_mem_range(struct crash_mem **mem_ranges) +static int add_kernel_mem_range(struct crash_mem **mem_ranges) { return add_mem_range(mem_ranges, 0, __pa(_end)); } +#endif /* CONFIG_KEXEC_FILE */ +#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP) /** * add_rtas_mem_range - Adds RTAS region to the given memory ranges list. * @mem_ranges: Range list to add the memory range to. * * Returns 0 on success, negative errno on error. */ -int add_rtas_mem_range(struct crash_mem **mem_ranges) +static int add_rtas_mem_range(struct crash_mem **mem_ranges) { struct device_node *dn; u32 base, size; @@ -356,7 +368,7 @@ int add_rtas_mem_range(struct crash_mem **mem_ranges) * * Returns 0 on success, negative errno on error. */ -int add_opal_mem_range(struct crash_mem **mem_ranges) +static int add_opal_mem_range(struct crash_mem **mem_ranges) { struct device_node *dn; u64 base, size; @@ -374,7 +386,9 @@ int add_opal_mem_range(struct crash_mem **mem_ranges) of_node_put(dn); return ret; } +#endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */ +#ifdef CONFIG_KEXEC_FILE /** * add_reserved_mem_ranges - Adds "/reserved-ranges" regions exported by f/w * to the given memory ranges list. @@ -382,7 +396,7 @@ int add_opal_mem_range(struct crash_mem **mem_ranges) * * Returns 0 on success, negative errno on error. */ -int add_reserved_mem_ranges(struct crash_mem **mem_ranges) +static int add_reserved_mem_ranges(struct crash_mem **mem_ranges) { int n_mem_addr_cells, n_mem_size_cells, i, len, cells, ret = 0; struct device_node *root = of_find_node_by_path("/"); @@ -412,3 +426,283 @@ int add_reserved_mem_ranges(struct crash_mem **mem_ranges) return ret; } + +/** + * get_reserved_memory_ranges - Get reserve memory ranges. This list includes + * memory regions that should be added to the + * memory reserve map to ensure the region is + * protected from any mischief. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. + */ +int get_reserved_memory_ranges(struct crash_mem **mem_ranges) +{ + int ret; + + ret = add_rtas_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_tce_mem_ranges(mem_ranges); + if (ret) + goto out; + + ret = add_reserved_mem_ranges(mem_ranges); +out: + if (ret) + pr_err("Failed to setup reserved memory ranges\n"); + return ret; +} + +/** + * get_exclude_memory_ranges - Get exclude memory ranges. This list includes + * regions like opal/rtas, tce-table, initrd, + * kernel, htab which should be avoided while + * setting up kexec load segments. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. + */ +int get_exclude_memory_ranges(struct crash_mem **mem_ranges) +{ + int ret; + + ret = add_tce_mem_ranges(mem_ranges); + if (ret) + goto out; + + ret = add_initrd_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_htab_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_kernel_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_rtas_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_opal_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_reserved_mem_ranges(mem_ranges); + if (ret) + goto out; + + /* exclude memory ranges should be sorted for easy lookup */ + sort_memory_ranges(*mem_ranges, true); +out: + if (ret) + pr_err("Failed to setup exclude memory ranges\n"); + return ret; +} + +#ifdef CONFIG_CRASH_DUMP +/** + * get_usable_memory_ranges - Get usable memory ranges. This list includes + * regions like crashkernel, opal/rtas & tce-table, + * that kdump kernel could use. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. + */ +int get_usable_memory_ranges(struct crash_mem **mem_ranges) +{ + int ret; + + /* + * Early boot failure observed on guests when low memory (first memory + * block?) is not added to usable memory. So, add [0, crashk_res.end] + * instead of [crashk_res.start, crashk_res.end] to workaround it. + * Also, crashed kernel's memory must be added to reserve map to + * avoid kdump kernel from using it. + */ + ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1); + if (ret) + goto out; + + ret = add_rtas_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_opal_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_tce_mem_ranges(mem_ranges); +out: + if (ret) + pr_err("Failed to setup usable memory ranges\n"); + return ret; +} +#endif /* CONFIG_CRASH_DUMP */ +#endif /* CONFIG_KEXEC_FILE */ + +#ifdef CONFIG_CRASH_DUMP +/** + * get_crash_memory_ranges - Get crash memory ranges. This list includes + * first/crashing kernel's memory regions that + * would be exported via an elfcore. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. + */ +int get_crash_memory_ranges(struct crash_mem **mem_ranges) +{ + phys_addr_t base, end; + struct crash_mem *tmem; + u64 i; + int ret; + + for_each_mem_range(i, &base, &end) { + u64 size = end - base; + + /* Skip backup memory region, which needs a separate entry */ + if (base == BACKUP_SRC_START) { + if (size > BACKUP_SRC_SIZE) { + base = BACKUP_SRC_END + 1; + size -= BACKUP_SRC_SIZE; + } else + continue; + } + + ret = add_mem_range(mem_ranges, base, size); + if (ret) + goto out; + + /* Try merging adjacent ranges before reallocation attempt */ + if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges) + sort_memory_ranges(*mem_ranges, true); + } + + /* Reallocate memory ranges if there is no space to split ranges */ + tmem = *mem_ranges; + if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) { + tmem = realloc_mem_ranges(mem_ranges); + if (!tmem) + goto out; + } + + /* Exclude crashkernel region */ + ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end); + if (ret) + goto out; + + /* + * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL + * regions are exported to save their context at the time of + * crash, they should actually be backed up just like the + * first 64K bytes of memory. + */ + ret = add_rtas_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_opal_mem_range(mem_ranges); + if (ret) + goto out; + + /* create a separate program header for the backup region */ + ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE); + if (ret) + goto out; + + sort_memory_ranges(*mem_ranges, false); +out: + if (ret) + pr_err("Failed to setup crash memory ranges\n"); + return ret; +} + +/** + * remove_mem_range - Removes the given memory range from the range list. + * @mem_ranges: Range list to remove the memory range to. + * @base: Base address of the range to remove. + * @size: Size of the memory range to remove. + * + * (Re)allocates memory, if needed. + * + * Returns 0 on success, negative errno on error. + */ +int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size) +{ + u64 end; + int ret = 0; + unsigned int i; + u64 mstart, mend; + struct crash_mem *mem_rngs = *mem_ranges; + + if (!size) + return 0; + + /* + * Memory range are stored as start and end address, use + * the same format to do remove operation. + */ + end = base + size - 1; + + for (i = 0; i < mem_rngs->nr_ranges; i++) { + mstart = mem_rngs->ranges[i].start; + mend = mem_rngs->ranges[i].end; + + /* + * Memory range to remove is not part of this range entry + * in the memory range list + */ + if (!(base >= mstart && end <= mend)) + continue; + + /* + * Memory range to remove is equivalent to this entry in the + * memory range list. Remove the range entry from the list. + */ + if (base == mstart && end == mend) { + for (; i < mem_rngs->nr_ranges - 1; i++) { + mem_rngs->ranges[i].start = mem_rngs->ranges[i+1].start; + mem_rngs->ranges[i].end = mem_rngs->ranges[i+1].end; + } + mem_rngs->nr_ranges--; + goto out; + } + /* + * Start address of the memory range to remove and the + * current memory range entry in the list is same. Just + * move the start address of the current memory range + * entry in the list to end + 1. + */ + else if (base == mstart) { + mem_rngs->ranges[i].start = end + 1; + goto out; + } + /* + * End address of the memory range to remove and the + * current memory range entry in the list is same. + * Just move the end address of the current memory + * range entry in the list to base - 1. + */ + else if (end == mend) { + mem_rngs->ranges[i].end = base - 1; + goto out; + } + /* + * Memory range to remove is not at the edge of current + * memory range entry. Split the current memory entry into + * two half. + */ + else { + mem_rngs->ranges[i].end = base - 1; + size = mem_rngs->ranges[i].end - end; + ret = add_mem_range(mem_ranges, end + 1, size); + } + } +out: + return ret; +} +#endif /* CONFIG_CRASH_DUMP */ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 8acec144120e..ff6c38373957 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -360,10 +360,6 @@ static int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, break; } -#if 0 - printk(KERN_INFO "Deliver interrupt 0x%x? %x\n", vec, deliver); -#endif - if (deliver) kvmppc_inject_interrupt(vcpu, vec, 0); @@ -899,11 +895,6 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) return kvm->arch.kvm_ops->test_age_gfn(kvm, range); } -bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) -{ - return kvm->arch.kvm_ops->set_spte_gfn(kvm, range); -} - int kvmppc_core_init_vm(struct kvm *kvm) { diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h index 58391b4b32ed..4aa2ab89afbc 100644 --- a/arch/powerpc/kvm/book3s.h +++ b/arch/powerpc/kvm/book3s.h @@ -12,7 +12,6 @@ extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm, extern bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range); extern bool kvm_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range); extern bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range); -extern bool kvm_set_spte_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range); extern int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu); extern void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 2b1f0cdd8c18..1b51b1c4713b 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -1010,18 +1010,6 @@ bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range) return kvm_test_age_rmapp(kvm, range->slot, range->start); } -bool kvm_set_spte_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range) -{ - WARN_ON(range->start + 1 != range->end); - - if (kvm_is_radix(kvm)) - kvm_unmap_radix(kvm, range->slot, range->start); - else - kvm_unmap_rmapp(kvm, range->slot, range->start); - - return false; -} - static int vcpus_running(struct kvm *kvm) { return atomic_read(&kvm->arch.vcpus_running) != 0; diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 5bbfb2eed127..de126d153328 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -714,7 +714,7 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) case SPRN_HID1: to_book3s(vcpu)->hid[1] = spr_val; break; - case SPRN_HID2: + case SPRN_HID2_750FX: to_book3s(vcpu)->hid[2] = spr_val; break; case SPRN_HID2_GEKKO: @@ -900,7 +900,7 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val case SPRN_HID1: *spr_val = to_book3s(vcpu)->hid[1]; break; - case SPRN_HID2: + case SPRN_HID2_750FX: case SPRN_HID2_GEKKO: *spr_val = to_book3s(vcpu)->hid[2]; break; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 8e86eb577eb8..daaf7faf21a5 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4857,7 +4857,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, * entering a nested guest in which case the decrementer is now owned * by L2 and the L1 decrementer is provided in hdec_expires */ - if (!kvmhv_is_nestedv2() && kvmppc_core_pending_dec(vcpu) && + if (kvmppc_core_pending_dec(vcpu) && ((tb < kvmppc_dec_expires_host_tb(vcpu)) || (trap == BOOK3S_INTERRUPT_SYSCALL && kvmppc_get_gpr(vcpu, 3) == H_ENTER_NESTED))) @@ -6364,7 +6364,6 @@ static struct kvmppc_ops kvm_ops_hv = { .unmap_gfn_range = kvm_unmap_gfn_range_hv, .age_gfn = kvm_age_gfn_hv, .test_age_gfn = kvm_test_age_gfn_hv, - .set_spte_gfn = kvm_set_spte_gfn_hv, .free_memslot = kvmppc_core_free_memslot_hv, .init_vm = kvmppc_core_init_vm_hv, .destroy_vm = kvmppc_core_destroy_vm_hv, diff --git a/arch/powerpc/kvm/book3s_hv_nestedv2.c b/arch/powerpc/kvm/book3s_hv_nestedv2.c index 8e6f5355f08b..1091f7a83b25 100644 --- a/arch/powerpc/kvm/book3s_hv_nestedv2.c +++ b/arch/powerpc/kvm/book3s_hv_nestedv2.c @@ -71,8 +71,8 @@ gs_msg_ops_kvmhv_nestedv2_config_fill_info(struct kvmppc_gs_buff *gsb, } if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_RUN_OUTPUT)) { - kvmppc_gse_put_buff_info(gsb, KVMPPC_GSID_RUN_OUTPUT, - cfg->vcpu_run_output_cfg); + rc = kvmppc_gse_put_buff_info(gsb, KVMPPC_GSID_RUN_OUTPUT, + cfg->vcpu_run_output_cfg); if (rc < 0) return rc; } diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 5b92619a05fd..a7d7137ea0c8 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -461,12 +461,6 @@ static bool kvm_test_age_gfn_pr(struct kvm *kvm, struct kvm_gfn_range *range) return false; } -static bool kvm_set_spte_gfn_pr(struct kvm *kvm, struct kvm_gfn_range *range) -{ - /* The page will get remapped properly on its next fault */ - return do_kvm_unmap_gfn(kvm, range); -} - /*****************************************/ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr) @@ -2071,7 +2065,6 @@ static struct kvmppc_ops kvm_ops_pr = { .unmap_gfn_range = kvm_unmap_gfn_range_pr, .age_gfn = kvm_age_gfn_pr, .test_age_gfn = kvm_test_age_gfn_pr, - .set_spte_gfn = kvm_set_spte_gfn_pr, .free_memslot = kvmppc_core_free_memslot_pr, .init_vm = kvmppc_core_init_vm_pr, .destroy_vm = kvmppc_core_destroy_vm_pr, diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 29a382249770..1362c672387e 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -531,7 +531,7 @@ static int xive_vm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) xc->cppr = xive_prio_from_guest(new_cppr); /* - * IPIs are synthetized from MFRR and thus don't need + * IPIs are synthesized from MFRR and thus don't need * any special EOI handling. The underlying interrupt * used to signal MFRR changes is EOId when fetched from * the queue. diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index ccb8f16ffe41..c664fdec75b1 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -747,12 +747,6 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) return false; } -bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) -{ - /* The page will get remapped properly on its next fault */ - return kvm_e500_mmu_unmap_gfn(kvm, range); -} - /*****************************************/ int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500) diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 0ab65eeb93ee..f14ecab674a3 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -3,8 +3,6 @@ # Makefile for ppc-specific library files.. # -ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) - CFLAGS_code-patching.o += -fno-stack-protector CFLAGS_feature-fixups.o += -fno-stack-protector diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index c6ab46156cda..0d1f3ee91115 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -225,7 +225,7 @@ void __init poking_init(void) static unsigned long get_patch_pfn(void *addr) { - if (IS_ENABLED(CONFIG_MODULES) && is_vmalloc_or_module_addr(addr)) + if (IS_ENABLED(CONFIG_EXECMEM) && is_vmalloc_or_module_addr(addr)) return vmalloc_to_pfn(addr); else return __pa_symbol(addr) >> PAGE_SHIFT; @@ -372,9 +372,32 @@ int patch_instruction(u32 *addr, ppc_inst_t instr) } NOKPROBE_SYMBOL(patch_instruction); +static int patch_memset64(u64 *addr, u64 val, size_t count) +{ + for (u64 *end = addr + count; addr < end; addr++) + __put_kernel_nofault(addr, &val, u64, failed); + + return 0; + +failed: + return -EPERM; +} + +static int patch_memset32(u32 *addr, u32 val, size_t count) +{ + for (u32 *end = addr + count; addr < end; addr++) + __put_kernel_nofault(addr, &val, u32, failed); + + return 0; + +failed: + return -EPERM; +} + static int __patch_instructions(u32 *patch_addr, u32 *code, size_t len, bool repeat_instr) { unsigned long start = (unsigned long)patch_addr; + int err; /* Repeat instruction */ if (repeat_instr) { @@ -383,19 +406,19 @@ static int __patch_instructions(u32 *patch_addr, u32 *code, size_t len, bool rep if (ppc_inst_prefixed(instr)) { u64 val = ppc_inst_as_ulong(instr); - memset64((u64 *)patch_addr, val, len / 8); + err = patch_memset64((u64 *)patch_addr, val, len / 8); } else { u32 val = ppc_inst_val(instr); - memset32(patch_addr, val, len / 4); + err = patch_memset32(patch_addr, val, len / 4); } } else { - memcpy(patch_addr, code, len); + err = copy_to_kernel_nofault(patch_addr, code, len); } smp_wmb(); /* smp write barrier */ flush_icache_range(start, start + len); - return 0; + return err; } /* diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 4f82581ca203..b7201ba50b2e 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -25,6 +25,13 @@ #include <asm/firmware.h> #include <asm/inst.h> +/* + * Used to generate warnings if mmu or cpu feature check functions that + * use static keys before they are initialized. + */ +bool static_key_feature_checks_initialized __read_mostly; +EXPORT_SYMBOL_GPL(static_key_feature_checks_initialized); + struct fixup_entry { unsigned long mask; unsigned long value; @@ -679,6 +686,7 @@ void __init setup_feature_keys(void) jump_label_init(); cpu_feature_keys_init(); mmu_feature_keys_init(); + static_key_feature_checks_initialized = true; } static int __init check_features(void) diff --git a/arch/powerpc/lib/test-code-patching.c b/arch/powerpc/lib/test-code-patching.c index c44823292f73..f76030087f98 100644 --- a/arch/powerpc/lib/test-code-patching.c +++ b/arch/powerpc/lib/test-code-patching.c @@ -347,6 +347,97 @@ static void __init test_prefixed_patching(void) check(!memcmp(iptr, expected, sizeof(expected))); } +static void __init test_multi_instruction_patching(void) +{ + u32 code[32]; + void *buf; + u32 *addr32; + u64 *addr64; + ppc_inst_t inst64 = ppc_inst_prefix(OP_PREFIX << 26 | 3UL << 24, PPC_RAW_TRAP()); + u32 inst32 = PPC_RAW_NOP(); + + buf = vzalloc(PAGE_SIZE * 8); + check(buf); + if (!buf) + return; + + /* Test single page 32-bit repeated instruction */ + addr32 = buf + PAGE_SIZE; + check(!patch_instructions(addr32 + 1, &inst32, 12, true)); + + check(addr32[0] == 0); + check(addr32[1] == inst32); + check(addr32[2] == inst32); + check(addr32[3] == inst32); + check(addr32[4] == 0); + + /* Test single page 64-bit repeated instruction */ + if (IS_ENABLED(CONFIG_PPC64)) { + check(ppc_inst_prefixed(inst64)); + + addr64 = buf + PAGE_SIZE * 2; + ppc_inst_write(code, inst64); + check(!patch_instructions((u32 *)(addr64 + 1), code, 24, true)); + + check(addr64[0] == 0); + check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[1]), inst64)); + check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[2]), inst64)); + check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[3]), inst64)); + check(addr64[4] == 0); + } + + /* Test single page memcpy */ + addr32 = buf + PAGE_SIZE * 3; + + for (int i = 0; i < ARRAY_SIZE(code); i++) + code[i] = i + 1; + + check(!patch_instructions(addr32 + 1, code, sizeof(code), false)); + + check(addr32[0] == 0); + check(!memcmp(&addr32[1], code, sizeof(code))); + check(addr32[ARRAY_SIZE(code) + 1] == 0); + + /* Test multipage 32-bit repeated instruction */ + addr32 = buf + PAGE_SIZE * 4 - 8; + check(!patch_instructions(addr32 + 1, &inst32, 12, true)); + + check(addr32[0] == 0); + check(addr32[1] == inst32); + check(addr32[2] == inst32); + check(addr32[3] == inst32); + check(addr32[4] == 0); + + /* Test multipage 64-bit repeated instruction */ + if (IS_ENABLED(CONFIG_PPC64)) { + check(ppc_inst_prefixed(inst64)); + + addr64 = buf + PAGE_SIZE * 5 - 8; + ppc_inst_write(code, inst64); + check(!patch_instructions((u32 *)(addr64 + 1), code, 24, true)); + + check(addr64[0] == 0); + check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[1]), inst64)); + check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[2]), inst64)); + check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[3]), inst64)); + check(addr64[4] == 0); + } + + /* Test multipage memcpy */ + addr32 = buf + PAGE_SIZE * 6 - 12; + + for (int i = 0; i < ARRAY_SIZE(code); i++) + code[i] = i + 1; + + check(!patch_instructions(addr32 + 1, code, sizeof(code), false)); + + check(addr32[0] == 0); + check(!memcmp(&addr32[1], code, sizeof(code))); + check(addr32[ARRAY_SIZE(code) + 1] == 0); + + vfree(buf); +} + static int __init test_code_patching(void) { pr_info("Running code patching self-tests ...\n"); @@ -356,6 +447,7 @@ static int __init test_code_patching(void) test_create_function_call(); test_translate_branch(); test_prefixed_patching(); + test_multi_instruction_patching(); return 0; } diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 503a6e249940..0fe2f085c05a 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -3,8 +3,6 @@ # Makefile for the linux ppc-specific parts of the memory manager. # -ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) - obj-y := fault.o mem.o pgtable.o maccess.o pageattr.o \ init_$(BITS).o pgtable_$(BITS).o \ pgtable-frag.o ioremap.o ioremap_$(BITS).o \ diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 100f999871bc..625fe7d08e06 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -184,7 +184,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) static bool is_module_segment(unsigned long addr) { - if (!IS_ENABLED(CONFIG_MODULES)) + if (!IS_ENABLED(CONFIG_EXECMEM)) return false; if (addr < ALIGN_DOWN(MODULES_VADDR, SZ_256M)) return false; diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile index cad2abc1730f..33af5795856a 100644 --- a/arch/powerpc/mm/book3s64/Makefile +++ b/arch/powerpc/mm/book3s64/Makefile @@ -1,7 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -ccflags-y := $(NO_MINIMAL_TOC) - obj-y += mmu_context.o pgtable.o trace.o ifdef CONFIG_PPC_64S_HASH_MMU CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE) diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c index 15189592da09..7186516eca52 100644 --- a/arch/powerpc/mm/cacheflush.c +++ b/arch/powerpc/mm/cacheflush.c @@ -78,7 +78,7 @@ EXPORT_SYMBOL(flush_icache_range); #ifdef CONFIG_HIGHMEM /** - * flush_dcache_icache_phys() - Flush a page by it's physical address + * flush_dcache_icache_phys() - Flush a page by its physical address * @physaddr: the physical address of the page */ static void flush_dcache_icache_phys(unsigned long physaddr) diff --git a/arch/powerpc/mm/kasan/init_book3e_64.c b/arch/powerpc/mm/kasan/init_book3e_64.c index 11519e88dc6b..43c03b84ff32 100644 --- a/arch/powerpc/mm/kasan/init_book3e_64.c +++ b/arch/powerpc/mm/kasan/init_book3e_64.c @@ -112,7 +112,7 @@ void __init kasan_init(void) pte_t zero_pte = pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL_RO); for_each_mem_range(i, &start, &end) - kasan_init_phys_region((void *)start, (void *)end); + kasan_init_phys_region(phys_to_virt(start), phys_to_virt(end)); if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) kasan_remove_zero_shadow((void *)VMALLOC_START, VMALLOC_SIZE); diff --git a/arch/powerpc/mm/kasan/init_book3s_64.c b/arch/powerpc/mm/kasan/init_book3s_64.c index 9300d641cf9a..3fb5ce4f48f4 100644 --- a/arch/powerpc/mm/kasan/init_book3s_64.c +++ b/arch/powerpc/mm/kasan/init_book3s_64.c @@ -62,7 +62,7 @@ void __init kasan_init(void) } for_each_mem_range(i, &start, &end) - kasan_init_phys_region((void *)start, (void *)end); + kasan_init_phys_region(phys_to_virt(start), phys_to_virt(end)); for (i = 0; i < PTRS_PER_PTE; i++) __set_pte_at(&init_mm, (unsigned long)kasan_early_shadow_page, diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 3a440004b97d..22cc978cdb47 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -16,6 +16,7 @@ #include <linux/highmem.h> #include <linux/suspend.h> #include <linux/dma-direct.h> +#include <linux/execmem.h> #include <asm/swiotlb.h> #include <asm/machdep.h> @@ -30,7 +31,7 @@ #include <mm/mmu_decl.h> -unsigned long long memory_limit; +unsigned long long memory_limit __initdata; unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); @@ -406,3 +407,66 @@ int devmem_is_allowed(unsigned long pfn) * the EHEA driver. Drop this when drivers/net/ethernet/ibm/ehea is removed. */ EXPORT_SYMBOL_GPL(walk_system_ram_range); + +#ifdef CONFIG_EXECMEM +static struct execmem_info execmem_info __ro_after_init; + +struct execmem_info __init *execmem_arch_setup(void) +{ + pgprot_t kprobes_prot = strict_module_rwx_enabled() ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; + pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : PAGE_KERNEL_EXEC; + unsigned long fallback_start = 0, fallback_end = 0; + unsigned long start, end; + + /* + * BOOK3S_32 and 8xx define MODULES_VADDR for text allocations and + * allow allocating data in the entire vmalloc space + */ +#ifdef MODULES_VADDR + unsigned long limit = (unsigned long)_etext - SZ_32M; + + BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR); + + /* First try within 32M limit from _etext to avoid branch trampolines */ + if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit) { + start = limit; + fallback_start = MODULES_VADDR; + fallback_end = MODULES_END; + } else { + start = MODULES_VADDR; + } + + end = MODULES_END; +#else + start = VMALLOC_START; + end = VMALLOC_END; +#endif + + execmem_info = (struct execmem_info){ + .ranges = { + [EXECMEM_DEFAULT] = { + .start = start, + .end = end, + .pgprot = prot, + .alignment = 1, + .fallback_start = fallback_start, + .fallback_end = fallback_end, + }, + [EXECMEM_KPROBES] = { + .start = VMALLOC_START, + .end = VMALLOC_END, + .pgprot = kprobes_prot, + .alignment = 1, + }, + [EXECMEM_MODULE_DATA] = { + .start = VMALLOC_START, + .end = VMALLOC_END, + .pgprot = PAGE_KERNEL, + .alignment = 1, + }, + }, + }; + + return &execmem_info; +} +#endif /* CONFIG_EXECMEM */ diff --git a/arch/powerpc/mm/nohash/Makefile b/arch/powerpc/mm/nohash/Makefile index f3894e79d5f7..b3f0498dd42f 100644 --- a/arch/powerpc/mm/nohash/Makefile +++ b/arch/powerpc/mm/nohash/Makefile @@ -1,7 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) - obj-y += mmu_context.o tlb.o tlb_low.o kup.o obj-$(CONFIG_PPC_BOOK3E_64) += tlb_low_64e.o book3e_pgtable.o obj-$(CONFIG_40x) += 40x.o diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c index cdff129abb14..5c8d1bb98b3e 100644 --- a/arch/powerpc/mm/nohash/kaslr_booke.c +++ b/arch/powerpc/mm/nohash/kaslr_booke.c @@ -376,7 +376,7 @@ notrace void __init kaslr_early_init(void *dt_ptr, phys_addr_t size) create_kaslr_tlb_entry(1, tlb_virt, tlb_phys); } - /* Copy the kernel to it's new location and run */ + /* Copy the kernel to its new location and run */ memcpy((void *)kernstart_virt_addr, (void *)_stext, kernel_sz); flush_icache_range(kernstart_virt_addr, kernstart_virt_addr + kernel_sz); diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c index 9a601587836b..a6baa6166d94 100644 --- a/arch/powerpc/mm/ptdump/hashpagetable.c +++ b/arch/powerpc/mm/ptdump/hashpagetable.c @@ -491,7 +491,7 @@ static void walk_vmemmap(struct pg_state *st) * Traverse the vmemmaped memory and dump pages that are in the hash * pagetable. */ - while (ptr->list) { + while (ptr) { hpte_find(st, ptr->virt_addr, mmu_vmemmap_psize); ptr = ptr->list; } diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 0f9a21783329..984655419da5 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -359,3 +359,13 @@ void bpf_jit_free(struct bpf_prog *fp) bpf_prog_unlock_free(fp); } + +bool bpf_jit_supports_kfunc_call(void) +{ + return true; +} + +bool bpf_jit_supports_far_kfunc_call(void) +{ + return IS_ENABLED(CONFIG_PPC64); +} diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 2f39c50ca729..43b97032a91c 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -450,10 +450,16 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code } break; case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */ - EMIT(PPC_RAW_DIVWU(dst_reg, src2_reg, src_reg)); + if (off) + EMIT(PPC_RAW_DIVW(dst_reg, src2_reg, src_reg)); + else + EMIT(PPC_RAW_DIVWU(dst_reg, src2_reg, src_reg)); break; case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */ - EMIT(PPC_RAW_DIVWU(_R0, src2_reg, src_reg)); + if (off) + EMIT(PPC_RAW_DIVW(_R0, src2_reg, src_reg)); + else + EMIT(PPC_RAW_DIVWU(_R0, src2_reg, src_reg)); EMIT(PPC_RAW_MULW(_R0, src_reg, _R0)); EMIT(PPC_RAW_SUB(dst_reg, src2_reg, _R0)); break; @@ -467,10 +473,16 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code if (imm == 1) { EMIT(PPC_RAW_MR(dst_reg, src2_reg)); } else if (is_power_of_2((u32)imm)) { - EMIT(PPC_RAW_SRWI(dst_reg, src2_reg, ilog2(imm))); + if (off) + EMIT(PPC_RAW_SRAWI(dst_reg, src2_reg, ilog2(imm))); + else + EMIT(PPC_RAW_SRWI(dst_reg, src2_reg, ilog2(imm))); } else { PPC_LI32(_R0, imm); - EMIT(PPC_RAW_DIVWU(dst_reg, src2_reg, _R0)); + if (off) + EMIT(PPC_RAW_DIVW(dst_reg, src2_reg, _R0)); + else + EMIT(PPC_RAW_DIVWU(dst_reg, src2_reg, _R0)); } break; case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */ @@ -480,11 +492,19 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code if (!is_power_of_2((u32)imm)) { bpf_set_seen_register(ctx, tmp_reg); PPC_LI32(tmp_reg, imm); - EMIT(PPC_RAW_DIVWU(_R0, src2_reg, tmp_reg)); + if (off) + EMIT(PPC_RAW_DIVW(_R0, src2_reg, tmp_reg)); + else + EMIT(PPC_RAW_DIVWU(_R0, src2_reg, tmp_reg)); EMIT(PPC_RAW_MULW(_R0, tmp_reg, _R0)); EMIT(PPC_RAW_SUB(dst_reg, src2_reg, _R0)); } else if (imm == 1) { EMIT(PPC_RAW_LI(dst_reg, 0)); + } else if (off) { + EMIT(PPC_RAW_SRAWI(_R0, src2_reg, ilog2(imm))); + EMIT(PPC_RAW_ADDZE(_R0, _R0)); + EMIT(PPC_RAW_SLWI(_R0, _R0, ilog2(imm))); + EMIT(PPC_RAW_SUB(dst_reg, src2_reg, _R0)); } else { imm = ilog2((u32)imm); EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 0, 32 - imm, 31)); @@ -497,11 +517,21 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code imm = -imm; if (!is_power_of_2(imm)) return -EOPNOTSUPP; - if (imm == 1) + if (imm == 1) { EMIT(PPC_RAW_LI(dst_reg, 0)); - else + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + } else if (off) { + EMIT(PPC_RAW_SRAWI(dst_reg_h, src2_reg_h, 31)); + EMIT(PPC_RAW_XOR(dst_reg, src2_reg, dst_reg_h)); + EMIT(PPC_RAW_SUBFC(dst_reg, dst_reg_h, dst_reg)); + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 32 - ilog2(imm), 31)); + EMIT(PPC_RAW_XOR(dst_reg, dst_reg, dst_reg_h)); + EMIT(PPC_RAW_SUBFC(dst_reg, dst_reg_h, dst_reg)); + EMIT(PPC_RAW_SUBFE(dst_reg_h, dst_reg_h, dst_reg_h)); + } else { EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 0, 32 - ilog2(imm), 31)); - EMIT(PPC_RAW_LI(dst_reg_h, 0)); + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + } break; case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */ if (!imm) @@ -727,15 +757,30 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code * MOV */ case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */ - if (dst_reg == src_reg) - break; - EMIT(PPC_RAW_MR(dst_reg, src_reg)); - EMIT(PPC_RAW_MR(dst_reg_h, src_reg_h)); + if (off == 8) { + EMIT(PPC_RAW_EXTSB(dst_reg, src_reg)); + EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg, 31)); + } else if (off == 16) { + EMIT(PPC_RAW_EXTSH(dst_reg, src_reg)); + EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg, 31)); + } else if (off == 32 && dst_reg == src_reg) { + EMIT(PPC_RAW_SRAWI(dst_reg_h, src_reg, 31)); + } else if (off == 32) { + EMIT(PPC_RAW_MR(dst_reg, src_reg)); + EMIT(PPC_RAW_SRAWI(dst_reg_h, src_reg, 31)); + } else if (dst_reg != src_reg) { + EMIT(PPC_RAW_MR(dst_reg, src_reg)); + EMIT(PPC_RAW_MR(dst_reg_h, src_reg_h)); + } break; case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */ /* special mov32 for zext */ if (imm == 1) EMIT(PPC_RAW_LI(dst_reg_h, 0)); + else if (off == 8) + EMIT(PPC_RAW_EXTSB(dst_reg, src_reg)); + else if (off == 16) + EMIT(PPC_RAW_EXTSH(dst_reg, src_reg)); else if (dst_reg != src_reg) EMIT(PPC_RAW_MR(dst_reg, src_reg)); break; @@ -751,6 +796,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code * BPF_FROM_BE/LE */ case BPF_ALU | BPF_END | BPF_FROM_LE: + case BPF_ALU64 | BPF_END | BPF_FROM_LE: switch (imm) { case 16: /* Copy 16 bits to upper part */ @@ -785,6 +831,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code EMIT(PPC_RAW_MR(dst_reg_h, tmp_reg)); break; } + if (BPF_CLASS(code) == BPF_ALU64 && imm != 64) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); break; case BPF_ALU | BPF_END | BPF_FROM_BE: switch (imm) { @@ -918,11 +966,17 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code * BPF_LDX */ case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */ + case BPF_LDX | BPF_MEMSX | BPF_B: case BPF_LDX | BPF_PROBE_MEM | BPF_B: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */ + case BPF_LDX | BPF_MEMSX | BPF_H: case BPF_LDX | BPF_PROBE_MEM | BPF_H: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */ + case BPF_LDX | BPF_MEMSX | BPF_W: case BPF_LDX | BPF_PROBE_MEM | BPF_W: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_W: case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */ case BPF_LDX | BPF_PROBE_MEM | BPF_DW: /* @@ -931,7 +985,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code * load only if addr is kernel address (see is_kernel_addr()), otherwise * set dst_reg=0 and move on. */ - if (BPF_MODE(code) == BPF_PROBE_MEM) { + if (BPF_MODE(code) == BPF_PROBE_MEM || BPF_MODE(code) == BPF_PROBE_MEMSX) { PPC_LI32(_R0, TASK_SIZE - off); EMIT(PPC_RAW_CMPLW(src_reg, _R0)); PPC_BCC_SHORT(COND_GT, (ctx->idx + 4) * 4); @@ -953,30 +1007,48 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code * as there are two load instructions for dst_reg_h & dst_reg * respectively. */ - if (size == BPF_DW) + if (size == BPF_DW || + (size == BPF_B && BPF_MODE(code) == BPF_PROBE_MEMSX)) PPC_JMP((ctx->idx + 3) * 4); else PPC_JMP((ctx->idx + 2) * 4); } - switch (size) { - case BPF_B: - EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); - break; - case BPF_H: - EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); - break; - case BPF_W: - EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); - break; - case BPF_DW: - EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off)); - EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4)); - break; - } + if (BPF_MODE(code) == BPF_MEMSX || BPF_MODE(code) == BPF_PROBE_MEMSX) { + switch (size) { + case BPF_B: + EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); + EMIT(PPC_RAW_EXTSB(dst_reg, dst_reg)); + break; + case BPF_H: + EMIT(PPC_RAW_LHA(dst_reg, src_reg, off)); + break; + case BPF_W: + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); + break; + } + if (!fp->aux->verifier_zext) + EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg, 31)); - if (size != BPF_DW && !fp->aux->verifier_zext) - EMIT(PPC_RAW_LI(dst_reg_h, 0)); + } else { + switch (size) { + case BPF_B: + EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); + break; + case BPF_H: + EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); + break; + case BPF_W: + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); + break; + case BPF_DW: + EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off)); + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4)); + break; + } + if (size != BPF_DW && !fp->aux->verifier_zext) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + } if (BPF_MODE(code) == BPF_PROBE_MEM) { int insn_idx = ctx->idx - 1; @@ -1068,6 +1140,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code case BPF_JMP | BPF_JA: PPC_JMP(addrs[i + 1 + off]); break; + case BPF_JMP32 | BPF_JA: + PPC_JMP(addrs[i + 1 + imm]); + break; case BPF_JMP | BPF_JGT | BPF_K: case BPF_JMP | BPF_JGT | BPF_X: diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 79f23974a320..8afc14a4a125 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -202,29 +202,47 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) EMIT(PPC_RAW_BLR()); } -static int bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, u64 func) +static int +bpf_jit_emit_func_call_hlp(u32 *image, u32 *fimage, struct codegen_context *ctx, u64 func) { unsigned long func_addr = func ? ppc_function_entry((void *)func) : 0; long reladdr; - if (WARN_ON_ONCE(!core_kernel_text(func_addr))) + if (WARN_ON_ONCE(!kernel_text_address(func_addr))) return -EINVAL; - if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) { - reladdr = func_addr - CTX_NIA(ctx); - - if (reladdr >= (long)SZ_8G || reladdr < -(long)SZ_8G) { - pr_err("eBPF: address of %ps out of range of pcrel address.\n", - (void *)func); - return -ERANGE; - } - /* pla r12,addr */ - EMIT(PPC_PREFIX_MLS | __PPC_PRFX_R(1) | IMM_H18(reladdr)); - EMIT(PPC_INST_PADDI | ___PPC_RT(_R12) | IMM_L(reladdr)); - EMIT(PPC_RAW_MTCTR(_R12)); - EMIT(PPC_RAW_BCTR()); +#ifdef CONFIG_PPC_KERNEL_PCREL + reladdr = func_addr - local_paca->kernelbase; + if (reladdr < (long)SZ_8G && reladdr >= -(long)SZ_8G) { + EMIT(PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernelbase))); + /* Align for subsequent prefix instruction */ + if (!IS_ALIGNED((unsigned long)fimage + CTX_NIA(ctx), 8)) + EMIT(PPC_RAW_NOP()); + /* paddi r12,r12,addr */ + EMIT(PPC_PREFIX_MLS | __PPC_PRFX_R(0) | IMM_H18(reladdr)); + EMIT(PPC_INST_PADDI | ___PPC_RT(_R12) | ___PPC_RA(_R12) | IMM_L(reladdr)); } else { + unsigned long pc = (unsigned long)fimage + CTX_NIA(ctx); + bool alignment_needed = !IS_ALIGNED(pc, 8); + + reladdr = func_addr - (alignment_needed ? pc + 4 : pc); + + if (reladdr < (long)SZ_8G && reladdr >= -(long)SZ_8G) { + if (alignment_needed) + EMIT(PPC_RAW_NOP()); + /* pla r12,addr */ + EMIT(PPC_PREFIX_MLS | __PPC_PRFX_R(1) | IMM_H18(reladdr)); + EMIT(PPC_INST_PADDI | ___PPC_RT(_R12) | IMM_L(reladdr)); + } else { + /* We can clobber r12 */ + PPC_LI64(_R12, func); + } + } + EMIT(PPC_RAW_MTCTR(_R12)); + EMIT(PPC_RAW_BCTRL()); +#else + if (core_kernel_text(func_addr)) { reladdr = func_addr - kernel_toc_addr(); if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { pr_err("eBPF: address of %ps out of range of kernel_toc.\n", (void *)func); @@ -235,7 +253,32 @@ static int bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, u EMIT(PPC_RAW_ADDI(_R12, _R12, PPC_LO(reladdr))); EMIT(PPC_RAW_MTCTR(_R12)); EMIT(PPC_RAW_BCTRL()); + } else { + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V1)) { + /* func points to the function descriptor */ + PPC_LI64(bpf_to_ppc(TMP_REG_2), func); + /* Load actual entry point from function descriptor */ + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_2), 0)); + /* ... and move it to CTR */ + EMIT(PPC_RAW_MTCTR(bpf_to_ppc(TMP_REG_1))); + /* + * Load TOC from function descriptor at offset 8. + * We can clobber r2 since we get called through a + * function pointer (so caller will save/restore r2). + */ + EMIT(PPC_RAW_LD(_R2, bpf_to_ppc(TMP_REG_2), 8)); + } else { + PPC_LI64(_R12, func); + EMIT(PPC_RAW_MTCTR(_R12)); + } + EMIT(PPC_RAW_BCTRL()); + /* + * Load r2 with kernel TOC as kernel TOC is used if function address falls + * within core kernel text. + */ + EMIT(PPC_RAW_LD(_R2, _R13, offsetof(struct paca_struct, kernel_toc))); } +#endif return 0; } @@ -285,7 +328,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o int b2p_index = bpf_to_ppc(BPF_REG_3); int bpf_tailcall_prologue_size = 8; - if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2)) + if (!IS_ENABLED(CONFIG_PPC_KERNEL_PCREL) && IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2)) bpf_tailcall_prologue_size += 4; /* skip past the toc load */ /* @@ -993,7 +1036,7 @@ emit_clear: return ret; if (func_addr_fixed) - ret = bpf_jit_emit_func_call_hlp(image, ctx, func_addr); + ret = bpf_jit_emit_func_call_hlp(image, fimage, ctx, func_addr); else ret = bpf_jit_emit_func_call_rel(image, fimage, ctx, func_addr); diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c index 8f75e9574c27..8c1f3b629fc7 100644 --- a/arch/powerpc/platforms/512x/mpc512x_shared.c +++ b/arch/powerpc/platforms/512x/mpc512x_shared.c @@ -279,7 +279,7 @@ static void __init mpc512x_setup_diu(void) * and so negatively affect boot time. Instead we reserve the * already configured frame buffer area so that it won't be * destroyed. The starting address of the area to reserve and - * also it's length is passed to memblock_reserve(). It will be + * also its length is passed to memblock_reserve(). It will be * freed later on first open of fbdev, when splash image is not * needed any more. */ diff --git a/arch/powerpc/platforms/52xx/lite5200_sleep.S b/arch/powerpc/platforms/52xx/lite5200_sleep.S index 0b12647e7b42..0ec2522ee4ad 100644 --- a/arch/powerpc/platforms/52xx/lite5200_sleep.S +++ b/arch/powerpc/platforms/52xx/lite5200_sleep.S @@ -203,7 +203,8 @@ lite5200_wakeup: /* HIDs, MSR */ LOAD_SPRN(HID1, 0x19) - LOAD_SPRN(HID2, 0x1a) + /* FIXME: Should this use HID2_G2_LE? */ + LOAD_SPRN(HID2_750FX, 0x1a) /* address translation is tricky (see turn_on_mmu) */ @@ -283,7 +284,8 @@ SYM_FUNC_START_LOCAL(save_regs) SAVE_SPRN(HID0, 0x18) SAVE_SPRN(HID1, 0x19) - SAVE_SPRN(HID2, 0x1a) + /* FIXME: Should this use HID2_G2_LE? */ + SAVE_SPRN(HID2_750FX, 0x1a) mfmsr r10 stw r10, (4*0x1b)(r4) /*SAVE_SPRN(LR, 0x1c) have to save it before the call */ diff --git a/arch/powerpc/platforms/52xx/mpc52xx_common.c b/arch/powerpc/platforms/52xx/mpc52xx_common.c index b4938e344f71..253421ffb4e5 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_common.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_common.c @@ -12,12 +12,10 @@ #undef DEBUG -#include <linux/gpio.h> #include <linux/kernel.h> #include <linux/spinlock.h> #include <linux/of_address.h> #include <linux/of_platform.h> -#include <linux/of_gpio.h> #include <linux/export.h> #include <asm/io.h> #include <asm/mpc52xx.h> diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c index 581059527c36..2bd6abcdc113 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c @@ -48,6 +48,7 @@ * the output mode. This driver does not change the output mode setting. */ +#include <linux/gpio/driver.h> #include <linux/irq.h> #include <linux/interrupt.h> #include <linux/io.h> @@ -56,7 +57,6 @@ #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_irq.h> -#include <linux/of_gpio.h> #include <linux/platform_device.h> #include <linux/kernel.h> #include <linux/property.h> diff --git a/arch/powerpc/platforms/83xx/suspend-asm.S b/arch/powerpc/platforms/83xx/suspend-asm.S index bc6bd4d0ae96..6a62ed6082c9 100644 --- a/arch/powerpc/platforms/83xx/suspend-asm.S +++ b/arch/powerpc/platforms/83xx/suspend-asm.S @@ -68,7 +68,8 @@ _GLOBAL(mpc83xx_enter_deep_sleep) mfspr r5, SPRN_HID0 mfspr r6, SPRN_HID1 - mfspr r7, SPRN_HID2 + /* FIXME: Should this use SPRN_HID2_G2_LE? */ + mfspr r7, SPRN_HID2_750FX stw r5, SS_HID+0(r3) stw r6, SS_HID+4(r3) @@ -396,7 +397,8 @@ mpc83xx_deep_resume: mtspr SPRN_HID0, r5 mtspr SPRN_HID1, r6 - mtspr SPRN_HID2, r7 + /* FIXME: Should this use SPRN_HID2_G2_LE? */ + mtspr SPRN_HID2_750FX, r7 lwz r4, SS_IABR+0(r3) lwz r5, SS_IABR+4(r3) diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index 40aa58206888..e52b848b64b7 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -398,6 +398,7 @@ static void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary) hard_irq_disable(); mpic_teardown_this_cpu(secondary); +#ifdef CONFIG_CRASH_DUMP if (cpu == crashing_cpu && cpu_thread_in_core(cpu) != 0) { /* * We enter the crash kernel on whatever cpu crashed, @@ -406,9 +407,11 @@ static void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary) */ disable_threadbit = 1; disable_cpu = cpu_first_thread_sibling(cpu); - } else if (sibling != crashing_cpu && - cpu_thread_in_core(cpu) == 0 && - cpu_thread_in_core(sibling) != 0) { + } else if (sibling == crashing_cpu) { + return; + } +#endif + if (cpu_thread_in_core(cpu) == 0 && cpu_thread_in_core(sibling) != 0) { disable_threadbit = 2; disable_cpu = sibling; } diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 1202a69b0a20..4cd9c0de22c2 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -424,23 +424,6 @@ static void __init cell_iommu_setup_hardware(struct cbe_iommu *iommu, cell_iommu_enable_hardware(iommu); } -#if 0/* Unused for now */ -static struct iommu_window *find_window(struct cbe_iommu *iommu, - unsigned long offset, unsigned long size) -{ - struct iommu_window *window; - - /* todo: check for overlapping (but not equal) windows) */ - - list_for_each_entry(window, &(iommu->windows), list) { - if (window->offset == offset && window->size == size) - return window; - } - - return NULL; -} -#endif - static inline u32 cell_iommu_get_ioid(struct device_node *np) { const u32 *ioid; diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c index 30394c6f8894..fee638fd8970 100644 --- a/arch/powerpc/platforms/cell/smp.c +++ b/arch/powerpc/platforms/cell/smp.c @@ -54,6 +54,7 @@ static cpumask_t of_spin_map; /** * smp_startup_cpu() - start the given cpu + * @lcpu: Logical CPU ID of the CPU to be started. * * At boot time, there is nothing to do for primary threads which were * started from Open Firmware. For anything else, call RTAS with the diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index 02a8158c469d..7f4e0db8eb08 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -1704,23 +1704,11 @@ static int spufs_mfc_flush(struct file *file, fl_owner_t id) ret = spu_acquire(ctx); if (ret) - goto out; -#if 0 -/* this currently hangs */ - ret = spufs_wait(ctx->mfc_wq, - ctx->ops->set_mfc_query(ctx, ctx->tagwait, 2)); - if (ret) - goto out; - ret = spufs_wait(ctx->mfc_wq, - ctx->ops->read_mfc_tagstatus(ctx) == ctx->tagwait); - if (ret) - goto out; -#else - ret = 0; -#endif + return ret; + spu_release(ctx); -out: - return ret; + + return 0; } static int spufs_mfc_fsync(struct file *file, loff_t start, loff_t end, int datasync) diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 99bd027a7f7c..610ca8570682 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -868,7 +868,7 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio) } /** - * spu_deactivate - unbind a context from it's physical spu + * spu_deactivate - unbind a context from its physical spu * @ctx: spu context to unbind * * Unbind @ctx from the physical spu it is running on and schedule diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c index b911b31717cc..b9ff37c7f6f0 100644 --- a/arch/powerpc/platforms/maple/pci.c +++ b/arch/powerpc/platforms/maple/pci.c @@ -595,7 +595,7 @@ void __init maple_pci_init(void) /* Probe root PCI hosts, that is on U3 the AGP host and the * HyperTransport host. That one is actually "kept" around - * and actually added last as it's resource management relies + * and actually added last as its resource management relies * on the AGP resources to have been setup first */ root = of_find_node_by_path("/"); diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index 7135ea1d7db6..2202bf77c7a3 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c @@ -2,7 +2,7 @@ /* * Support for the interrupt controllers found on Power Macintosh, * currently Apple's "Grand Central" interrupt controller in all - * it's incarnations. OpenPIC support used on newer machines is + * its incarnations. OpenPIC support used on newer machines is * in a separate file * * Copyright (C) 1997 Paul Mackerras (paulus@samba.org) diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S index d497a60003d2..822ed70cdcbf 100644 --- a/arch/powerpc/platforms/powermac/sleep.S +++ b/arch/powerpc/platforms/powermac/sleep.S @@ -176,7 +176,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) * memory location containing the PC to resume from * at address 0. * - On Core99, we must store the wakeup vector at - * address 0x80 and eventually it's parameters + * address 0x80 and eventually its parameters * at address 0x84. I've have some trouble with those * parameters however and I no longer use them. */ diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c b/arch/powerpc/platforms/powernv/opal-fadump.c index 964f464b1b0e..c9c1dfb35464 100644 --- a/arch/powerpc/platforms/powernv/opal-fadump.c +++ b/arch/powerpc/platforms/powernv/opal-fadump.c @@ -513,8 +513,8 @@ out: final_note(note_buf); pr_debug("Updating elfcore header (%llx) with cpu notes\n", - fdh->elfcorehdr_addr); - fadump_update_elfcore_header(__va(fdh->elfcorehdr_addr)); + fadump_conf->elfcorehdr_addr); + fadump_update_elfcore_header((char *)fadump_conf->elfcorehdr_addr); return 0; } @@ -526,12 +526,7 @@ static int __init opal_fadump_process(struct fw_dump *fadump_conf) if (!opal_fdm_active || !fadump_conf->fadumphdr_addr) return rc; - /* Validate the fadump crash info header */ fdh = __va(fadump_conf->fadumphdr_addr); - if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) { - pr_err("Crash info header is not valid.\n"); - return rc; - } #ifdef CONFIG_OPAL_CORE /* @@ -545,18 +540,7 @@ static int __init opal_fadump_process(struct fw_dump *fadump_conf) kernel_initiated = true; #endif - rc = opal_fadump_build_cpu_notes(fadump_conf, fdh); - if (rc) - return rc; - - /* - * We are done validating dump info and elfcore header is now ready - * to be exported. set elfcorehdr_addr so that vmcore module will - * export the elfcore header through '/proc/vmcore'. - */ - elfcorehdr_addr = fdh->elfcorehdr_addr; - - return rc; + return opal_fadump_build_cpu_notes(fadump_conf, fdh); } static void opal_fadump_region_show(struct fw_dump *fadump_conf, @@ -615,6 +599,12 @@ static void opal_fadump_trigger(struct fadump_crash_info_header *fdh, pr_emerg("No backend support for MPIPL!\n"); } +/* FADUMP_MAX_MEM_REGS or lower */ +static int opal_fadump_max_boot_mem_rgns(void) +{ + return FADUMP_MAX_MEM_REGS; +} + static struct fadump_ops opal_fadump_ops = { .fadump_init_mem_struct = opal_fadump_init_mem_struct, .fadump_get_metadata_size = opal_fadump_get_metadata_size, @@ -627,6 +617,7 @@ static struct fadump_ops opal_fadump_ops = { .fadump_process = opal_fadump_process, .fadump_region_show = opal_fadump_region_show, .fadump_trigger = opal_fadump_trigger, + .fadump_max_boot_mem_rgns = opal_fadump_max_boot_mem_rgns, }; void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) @@ -674,8 +665,10 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) } } - fadump_conf->ops = &opal_fadump_ops; - fadump_conf->fadump_supported = 1; + fadump_conf->ops = &opal_fadump_ops; + fadump_conf->fadump_supported = 1; + /* TODO: Add support to pass additional parameters */ + fadump_conf->param_area_supported = 0; /* * Firmware supports 32-bit field for size. Align it to PAGE_SIZE diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c index 59882da3e742..cc7b1dd54ac6 100644 --- a/arch/powerpc/platforms/powernv/pci-sriov.c +++ b/arch/powerpc/platforms/powernv/pci-sriov.c @@ -238,7 +238,7 @@ void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev) } else if (pdev->is_physfn) { /* * For PFs adjust their allocated IOV resources to match what - * the PHB can support using it's M64 BAR table. + * the PHB can support using its M64 BAR table. */ pnv_pci_ioda_fixup_iov_resources(pdev); } @@ -658,7 +658,7 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) list_add_tail(&pe->list, &phb->ioda.pe_list); mutex_unlock(&phb->ioda.pe_list_mutex); - /* associate this pe to it's pdn */ + /* associate this pe to its pdn */ list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) { if (vf_pdn->busno == vf_bus && vf_pdn->devfn == vf_devfn) { diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c index b664838008c1..5147df3a18ac 100644 --- a/arch/powerpc/platforms/powernv/vas-window.c +++ b/arch/powerpc/platforms/powernv/vas-window.c @@ -1059,7 +1059,7 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, } } else { /* - * Interrupt hanlder or fault window setup failed. Means + * Interrupt handler or fault window setup failed. Means * NX can not generate fault for page fault. So not * opening for user space tx window. */ diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c index 878bc160246e..b18e1c92e554 100644 --- a/arch/powerpc/platforms/ps3/device-init.c +++ b/arch/powerpc/platforms/ps3/device-init.c @@ -770,49 +770,51 @@ static struct task_struct *probe_task; static int ps3_probe_thread(void *data) { - struct ps3_notification_device dev; + struct { + struct ps3_notification_device dev; + u8 buf[512]; + } *local; + struct ps3_notify_cmd *notify_cmd; + struct ps3_notify_event *notify_event; int res; unsigned int irq; u64 lpar; - void *buf; - struct ps3_notify_cmd *notify_cmd; - struct ps3_notify_event *notify_event; pr_debug(" -> %s:%u: kthread started\n", __func__, __LINE__); - buf = kzalloc(512, GFP_KERNEL); - if (!buf) + local = kzalloc(sizeof(*local), GFP_KERNEL); + if (!local) return -ENOMEM; - lpar = ps3_mm_phys_to_lpar(__pa(buf)); - notify_cmd = buf; - notify_event = buf; + lpar = ps3_mm_phys_to_lpar(__pa(&local->buf)); + notify_cmd = (struct ps3_notify_cmd *)&local->buf; + notify_event = (struct ps3_notify_event *)&local->buf; /* dummy system bus device */ - dev.sbd.bus_id = (u64)data; - dev.sbd.dev_id = PS3_NOTIFICATION_DEV_ID; - dev.sbd.interrupt_id = PS3_NOTIFICATION_INTERRUPT_ID; + local->dev.sbd.bus_id = (u64)data; + local->dev.sbd.dev_id = PS3_NOTIFICATION_DEV_ID; + local->dev.sbd.interrupt_id = PS3_NOTIFICATION_INTERRUPT_ID; - res = lv1_open_device(dev.sbd.bus_id, dev.sbd.dev_id, 0); + res = lv1_open_device(local->dev.sbd.bus_id, local->dev.sbd.dev_id, 0); if (res) { pr_err("%s:%u: lv1_open_device failed %s\n", __func__, __LINE__, ps3_result(res)); goto fail_free; } - res = ps3_sb_event_receive_port_setup(&dev.sbd, PS3_BINDING_CPU_ANY, - &irq); + res = ps3_sb_event_receive_port_setup(&local->dev.sbd, + PS3_BINDING_CPU_ANY, &irq); if (res) { pr_err("%s:%u: ps3_sb_event_receive_port_setup failed %d\n", __func__, __LINE__, res); goto fail_close_device; } - spin_lock_init(&dev.lock); - rcuwait_init(&dev.wait); + spin_lock_init(&local->dev.lock); + rcuwait_init(&local->dev.wait); res = request_irq(irq, ps3_notification_interrupt, 0, - "ps3_notification", &dev); + "ps3_notification", &local->dev); if (res) { pr_err("%s:%u: request_irq failed %d\n", __func__, __LINE__, res); @@ -823,7 +825,7 @@ static int ps3_probe_thread(void *data) notify_cmd->operation_code = 0; /* must be zero */ notify_cmd->event_mask = 1UL << notify_region_probe; - res = ps3_notification_read_write(&dev, lpar, 1); + res = ps3_notification_read_write(&local->dev, lpar, 1); if (res) goto fail_free_irq; @@ -834,36 +836,37 @@ static int ps3_probe_thread(void *data) memset(notify_event, 0, sizeof(*notify_event)); - res = ps3_notification_read_write(&dev, lpar, 0); + res = ps3_notification_read_write(&local->dev, lpar, 0); if (res) break; pr_debug("%s:%u: notify event type 0x%llx bus id %llu dev id %llu" " type %llu port %llu\n", __func__, __LINE__, - notify_event->event_type, notify_event->bus_id, - notify_event->dev_id, notify_event->dev_type, - notify_event->dev_port); + notify_event->event_type, notify_event->bus_id, + notify_event->dev_id, notify_event->dev_type, + notify_event->dev_port); if (notify_event->event_type != notify_region_probe || - notify_event->bus_id != dev.sbd.bus_id) { + notify_event->bus_id != local->dev.sbd.bus_id) { pr_warn("%s:%u: bad notify_event: event %llu, dev_id %llu, dev_type %llu\n", __func__, __LINE__, notify_event->event_type, notify_event->dev_id, notify_event->dev_type); continue; } - ps3_find_and_add_device(dev.sbd.bus_id, notify_event->dev_id); + ps3_find_and_add_device(local->dev.sbd.bus_id, + notify_event->dev_id); } while (!kthread_should_stop()); fail_free_irq: - free_irq(irq, &dev); + free_irq(irq, &local->dev); fail_sb_event_receive_port_destroy: - ps3_sb_event_receive_port_destroy(&dev.sbd, irq); + ps3_sb_event_receive_port_destroy(&local->dev.sbd, irq); fail_close_device: - lv1_close_device(dev.sbd.bus_id, dev.sbd.dev_id); + lv1_close_device(local->dev.sbd.bus_id, local->dev.sbd.dev_id); fail_free: - kfree(buf); + kfree(local); probe_task = NULL; diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index f936962a2946..7bf506f6b8c8 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -1,5 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) ccflags-$(CONFIG_PPC_PSERIES_DEBUG) += -DDEBUG obj-y := lpar.o hvCall.o nvram.o reconfig.o \ diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 4e9916bb03d7..c1d8bee8f701 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -1886,10 +1886,10 @@ out: * h_get_mpp * H_GET_MPP hcall returns info in 7 parms */ -int h_get_mpp(struct hvcall_mpp_data *mpp_data) +long h_get_mpp(struct hvcall_mpp_data *mpp_data) { - int rc; - unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; + long rc; rc = plpar_hcall9(H_GET_MPP, retbuf); diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index f73c4d1c26af..6e7029640c0c 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -113,8 +113,8 @@ struct hvcall_ppp_data { */ static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data) { - unsigned long rc; - unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; + long rc; rc = plpar_hcall9(H_GET_PPP, retbuf); @@ -170,20 +170,24 @@ out: kfree(buf); } -static unsigned h_pic(unsigned long *pool_idle_time, - unsigned long *num_procs) +static long h_pic(unsigned long *pool_idle_time, + unsigned long *num_procs) { - unsigned long rc; - unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = {0}; rc = plpar_hcall(H_PIC, retbuf); - *pool_idle_time = retbuf[0]; - *num_procs = retbuf[1]; + if (pool_idle_time) + *pool_idle_time = retbuf[0]; + if (num_procs) + *num_procs = retbuf[1]; return rc; } +unsigned long boot_pool_idle_time; + /* * parse_ppp_data * Parse out the data returned from h_get_ppp and h_pic @@ -193,7 +197,7 @@ static void parse_ppp_data(struct seq_file *m) struct hvcall_ppp_data ppp_data; struct device_node *root; const __be32 *perf_level; - int rc; + long rc; rc = h_get_ppp(&ppp_data); if (rc) @@ -215,9 +219,15 @@ static void parse_ppp_data(struct seq_file *m) seq_printf(m, "pool_capacity=%d\n", ppp_data.active_procs_in_pool * 100); - h_pic(&pool_idle_time, &pool_procs); - seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time); - seq_printf(m, "pool_num_procs=%ld\n", pool_procs); + /* In case h_pic call is not successful, this would result in + * APP values being wrong in tools like lparstat. + */ + + if (h_pic(&pool_idle_time, &pool_procs) == H_SUCCESS) { + seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time); + seq_printf(m, "pool_num_procs=%ld\n", pool_procs); + seq_printf(m, "boot_pool_idle_time=%ld\n", boot_pool_idle_time); + } } seq_printf(m, "unallocated_capacity_weight=%d\n", @@ -792,6 +802,7 @@ static const struct proc_ops lparcfg_proc_ops = { static int __init lparcfg_init(void) { umode_t mode = 0444; + long retval; /* Allow writing if we have FW_FEATURE_SPLPAR */ if (firmware_has_feature(FW_FEATURE_SPLPAR)) @@ -801,6 +812,16 @@ static int __init lparcfg_init(void) printk(KERN_ERR "Failed to create powerpc/lparcfg\n"); return -EIO; } + + /* If this call fails, it would result in APP values + * being wrong for since boot reports of lparstat + */ + retval = h_pic(&boot_pool_idle_time, NULL); + + if (retval != H_SUCCESS) + pr_debug("H_PIC failed during lparcfg init retval: %ld\n", + retval); + return 0; } machine_device_initcall(pseries, lparcfg_init); diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index c233f9db039b..9b6420eb3567 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -16,7 +16,8 @@ #include <linux/nd.h> #include <asm/plpar_wrappers.h> -#include <asm/papr_pdsm.h> +#include <uapi/linux/papr_pdsm.h> +#include <linux/papr_scm.h> #include <asm/mce.h> #include <asm/unaligned.h> #include <linux/perf_event.h> @@ -29,46 +30,6 @@ (1ul << ND_CMD_SET_CONFIG_DATA) | \ (1ul << ND_CMD_CALL)) -/* DIMM health bitmap indicators */ -/* SCM device is unable to persist memory contents */ -#define PAPR_PMEM_UNARMED (1ULL << (63 - 0)) -/* SCM device failed to persist memory contents */ -#define PAPR_PMEM_SHUTDOWN_DIRTY (1ULL << (63 - 1)) -/* SCM device contents are persisted from previous IPL */ -#define PAPR_PMEM_SHUTDOWN_CLEAN (1ULL << (63 - 2)) -/* SCM device contents are not persisted from previous IPL */ -#define PAPR_PMEM_EMPTY (1ULL << (63 - 3)) -/* SCM device memory life remaining is critically low */ -#define PAPR_PMEM_HEALTH_CRITICAL (1ULL << (63 - 4)) -/* SCM device will be garded off next IPL due to failure */ -#define PAPR_PMEM_HEALTH_FATAL (1ULL << (63 - 5)) -/* SCM contents cannot persist due to current platform health status */ -#define PAPR_PMEM_HEALTH_UNHEALTHY (1ULL << (63 - 6)) -/* SCM device is unable to persist memory contents in certain conditions */ -#define PAPR_PMEM_HEALTH_NON_CRITICAL (1ULL << (63 - 7)) -/* SCM device is encrypted */ -#define PAPR_PMEM_ENCRYPTED (1ULL << (63 - 8)) -/* SCM device has been scrubbed and locked */ -#define PAPR_PMEM_SCRUBBED_AND_LOCKED (1ULL << (63 - 9)) - -/* Bits status indicators for health bitmap indicating unarmed dimm */ -#define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED | \ - PAPR_PMEM_HEALTH_UNHEALTHY) - -/* Bits status indicators for health bitmap indicating unflushed dimm */ -#define PAPR_PMEM_BAD_SHUTDOWN_MASK (PAPR_PMEM_SHUTDOWN_DIRTY) - -/* Bits status indicators for health bitmap indicating unrestored dimm */ -#define PAPR_PMEM_BAD_RESTORE_MASK (PAPR_PMEM_EMPTY) - -/* Bit status indicators for smart event notification */ -#define PAPR_PMEM_SMART_EVENT_MASK (PAPR_PMEM_HEALTH_CRITICAL | \ - PAPR_PMEM_HEALTH_FATAL | \ - PAPR_PMEM_HEALTH_UNHEALTHY) - -#define PAPR_SCM_PERF_STATS_EYECATCHER __stringify(SCMSTATS) -#define PAPR_SCM_PERF_STATS_VERSION 0x1 - /* Struct holding a single performance metric */ struct papr_scm_perf_stat { u8 stat_id[8]; diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index 1772ae3d193d..6dbc73eb2ca2 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -18,33 +18,6 @@ #include <asm/pci.h> #include "pseries.h" -#if 0 -void pcibios_name_device(struct pci_dev *dev) -{ - struct device_node *dn; - - /* - * Add IBM loc code (slot) as a prefix to the device names for service - */ - dn = pci_device_to_OF_node(dev); - if (dn) { - const char *loc_code = of_get_property(dn, "ibm,loc-code", - NULL); - if (loc_code) { - int loc_len = strlen(loc_code); - if (loc_len < sizeof(dev->dev.name)) { - memmove(dev->dev.name+loc_len+1, dev->dev.name, - sizeof(dev->dev.name)-loc_len-1); - memcpy(dev->dev.name, loc_code, loc_len); - dev->dev.name[loc_len] = ' '; - dev->dev.name[sizeof(dev->dev.name)-1] = '\0'; - } - } - } -} -DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_name_device); -#endif - #ifdef CONFIG_PCI_IOV #define MAX_VFS_FOR_MAP_PE 256 struct pe_map_bar_entry { diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.c b/arch/powerpc/platforms/pseries/rtas-fadump.c index b5853e9fcc3c..eceb3289383e 100644 --- a/arch/powerpc/platforms/pseries/rtas-fadump.c +++ b/arch/powerpc/platforms/pseries/rtas-fadump.c @@ -18,6 +18,7 @@ #include <asm/page.h> #include <asm/rtas.h> +#include <asm/setup.h> #include <asm/fadump.h> #include <asm/fadump-internal.h> @@ -29,9 +30,6 @@ static const struct rtas_fadump_mem_struct *fdm_active; static void rtas_fadump_update_config(struct fw_dump *fadump_conf, const struct rtas_fadump_mem_struct *fdm) { - fadump_conf->boot_mem_dest_addr = - be64_to_cpu(fdm->rmr_region.destination_address); - fadump_conf->fadumphdr_addr = (fadump_conf->boot_mem_dest_addr + fadump_conf->boot_memory_size); } @@ -43,20 +41,56 @@ static void rtas_fadump_update_config(struct fw_dump *fadump_conf, static void __init rtas_fadump_get_config(struct fw_dump *fadump_conf, const struct rtas_fadump_mem_struct *fdm) { - fadump_conf->boot_mem_addr[0] = - be64_to_cpu(fdm->rmr_region.source_address); - fadump_conf->boot_mem_sz[0] = be64_to_cpu(fdm->rmr_region.source_len); - fadump_conf->boot_memory_size = fadump_conf->boot_mem_sz[0]; + unsigned long base, size, last_end, hole_size; - fadump_conf->boot_mem_top = fadump_conf->boot_memory_size; - fadump_conf->boot_mem_regs_cnt = 1; + last_end = 0; + hole_size = 0; + fadump_conf->boot_memory_size = 0; + fadump_conf->boot_mem_regs_cnt = 0; + pr_debug("Boot memory regions:\n"); + for (int i = 0; i < be16_to_cpu(fdm->header.dump_num_sections); i++) { + int type = be16_to_cpu(fdm->rgn[i].source_data_type); + u64 addr; - /* - * Start address of reserve dump area (permanent reservation) for - * re-registering FADump after dump capture. - */ - fadump_conf->reserve_dump_area_start = - be64_to_cpu(fdm->cpu_state_data.destination_address); + switch (type) { + case RTAS_FADUMP_CPU_STATE_DATA: + addr = be64_to_cpu(fdm->rgn[i].destination_address); + + fadump_conf->cpu_state_dest_vaddr = (u64)__va(addr); + /* + * Start address of reserve dump area (permanent reservation) for + * re-registering FADump after dump capture. + */ + fadump_conf->reserve_dump_area_start = addr; + break; + case RTAS_FADUMP_HPTE_REGION: + /* Not processed currently. */ + break; + case RTAS_FADUMP_REAL_MODE_REGION: + base = be64_to_cpu(fdm->rgn[i].source_address); + size = be64_to_cpu(fdm->rgn[i].source_len); + pr_debug("\t[%03d] base: 0x%lx, size: 0x%lx\n", i, base, size); + if (!base) { + fadump_conf->boot_mem_dest_addr = + be64_to_cpu(fdm->rgn[i].destination_address); + } + + fadump_conf->boot_mem_addr[fadump_conf->boot_mem_regs_cnt] = base; + fadump_conf->boot_mem_sz[fadump_conf->boot_mem_regs_cnt] = size; + fadump_conf->boot_memory_size += size; + hole_size += (base - last_end); + last_end = base + size; + fadump_conf->boot_mem_regs_cnt++; + break; + case RTAS_FADUMP_PARAM_AREA: + fadump_conf->param_area = be64_to_cpu(fdm->rgn[i].destination_address); + break; + default: + pr_warn("Section type %d unsupported on this kernel. Ignoring!\n", type); + break; + } + } + fadump_conf->boot_mem_top = fadump_conf->boot_memory_size + hole_size; rtas_fadump_update_config(fadump_conf, fdm); } @@ -64,16 +98,15 @@ static void __init rtas_fadump_get_config(struct fw_dump *fadump_conf, static u64 rtas_fadump_init_mem_struct(struct fw_dump *fadump_conf) { u64 addr = fadump_conf->reserve_dump_area_start; + u16 sec_cnt = 0; memset(&fdm, 0, sizeof(struct rtas_fadump_mem_struct)); addr = addr & PAGE_MASK; fdm.header.dump_format_version = cpu_to_be32(0x00000001); - fdm.header.dump_num_sections = cpu_to_be16(3); fdm.header.dump_status_flag = 0; fdm.header.offset_first_dump_section = - cpu_to_be32((u32)offsetof(struct rtas_fadump_mem_struct, - cpu_state_data)); + cpu_to_be32((u32)offsetof(struct rtas_fadump_mem_struct, rgn)); /* * Fields for disk dump option. @@ -89,25 +122,22 @@ static u64 rtas_fadump_init_mem_struct(struct fw_dump *fadump_conf) /* Kernel dump sections */ /* cpu state data section. */ - fdm.cpu_state_data.request_flag = - cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG); - fdm.cpu_state_data.source_data_type = - cpu_to_be16(RTAS_FADUMP_CPU_STATE_DATA); - fdm.cpu_state_data.source_address = 0; - fdm.cpu_state_data.source_len = - cpu_to_be64(fadump_conf->cpu_state_data_size); - fdm.cpu_state_data.destination_address = cpu_to_be64(addr); + fdm.rgn[sec_cnt].request_flag = cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG); + fdm.rgn[sec_cnt].source_data_type = cpu_to_be16(RTAS_FADUMP_CPU_STATE_DATA); + fdm.rgn[sec_cnt].source_address = 0; + fdm.rgn[sec_cnt].source_len = cpu_to_be64(fadump_conf->cpu_state_data_size); + fdm.rgn[sec_cnt].destination_address = cpu_to_be64(addr); addr += fadump_conf->cpu_state_data_size; + sec_cnt++; /* hpte region section */ - fdm.hpte_region.request_flag = cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG); - fdm.hpte_region.source_data_type = - cpu_to_be16(RTAS_FADUMP_HPTE_REGION); - fdm.hpte_region.source_address = 0; - fdm.hpte_region.source_len = - cpu_to_be64(fadump_conf->hpte_region_size); - fdm.hpte_region.destination_address = cpu_to_be64(addr); + fdm.rgn[sec_cnt].request_flag = cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG); + fdm.rgn[sec_cnt].source_data_type = cpu_to_be16(RTAS_FADUMP_HPTE_REGION); + fdm.rgn[sec_cnt].source_address = 0; + fdm.rgn[sec_cnt].source_len = cpu_to_be64(fadump_conf->hpte_region_size); + fdm.rgn[sec_cnt].destination_address = cpu_to_be64(addr); addr += fadump_conf->hpte_region_size; + sec_cnt++; /* * Align boot memory area destination address to page boundary to @@ -115,14 +145,29 @@ static u64 rtas_fadump_init_mem_struct(struct fw_dump *fadump_conf) */ addr = PAGE_ALIGN(addr); - /* RMA region section */ - fdm.rmr_region.request_flag = cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG); - fdm.rmr_region.source_data_type = - cpu_to_be16(RTAS_FADUMP_REAL_MODE_REGION); - fdm.rmr_region.source_address = cpu_to_be64(0); - fdm.rmr_region.source_len = cpu_to_be64(fadump_conf->boot_memory_size); - fdm.rmr_region.destination_address = cpu_to_be64(addr); - addr += fadump_conf->boot_memory_size; + /* First boot memory region destination address */ + fadump_conf->boot_mem_dest_addr = addr; + for (int i = 0; i < fadump_conf->boot_mem_regs_cnt; i++) { + /* Boot memory regions */ + fdm.rgn[sec_cnt].request_flag = cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG); + fdm.rgn[sec_cnt].source_data_type = cpu_to_be16(RTAS_FADUMP_REAL_MODE_REGION); + fdm.rgn[sec_cnt].source_address = cpu_to_be64(fadump_conf->boot_mem_addr[i]); + fdm.rgn[sec_cnt].source_len = cpu_to_be64(fadump_conf->boot_mem_sz[i]); + fdm.rgn[sec_cnt].destination_address = cpu_to_be64(addr); + addr += fadump_conf->boot_mem_sz[i]; + sec_cnt++; + } + + /* Parameters area */ + if (fadump_conf->param_area) { + fdm.rgn[sec_cnt].request_flag = cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG); + fdm.rgn[sec_cnt].source_data_type = cpu_to_be16(RTAS_FADUMP_PARAM_AREA); + fdm.rgn[sec_cnt].source_address = cpu_to_be64(fadump_conf->param_area); + fdm.rgn[sec_cnt].source_len = cpu_to_be64(COMMAND_LINE_SIZE); + fdm.rgn[sec_cnt].destination_address = cpu_to_be64(fadump_conf->param_area); + sec_cnt++; + } + fdm.header.dump_num_sections = cpu_to_be16(sec_cnt); rtas_fadump_update_config(fadump_conf, &fdm); @@ -136,14 +181,21 @@ static u64 rtas_fadump_get_bootmem_min(void) static int rtas_fadump_register(struct fw_dump *fadump_conf) { - unsigned int wait_time; + unsigned int wait_time, fdm_size; int rc, err = -EIO; + /* + * Platform requires the exact size of the Dump Memory Structure. + * Avoid including any unused rgns in the calculation, as this + * could result in a parameter error (-3) from the platform. + */ + fdm_size = sizeof(struct rtas_fadump_section_header); + fdm_size += be16_to_cpu(fdm.header.dump_num_sections) * sizeof(struct rtas_fadump_section); + /* TODO: Add upper time limit for the delay */ do { rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1, - NULL, FADUMP_REGISTER, &fdm, - sizeof(struct rtas_fadump_mem_struct)); + NULL, FADUMP_REGISTER, &fdm, fdm_size); wait_time = rtas_busy_delay_time(rc); if (wait_time) @@ -161,9 +213,7 @@ static int rtas_fadump_register(struct fw_dump *fadump_conf) pr_err("Failed to register. Hardware Error(%d).\n", rc); break; case -3: - if (!is_fadump_boot_mem_contiguous()) - pr_err("Can't have holes in boot memory area.\n"); - else if (!is_fadump_reserved_mem_contiguous()) + if (!is_fadump_reserved_mem_contiguous()) pr_err("Can't have holes in reserved memory area.\n"); pr_err("Failed to register. Parameter Error(%d).\n", rc); @@ -316,11 +366,9 @@ static int __init rtas_fadump_build_cpu_notes(struct fw_dump *fadump_conf) u32 num_cpus, *note_buf; int i, rc = 0, cpu = 0; struct pt_regs regs; - unsigned long addr; void *vaddr; - addr = be64_to_cpu(fdm_active->cpu_state_data.destination_address); - vaddr = __va(addr); + vaddr = (void *)fadump_conf->cpu_state_dest_vaddr; reg_header = vaddr; if (be64_to_cpu(reg_header->magic_number) != @@ -375,11 +423,8 @@ static int __init rtas_fadump_build_cpu_notes(struct fw_dump *fadump_conf) } final_note(note_buf); - if (fdh) { - pr_debug("Updating elfcore header (%llx) with cpu notes\n", - fdh->elfcorehdr_addr); - fadump_update_elfcore_header(__va(fdh->elfcorehdr_addr)); - } + pr_debug("Updating elfcore header (%llx) with cpu notes\n", fadump_conf->elfcorehdr_addr); + fadump_update_elfcore_header((char *)fadump_conf->elfcorehdr_addr); return 0; error_out: @@ -389,57 +434,66 @@ error_out: } /* - * Validate and process the dump data stored by firmware before exporting - * it through '/proc/vmcore'. + * Validate and process the dump data stored by the firmware, and update + * the CPU notes of elfcorehdr. */ static int __init rtas_fadump_process(struct fw_dump *fadump_conf) { - struct fadump_crash_info_header *fdh; - int rc = 0; - if (!fdm_active || !fadump_conf->fadumphdr_addr) return -EINVAL; /* Check if the dump data is valid. */ - if ((be16_to_cpu(fdm_active->header.dump_status_flag) == - RTAS_FADUMP_ERROR_FLAG) || - (fdm_active->cpu_state_data.error_flags != 0) || - (fdm_active->rmr_region.error_flags != 0)) { - pr_err("Dump taken by platform is not valid\n"); - return -EINVAL; - } - if ((fdm_active->rmr_region.bytes_dumped != - fdm_active->rmr_region.source_len) || - !fdm_active->cpu_state_data.bytes_dumped) { - pr_err("Dump taken by platform is incomplete\n"); - return -EINVAL; - } + for (int i = 0; i < be16_to_cpu(fdm_active->header.dump_num_sections); i++) { + int type = be16_to_cpu(fdm_active->rgn[i].source_data_type); + int rc = 0; - /* Validate the fadump crash info header */ - fdh = __va(fadump_conf->fadumphdr_addr); - if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) { - pr_err("Crash info header is not valid.\n"); - return -EINVAL; + switch (type) { + case RTAS_FADUMP_CPU_STATE_DATA: + case RTAS_FADUMP_HPTE_REGION: + case RTAS_FADUMP_REAL_MODE_REGION: + if (fdm_active->rgn[i].error_flags != 0) { + pr_err("Dump taken by platform is not valid (%d)\n", i); + rc = -EINVAL; + } + if (fdm_active->rgn[i].bytes_dumped != fdm_active->rgn[i].source_len) { + pr_err("Dump taken by platform is incomplete (%d)\n", i); + rc = -EINVAL; + } + if (rc) { + pr_warn("Region type: %u src addr: 0x%llx dest addr: 0x%llx\n", + be16_to_cpu(fdm_active->rgn[i].source_data_type), + be64_to_cpu(fdm_active->rgn[i].source_address), + be64_to_cpu(fdm_active->rgn[i].destination_address)); + return rc; + } + break; + case RTAS_FADUMP_PARAM_AREA: + if (fdm_active->rgn[i].bytes_dumped != fdm_active->rgn[i].source_len || + fdm_active->rgn[i].error_flags != 0) { + pr_warn("Failed to process additional parameters! Proceeding anyway..\n"); + fadump_conf->param_area = 0; + } + break; + default: + /* + * If the first/crashed kernel added a new region type that the + * second/fadump kernel doesn't recognize, skip it and process + * assuming backward compatibility. + */ + pr_warn("Unknown region found: type: %u src addr: 0x%llx dest addr: 0x%llx\n", + be16_to_cpu(fdm_active->rgn[i].source_data_type), + be64_to_cpu(fdm_active->rgn[i].source_address), + be64_to_cpu(fdm_active->rgn[i].destination_address)); + break; + } } - rc = rtas_fadump_build_cpu_notes(fadump_conf); - if (rc) - return rc; - - /* - * We are done validating dump info and elfcore header is now ready - * to be exported. set elfcorehdr_addr so that vmcore module will - * export the elfcore header through '/proc/vmcore'. - */ - elfcorehdr_addr = fdh->elfcorehdr_addr; - - return 0; + return rtas_fadump_build_cpu_notes(fadump_conf); } static void rtas_fadump_region_show(struct fw_dump *fadump_conf, struct seq_file *m) { - const struct rtas_fadump_section *cpu_data_section; const struct rtas_fadump_mem_struct *fdm_ptr; if (fdm_active) @@ -447,27 +501,49 @@ static void rtas_fadump_region_show(struct fw_dump *fadump_conf, else fdm_ptr = &fdm; - cpu_data_section = &(fdm_ptr->cpu_state_data); - seq_printf(m, "CPU :[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n", - be64_to_cpu(cpu_data_section->destination_address), - be64_to_cpu(cpu_data_section->destination_address) + - be64_to_cpu(cpu_data_section->source_len) - 1, - be64_to_cpu(cpu_data_section->source_len), - be64_to_cpu(cpu_data_section->bytes_dumped)); - - seq_printf(m, "HPTE:[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n", - be64_to_cpu(fdm_ptr->hpte_region.destination_address), - be64_to_cpu(fdm_ptr->hpte_region.destination_address) + - be64_to_cpu(fdm_ptr->hpte_region.source_len) - 1, - be64_to_cpu(fdm_ptr->hpte_region.source_len), - be64_to_cpu(fdm_ptr->hpte_region.bytes_dumped)); - - seq_printf(m, "DUMP: Src: %#016llx, Dest: %#016llx, ", - be64_to_cpu(fdm_ptr->rmr_region.source_address), - be64_to_cpu(fdm_ptr->rmr_region.destination_address)); - seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n", - be64_to_cpu(fdm_ptr->rmr_region.source_len), - be64_to_cpu(fdm_ptr->rmr_region.bytes_dumped)); + + for (int i = 0; i < be16_to_cpu(fdm_ptr->header.dump_num_sections); i++) { + int type = be16_to_cpu(fdm_ptr->rgn[i].source_data_type); + + switch (type) { + case RTAS_FADUMP_CPU_STATE_DATA: + seq_printf(m, "CPU :[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n", + be64_to_cpu(fdm_ptr->rgn[i].destination_address), + be64_to_cpu(fdm_ptr->rgn[i].destination_address) + + be64_to_cpu(fdm_ptr->rgn[i].source_len) - 1, + be64_to_cpu(fdm_ptr->rgn[i].source_len), + be64_to_cpu(fdm_ptr->rgn[i].bytes_dumped)); + break; + case RTAS_FADUMP_HPTE_REGION: + seq_printf(m, "HPTE:[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n", + be64_to_cpu(fdm_ptr->rgn[i].destination_address), + be64_to_cpu(fdm_ptr->rgn[i].destination_address) + + be64_to_cpu(fdm_ptr->rgn[i].source_len) - 1, + be64_to_cpu(fdm_ptr->rgn[i].source_len), + be64_to_cpu(fdm_ptr->rgn[i].bytes_dumped)); + break; + case RTAS_FADUMP_REAL_MODE_REGION: + seq_printf(m, "DUMP: Src: %#016llx, Dest: %#016llx, ", + be64_to_cpu(fdm_ptr->rgn[i].source_address), + be64_to_cpu(fdm_ptr->rgn[i].destination_address)); + seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n", + be64_to_cpu(fdm_ptr->rgn[i].source_len), + be64_to_cpu(fdm_ptr->rgn[i].bytes_dumped)); + break; + case RTAS_FADUMP_PARAM_AREA: + seq_printf(m, "\n[%#016llx-%#016llx]: cmdline append: '%s'\n", + be64_to_cpu(fdm_ptr->rgn[i].destination_address), + be64_to_cpu(fdm_ptr->rgn[i].destination_address) + + be64_to_cpu(fdm_ptr->rgn[i].source_len) - 1, + (char *)__va(be64_to_cpu(fdm_ptr->rgn[i].destination_address))); + break; + default: + seq_printf(m, "Unknown region type %d : Src: %#016llx, Dest: %#016llx, ", + type, be64_to_cpu(fdm_ptr->rgn[i].source_address), + be64_to_cpu(fdm_ptr->rgn[i].destination_address)); + break; + } + } /* Dump is active. Show preserved area start address. */ if (fdm_active) { @@ -483,6 +559,20 @@ static void rtas_fadump_trigger(struct fadump_crash_info_header *fdh, rtas_os_term((char *)msg); } +/* FADUMP_MAX_MEM_REGS or lower */ +static int rtas_fadump_max_boot_mem_rgns(void) +{ + /* + * Version 1 of Kernel Assisted Dump Memory Structure (PAPR) supports 10 sections. + * With one each section taken for CPU state data & HPTE respectively, 8 sections + * can be used for boot memory regions. + * + * If new region(s) is(are) defined, maximum boot memory regions will decrease + * proportionally. + */ + return RTAS_FADUMP_MAX_BOOT_MEM_REGS; +} + static struct fadump_ops rtas_fadump_ops = { .fadump_init_mem_struct = rtas_fadump_init_mem_struct, .fadump_get_bootmem_min = rtas_fadump_get_bootmem_min, @@ -492,6 +582,7 @@ static struct fadump_ops rtas_fadump_ops = { .fadump_process = rtas_fadump_process, .fadump_region_show = rtas_fadump_region_show, .fadump_trigger = rtas_fadump_trigger, + .fadump_max_boot_mem_rgns = rtas_fadump_max_boot_mem_rgns, }; void __init rtas_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) @@ -508,9 +599,10 @@ void __init rtas_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) if (!token) return; - fadump_conf->ibm_configure_kernel_dump = be32_to_cpu(*token); - fadump_conf->ops = &rtas_fadump_ops; - fadump_conf->fadump_supported = 1; + fadump_conf->ibm_configure_kernel_dump = be32_to_cpu(*token); + fadump_conf->ops = &rtas_fadump_ops; + fadump_conf->fadump_supported = 1; + fadump_conf->param_area_supported = 1; /* Firmware supports 64-bit value for size, align it to pagesize. */ fadump_conf->max_copy_size = ALIGN_DOWN(U64_MAX, PAGE_SIZE); diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.h b/arch/powerpc/platforms/pseries/rtas-fadump.h index fd59bd7ca9c3..c109abf6befd 100644 --- a/arch/powerpc/platforms/pseries/rtas-fadump.h +++ b/arch/powerpc/platforms/pseries/rtas-fadump.h @@ -23,12 +23,24 @@ #define RTAS_FADUMP_HPTE_REGION 0x0002 #define RTAS_FADUMP_REAL_MODE_REGION 0x0011 +/* OS defined sections */ +#define RTAS_FADUMP_PARAM_AREA 0x0100 + /* Dump request flag */ #define RTAS_FADUMP_REQUEST_FLAG 0x00000001 /* Dump status flag */ #define RTAS_FADUMP_ERROR_FLAG 0x2000 +/* + * The Firmware Assisted Dump Memory structure supports a maximum of 10 sections + * in the dump memory structure. Presently, three sections are used for + * CPU state data, HPTE & Parameters area, while the remaining seven sections + * can be used for boot memory regions. + */ +#define MAX_SECTIONS 10 +#define RTAS_FADUMP_MAX_BOOT_MEM_REGS 7 + /* Kernel Dump section info */ struct rtas_fadump_section { __be32 request_flag; @@ -61,20 +73,15 @@ struct rtas_fadump_section_header { * Firmware Assisted dump memory structure. This structure is required for * registering future kernel dump with power firmware through rtas call. * - * No disk dump option. Hence disk dump path string section is not included. + * In version 1, the platform permits one section header, dump-disk path + * and ten sections. + * + * Note: No disk dump option. Hence disk dump path string section is not + * included. */ struct rtas_fadump_mem_struct { struct rtas_fadump_section_header header; - - /* Kernel dump sections */ - struct rtas_fadump_section cpu_state_data; - struct rtas_fadump_section hpte_region; - - /* - * TODO: Extend multiple boot memory regions support in the kernel - * for this platform. - */ - struct rtas_fadump_section rmr_region; + struct rtas_fadump_section rgn[MAX_SECTIONS]; }; /* diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index 71d52a670d95..ba3fb7a7f2ea 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -228,7 +228,7 @@ static irqreturn_t pseries_vas_irq_handler(int irq, void *data) struct pseries_vas_window *txwin = data; /* - * The thread hanlder will process this interrupt if it is + * The thread handler will process this interrupt if it is * already running. */ atomic_inc(&txwin->pending_faults); diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index 90ff85c879bf..b2babfdbc40b 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -1592,13 +1592,9 @@ static int vio_hotplug(const struct device *dev, struct kobj_uevent_env *env) const char *cp; dn = dev->of_node; - if (!dn) - return -ENODEV; - cp = of_get_property(dn, "compatible", NULL); - if (!cp) - return -ENODEV; + if (dn && (cp = of_get_property(dn, "compatible", NULL))) + add_uevent_var(env, "MODALIAS=vio:T%sS%s", vio_dev->type, cp); - add_uevent_var(env, "MODALIAS=vio:T%sS%s", vio_dev->type, cp); return 0; } diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index 9cb1d029511a..24a177d164f1 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -1,7 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) - mpic-msi-obj-$(CONFIG_PCI_MSI) += mpic_msi.o mpic_u3msi.o obj-$(CONFIG_MPIC) += mpic.o $(mpic-msi-obj-y) obj-$(CONFIG_MPIC_TIMER) += mpic_timer.o diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index 98096bbfd62e..c0d10c149661 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -24,7 +24,6 @@ #include <linux/suspend.h> #include <linux/memblock.h> #include <linux/gfp.h> -#include <linux/kmemleak.h> #include <linux/of_address.h> #include <asm/io.h> #include <asm/iommu.h> @@ -243,9 +242,6 @@ static void __init allocate_dart(void) if (!dart_tablebase) panic("Failed to allocate 16MB below 2GB for DART table\n"); - /* There is no point scanning the DART space for leaks*/ - kmemleak_no_scan((void *)dart_tablebase); - /* Allocate a spare page to map all invalid DART pages. We need to do * that to work around what looks like a problem with the HT bridge * prefetching into invalid pages and corrupting data diff --git a/arch/powerpc/sysdev/fsl_gtm.c b/arch/powerpc/sysdev/fsl_gtm.c index 39186ad6b3c3..3dabc9621810 100644 --- a/arch/powerpc/sysdev/fsl_gtm.c +++ b/arch/powerpc/sysdev/fsl_gtm.c @@ -77,7 +77,7 @@ struct gtm { static LIST_HEAD(gtms); /** - * gtm_get_timer - request GTM timer to use it with the rest of GTM API + * gtm_get_timer16 - request GTM timer to use it with the rest of GTM API * Context: non-IRQ * * This function reserves GTM timer for later use. It returns gtm_timer @@ -110,7 +110,7 @@ struct gtm_timer *gtm_get_timer16(void) EXPORT_SYMBOL(gtm_get_timer16); /** - * gtm_get_specific_timer - request specific GTM timer + * gtm_get_specific_timer16 - request specific GTM timer * @gtm: specific GTM, pass here GTM's device_node->data * @timer: specific timer number, Timer1 is 0. * Context: non-IRQ @@ -260,7 +260,7 @@ int gtm_set_timer16(struct gtm_timer *tmr, unsigned long usec, bool reload) EXPORT_SYMBOL(gtm_set_timer16); /** - * gtm_set_exact_utimer16 - (re)set 16 bits timer + * gtm_set_exact_timer16 - (re)set 16 bits timer * @tmr: pointer to the gtm_timer structure obtained from gtm_get_timer * @usec: timer interval in microseconds * @reload: if set, the timer will reset upon expiry rather than diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 8e6c84df4ca1..e205135ae1fe 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -564,10 +564,12 @@ static const struct fsl_msi_feature ipic_msi_feature = { .msiir_offset = 0x38, }; +#ifdef CONFIG_EPAPR_PARAVIRT static const struct fsl_msi_feature vmpic_msi_feature = { .fsl_pic_ip = FSL_PIC_IP_VMPIC, .msiir_offset = 0, }; +#endif static const struct of_device_id fsl_of_msi_ids[] = { { diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index a289cb97c1d7..fa01818c1972 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -383,7 +383,7 @@ static unsigned int xive_get_irq(void) * CPU. * * If we find that there is indeed more in there, we call - * force_external_irq_replay() to make Linux synthetize an + * force_external_irq_replay() to make Linux synthesize an * external interrupt on the next call to local_irq_restore(). */ static void xive_do_queue_eoi(struct xive_cpu *xc) @@ -874,7 +874,7 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state) * * This also tells us that it's in flight to a host queue * or has already been fetched but hasn't been EOIed yet - * by the host. This it's potentially using up a host + * by the host. Thus it's potentially using up a host * queue slot. This is important to know because as long * as this is the case, we must not hard-unmask it when * "returning" that interrupt to the host. diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index f1c0fa6ece21..517b963e3e6a 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -415,7 +415,7 @@ static void xive_native_setup_cpu(unsigned int cpu, struct xive_cpu *xc) return; } - /* Grab it's CAM value */ + /* Grab its CAM value */ rc = opal_xive_get_vp_info(vp, NULL, &vp_cam_be, NULL, NULL); if (rc) { pr_err("Failed to get pool VP info CPU %d\n", cpu); diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile index 682c7c0a6f77..d778011060a8 100644 --- a/arch/powerpc/xmon/Makefile +++ b/arch/powerpc/xmon/Makefile @@ -10,8 +10,6 @@ KCSAN_SANITIZE := n # Disable ftrace for the entire directory ccflags-remove-$(CONFIG_FUNCTION_TRACER) += $(CC_FLAGS_FTRACE) -ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) - # Clang stores addresses on the stack causing the frame size to blow # out. See https://github.com/ClangBuiltLinux/linux/issues/252 ccflags-$(CONFIG_CC_IS_CLANG) += -Wframe-larger-than=4096 diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index d79d6633f333..bd4813bad317 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1350,7 +1350,7 @@ static int cpu_cmd(void) } termch = cpu; - if (!scanhex(&cpu)) { + if (!scanhex(&cpu) || cpu >= num_possible_cpus()) { /* print cpus waiting or in xmon */ printf("cpus stopped:"); last_cpu = first_cpu = NR_CPUS; @@ -2772,7 +2772,7 @@ static void dump_pacas(void) termch = c; /* Put c back, it wasn't 'a' */ - if (scanhex(&num)) + if (scanhex(&num) && num < num_possible_cpus()) dump_one_paca(num); else dump_one_paca(xmon_owner); @@ -2845,7 +2845,7 @@ static void dump_xives(void) termch = c; /* Put c back, it wasn't 'a' */ - if (scanhex(&num)) + if (scanhex(&num) && num < num_possible_cpus()) dump_one_xive(num); else dump_one_xive(xmon_owner); diff --git a/arch/riscv/configs/nommu_k210_defconfig b/arch/riscv/configs/nommu_k210_defconfig index 2552e78074a3..af9601da4643 100644 --- a/arch/riscv/configs/nommu_k210_defconfig +++ b/arch/riscv/configs/nommu_k210_defconfig @@ -11,7 +11,7 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_SYSFS_SYSCALL is not set # CONFIG_FHANDLE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set diff --git a/arch/riscv/configs/nommu_k210_sdcard_defconfig b/arch/riscv/configs/nommu_k210_sdcard_defconfig index 8f67fb830585..dd460c649152 100644 --- a/arch/riscv/configs/nommu_k210_sdcard_defconfig +++ b/arch/riscv/configs/nommu_k210_sdcard_defconfig @@ -3,7 +3,7 @@ CONFIG_LOG_BUF_SHIFT=13 CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_SYSFS_SYSCALL is not set # CONFIG_FHANDLE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set diff --git a/arch/riscv/configs/nommu_virt_defconfig b/arch/riscv/configs/nommu_virt_defconfig index de8143d1f738..d4b03dc3c2c0 100644 --- a/arch/riscv/configs/nommu_virt_defconfig +++ b/arch/riscv/configs/nommu_virt_defconfig @@ -10,7 +10,7 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set # CONFIG_FHANDLE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set # CONFIG_TIMERFD is not set diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index 2468c55933cd..25966995da04 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -168,7 +168,8 @@ #define VSIP_TO_HVIP_SHIFT (IRQ_VS_SOFT - IRQ_S_SOFT) #define VSIP_VALID_MASK ((_AC(1, UL) << IRQ_S_SOFT) | \ (_AC(1, UL) << IRQ_S_TIMER) | \ - (_AC(1, UL) << IRQ_S_EXT)) + (_AC(1, UL) << IRQ_S_EXT) | \ + (_AC(1, UL) << IRQ_PMU_OVF)) /* AIA CSR bits */ #define TOPI_IID_SHIFT 16 @@ -281,7 +282,7 @@ #define CSR_HPMCOUNTER30H 0xc9e #define CSR_HPMCOUNTER31H 0xc9f -#define CSR_SSCOUNTOVF 0xda0 +#define CSR_SCOUNTOVF 0xda0 #define CSR_SSTATUS 0x100 #define CSR_SIE 0x104 diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index 484d04a92fa6..d96281278586 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -43,6 +43,17 @@ KVM_ARCH_REQ_FLAGS(5, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(6) +#define KVM_HEDELEG_DEFAULT (BIT(EXC_INST_MISALIGNED) | \ + BIT(EXC_BREAKPOINT) | \ + BIT(EXC_SYSCALL) | \ + BIT(EXC_INST_PAGE_FAULT) | \ + BIT(EXC_LOAD_PAGE_FAULT) | \ + BIT(EXC_STORE_PAGE_FAULT)) + +#define KVM_HIDELEG_DEFAULT (BIT(IRQ_VS_SOFT) | \ + BIT(IRQ_VS_TIMER) | \ + BIT(IRQ_VS_EXT)) + enum kvm_riscv_hfence_type { KVM_RISCV_HFENCE_UNKNOWN = 0, KVM_RISCV_HFENCE_GVMA_VMID_GPA, @@ -169,6 +180,7 @@ struct kvm_vcpu_csr { struct kvm_vcpu_config { u64 henvcfg; u64 hstateen0; + unsigned long hedeleg; }; struct kvm_vcpu_smstateen_csr { @@ -211,6 +223,7 @@ struct kvm_vcpu_arch { /* CPU context upon Guest VCPU reset */ struct kvm_cpu_context guest_reset_context; + spinlock_t reset_cntx_lock; /* CPU CSR context upon Guest VCPU reset */ struct kvm_vcpu_csr guest_reset_csr; @@ -252,8 +265,9 @@ struct kvm_vcpu_arch { /* Cache pages needed to program page tables with spinlock held */ struct kvm_mmu_memory_cache mmu_page_cache; - /* VCPU power-off state */ - bool power_off; + /* VCPU power state */ + struct kvm_mp_state mp_state; + spinlock_t mp_state_lock; /* Don't run the VCPU (blocked) */ bool pause; @@ -374,8 +388,11 @@ int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq); void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu); bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask); +void __kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu); +void __kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu); +bool kvm_riscv_vcpu_stopped(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_sbi_sta_reset(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu); diff --git a/arch/riscv/include/asm/kvm_vcpu_pmu.h b/arch/riscv/include/asm/kvm_vcpu_pmu.h index 395518a1664e..fa0f535bbbf0 100644 --- a/arch/riscv/include/asm/kvm_vcpu_pmu.h +++ b/arch/riscv/include/asm/kvm_vcpu_pmu.h @@ -20,7 +20,7 @@ static_assert(RISCV_KVM_MAX_COUNTERS <= 64); struct kvm_fw_event { /* Current value of the event */ - unsigned long value; + u64 value; /* Event monitoring status */ bool started; @@ -36,6 +36,7 @@ struct kvm_pmc { bool started; /* Monitoring event ID */ unsigned long event_idx; + struct kvm_vcpu *vcpu; }; /* PMU data structure per vcpu */ @@ -50,6 +51,12 @@ struct kvm_pmu { bool init_done; /* Bit map of all the virtual counter used */ DECLARE_BITMAP(pmc_in_use, RISCV_KVM_MAX_COUNTERS); + /* Bit map of all the virtual counter overflown */ + DECLARE_BITMAP(pmc_overflown, RISCV_KVM_MAX_COUNTERS); + /* The address of the counter snapshot area (guest physical address) */ + gpa_t snapshot_addr; + /* The actual data of the snapshot */ + struct riscv_pmu_snapshot_data *sdata; }; #define vcpu_to_pmu(vcpu) (&(vcpu)->arch.pmu_context) @@ -82,9 +89,14 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba unsigned long ctr_mask, unsigned long flags, unsigned long eidx, u64 evtdata, struct kvm_vcpu_sbi_return *retdata); -int kvm_riscv_vcpu_pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, +int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, struct kvm_vcpu_sbi_return *retdata); +int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx, + struct kvm_vcpu_sbi_return *retdata); void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu); +int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low, + unsigned long saddr_high, unsigned long flags, + struct kvm_vcpu_sbi_return *retdata); void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu); diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 6afd6bb4882e..381137ce70d4 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -55,6 +55,9 @@ #define MODULES_LOWEST_VADDR (KERNEL_LINK_ADDR - SZ_2G) #define MODULES_VADDR (PFN_ALIGN((unsigned long)&_end) - SZ_2G) #define MODULES_END (PFN_ALIGN((unsigned long)&_start)) +#else +#define MODULES_VADDR VMALLOC_START +#define MODULES_END VMALLOC_END #endif /* diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 6e68f8dff76b..112a0a0d9f46 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -131,6 +131,8 @@ enum sbi_ext_pmu_fid { SBI_EXT_PMU_COUNTER_START, SBI_EXT_PMU_COUNTER_STOP, SBI_EXT_PMU_COUNTER_FW_READ, + SBI_EXT_PMU_COUNTER_FW_READ_HI, + SBI_EXT_PMU_SNAPSHOT_SET_SHMEM, }; union sbi_pmu_ctr_info { @@ -147,6 +149,13 @@ union sbi_pmu_ctr_info { }; }; +/* Data structure to contain the pmu snapshot data */ +struct riscv_pmu_snapshot_data { + u64 ctr_overflow_mask; + u64 ctr_values[64]; + u64 reserved[447]; +}; + #define RISCV_PMU_RAW_EVENT_MASK GENMASK_ULL(47, 0) #define RISCV_PMU_RAW_EVENT_IDX 0x20000 @@ -232,20 +241,22 @@ enum sbi_pmu_ctr_type { #define SBI_PMU_EVENT_IDX_INVALID 0xFFFFFFFF /* Flags defined for config matching function */ -#define SBI_PMU_CFG_FLAG_SKIP_MATCH (1 << 0) -#define SBI_PMU_CFG_FLAG_CLEAR_VALUE (1 << 1) -#define SBI_PMU_CFG_FLAG_AUTO_START (1 << 2) -#define SBI_PMU_CFG_FLAG_SET_VUINH (1 << 3) -#define SBI_PMU_CFG_FLAG_SET_VSINH (1 << 4) -#define SBI_PMU_CFG_FLAG_SET_UINH (1 << 5) -#define SBI_PMU_CFG_FLAG_SET_SINH (1 << 6) -#define SBI_PMU_CFG_FLAG_SET_MINH (1 << 7) +#define SBI_PMU_CFG_FLAG_SKIP_MATCH BIT(0) +#define SBI_PMU_CFG_FLAG_CLEAR_VALUE BIT(1) +#define SBI_PMU_CFG_FLAG_AUTO_START BIT(2) +#define SBI_PMU_CFG_FLAG_SET_VUINH BIT(3) +#define SBI_PMU_CFG_FLAG_SET_VSINH BIT(4) +#define SBI_PMU_CFG_FLAG_SET_UINH BIT(5) +#define SBI_PMU_CFG_FLAG_SET_SINH BIT(6) +#define SBI_PMU_CFG_FLAG_SET_MINH BIT(7) /* Flags defined for counter start function */ -#define SBI_PMU_START_FLAG_SET_INIT_VALUE (1 << 0) +#define SBI_PMU_START_FLAG_SET_INIT_VALUE BIT(0) +#define SBI_PMU_START_FLAG_INIT_SNAPSHOT BIT(1) /* Flags defined for counter stop function */ -#define SBI_PMU_STOP_FLAG_RESET (1 << 0) +#define SBI_PMU_STOP_FLAG_RESET BIT(0) +#define SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT BIT(1) enum sbi_ext_dbcn_fid { SBI_EXT_DBCN_CONSOLE_WRITE = 0, @@ -266,7 +277,7 @@ struct sbi_sta_struct { u8 pad[47]; } __packed; -#define SBI_STA_SHMEM_DISABLE -1 +#define SBI_SHMEM_DISABLE -1 /* SBI spec version fields */ #define SBI_SPEC_VERSION_DEFAULT 0x1 @@ -284,6 +295,7 @@ struct sbi_sta_struct { #define SBI_ERR_ALREADY_AVAILABLE -6 #define SBI_ERR_ALREADY_STARTED -7 #define SBI_ERR_ALREADY_STOPPED -8 +#define SBI_ERR_NO_SHMEM -9 extern unsigned long sbi_spec_version; struct sbiret { @@ -355,8 +367,8 @@ static inline unsigned long sbi_minor_version(void) static inline unsigned long sbi_mk_version(unsigned long major, unsigned long minor) { - return ((major & SBI_SPEC_VERSION_MAJOR_MASK) << - SBI_SPEC_VERSION_MAJOR_SHIFT) | minor; + return ((major & SBI_SPEC_VERSION_MAJOR_MASK) << SBI_SPEC_VERSION_MAJOR_SHIFT) + | (minor & SBI_SPEC_VERSION_MINOR_MASK); } int sbi_err_map_linux_errno(int err); diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index b1c503c2959c..e878e7cc3978 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -167,6 +167,7 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_ZFA, KVM_RISCV_ISA_EXT_ZTSO, KVM_RISCV_ISA_EXT_ZACAS, + KVM_RISCV_ISA_EXT_SSCOFPMF, KVM_RISCV_ISA_EXT_MAX, }; diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 5e5a82644451..906f9a3a5d65 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -11,7 +11,6 @@ #include <linux/kernel.h> #include <linux/log2.h> #include <linux/moduleloader.h> -#include <linux/vmalloc.h> #include <linux/sizes.h> #include <linux/pgtable.h> #include <asm/alternative.h> @@ -905,17 +904,6 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, return 0; } -#if defined(CONFIG_MMU) && defined(CONFIG_64BIT) -void *module_alloc(unsigned long size) -{ - return __vmalloc_node_range(size, 1, MODULES_VADDR, - MODULES_END, GFP_KERNEL, - PAGE_KERNEL, VM_FLUSH_RESET_PERMS, - NUMA_NO_NODE, - __builtin_return_address(0)); -} -#endif - int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) diff --git a/arch/riscv/kernel/paravirt.c b/arch/riscv/kernel/paravirt.c index 0d6225fd3194..fa6b0339a65d 100644 --- a/arch/riscv/kernel/paravirt.c +++ b/arch/riscv/kernel/paravirt.c @@ -62,7 +62,7 @@ static int sbi_sta_steal_time_set_shmem(unsigned long lo, unsigned long hi, ret = sbi_ecall(SBI_EXT_STA, SBI_EXT_STA_STEAL_TIME_SET_SHMEM, lo, hi, flags, 0, 0, 0); if (ret.error) { - if (lo == SBI_STA_SHMEM_DISABLE && hi == SBI_STA_SHMEM_DISABLE) + if (lo == SBI_SHMEM_DISABLE && hi == SBI_SHMEM_DISABLE) pr_warn("Failed to disable steal-time shmem"); else pr_warn("Failed to set steal-time shmem"); @@ -84,8 +84,8 @@ static int pv_time_cpu_online(unsigned int cpu) static int pv_time_cpu_down_prepare(unsigned int cpu) { - return sbi_sta_steal_time_set_shmem(SBI_STA_SHMEM_DISABLE, - SBI_STA_SHMEM_DISABLE, 0); + return sbi_sta_steal_time_set_shmem(SBI_SHMEM_DISABLE, + SBI_SHMEM_DISABLE, 0); } static u64 pv_time_steal_clock(int cpu) diff --git a/arch/riscv/kernel/probes/ftrace.c b/arch/riscv/kernel/probes/ftrace.c index 7142ec42e889..a69dfa610aa8 100644 --- a/arch/riscv/kernel/probes/ftrace.c +++ b/arch/riscv/kernel/probes/ftrace.c @@ -11,6 +11,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct kprobe_ctlblk *kcb; int bit; + if (unlikely(kprobe_ftrace_disabled)) + return; + bit = ftrace_test_recursion_trylock(ip, parent_ip); if (bit < 0) return; diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c index 2f08c14a933d..e64f2f3064eb 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -104,16 +104,6 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return 0; } -#ifdef CONFIG_MMU -void *alloc_insn_page(void) -{ - return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END, - GFP_KERNEL, PAGE_KERNEL_READ_EXEC, - VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, - __builtin_return_address(0)); -} -#endif - /* install breakpoint in text */ void __kprobes arch_arm_kprobe(struct kprobe *p) { diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c index a944294f6f23..0f0a9d11bb5f 100644 --- a/arch/riscv/kvm/aia.c +++ b/arch/riscv/kvm/aia.c @@ -545,6 +545,9 @@ void kvm_riscv_aia_enable(void) enable_percpu_irq(hgei_parent_irq, irq_get_trigger_type(hgei_parent_irq)); csr_set(CSR_HIE, BIT(IRQ_S_GEXT)); + /* Enable IRQ filtering for overflow interrupt only if sscofpmf is present */ + if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF)) + csr_write(CSR_HVIEN, BIT(IRQ_PMU_OVF)); } void kvm_riscv_aia_disable(void) @@ -558,6 +561,8 @@ void kvm_riscv_aia_disable(void) return; hgctrl = get_cpu_ptr(&aia_hgei); + if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF)) + csr_clear(CSR_HVIEN, BIT(IRQ_PMU_OVF)); /* Disable per-CPU SGEI interrupt */ csr_clear(CSR_HIE, BIT(IRQ_S_GEXT)); disable_percpu_irq(hgei_parent_irq); diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index 225a435d9c9a..bab2ec34cd87 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -22,22 +22,8 @@ long kvm_arch_dev_ioctl(struct file *filp, int kvm_arch_hardware_enable(void) { - unsigned long hideleg, hedeleg; - - hedeleg = 0; - hedeleg |= (1UL << EXC_INST_MISALIGNED); - hedeleg |= (1UL << EXC_BREAKPOINT); - hedeleg |= (1UL << EXC_SYSCALL); - hedeleg |= (1UL << EXC_INST_PAGE_FAULT); - hedeleg |= (1UL << EXC_LOAD_PAGE_FAULT); - hedeleg |= (1UL << EXC_STORE_PAGE_FAULT); - csr_write(CSR_HEDELEG, hedeleg); - - hideleg = 0; - hideleg |= (1UL << IRQ_VS_SOFT); - hideleg |= (1UL << IRQ_VS_TIMER); - hideleg |= (1UL << IRQ_VS_EXT); - csr_write(CSR_HIDELEG, hideleg); + csr_write(CSR_HEDELEG, KVM_HEDELEG_DEFAULT); + csr_write(CSR_HIDELEG, KVM_HIDELEG_DEFAULT); /* VS should access only the time counter directly. Everything else should trap */ csr_write(CSR_HCOUNTEREN, 0x02); diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index a9e2fd7245e1..b63650f9b966 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -550,26 +550,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) return false; } -bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) -{ - int ret; - kvm_pfn_t pfn = pte_pfn(range->arg.pte); - - if (!kvm->arch.pgd) - return false; - - WARN_ON(range->end - range->start != 1); - - ret = gstage_map_page(kvm, NULL, range->start << PAGE_SHIFT, - __pfn_to_phys(pfn), PAGE_SIZE, true, true); - if (ret) { - kvm_debug("Failed to map G-stage page (error %d)\n", ret); - return true; - } - - return false; -} - bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { pte_t *ptep; diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index b5ca9f2e98ac..17e21df36cc1 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -64,7 +64,9 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu) memcpy(csr, reset_csr, sizeof(*csr)); + spin_lock(&vcpu->arch.reset_cntx_lock); memcpy(cntx, reset_cntx, sizeof(*cntx)); + spin_unlock(&vcpu->arch.reset_cntx_lock); kvm_riscv_vcpu_fp_reset(vcpu); @@ -102,6 +104,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) struct kvm_cpu_context *cntx; struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr; + spin_lock_init(&vcpu->arch.mp_state_lock); + /* Mark this VCPU never ran */ vcpu->arch.ran_atleast_once = false; vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO; @@ -119,12 +123,16 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) spin_lock_init(&vcpu->arch.hfence_lock); /* Setup reset state of shadow SSTATUS and HSTATUS CSRs */ + spin_lock_init(&vcpu->arch.reset_cntx_lock); + + spin_lock(&vcpu->arch.reset_cntx_lock); cntx = &vcpu->arch.guest_reset_context; cntx->sstatus = SR_SPP | SR_SPIE; cntx->hstatus = 0; cntx->hstatus |= HSTATUS_VTW; cntx->hstatus |= HSTATUS_SPVP; cntx->hstatus |= HSTATUS_SPV; + spin_unlock(&vcpu->arch.reset_cntx_lock); if (kvm_riscv_vcpu_alloc_vector_context(vcpu, cntx)) return -ENOMEM; @@ -201,7 +209,7 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) && - !vcpu->arch.power_off && !vcpu->arch.pause); + !kvm_riscv_vcpu_stopped(vcpu) && !vcpu->arch.pause); } int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) @@ -365,6 +373,13 @@ void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu) } } + /* Sync up the HVIP.LCOFIP bit changes (only clear) by the guest */ + if ((csr->hvip ^ hvip) & (1UL << IRQ_PMU_OVF)) { + if (!(hvip & (1UL << IRQ_PMU_OVF)) && + !test_and_set_bit(IRQ_PMU_OVF, v->irqs_pending_mask)) + clear_bit(IRQ_PMU_OVF, v->irqs_pending); + } + /* Sync-up AIA high interrupts */ kvm_riscv_vcpu_aia_sync_interrupts(vcpu); @@ -382,7 +397,8 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) if (irq < IRQ_LOCAL_MAX && irq != IRQ_VS_SOFT && irq != IRQ_VS_TIMER && - irq != IRQ_VS_EXT) + irq != IRQ_VS_EXT && + irq != IRQ_PMU_OVF) return -EINVAL; set_bit(irq, vcpu->arch.irqs_pending); @@ -397,14 +413,15 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) { /* - * We only allow VS-mode software, timer, and external + * We only allow VS-mode software, timer, counter overflow and external * interrupts when irq is one of the local interrupts * defined by RISC-V privilege specification. */ if (irq < IRQ_LOCAL_MAX && irq != IRQ_VS_SOFT && irq != IRQ_VS_TIMER && - irq != IRQ_VS_EXT) + irq != IRQ_VS_EXT && + irq != IRQ_PMU_OVF) return -EINVAL; clear_bit(irq, vcpu->arch.irqs_pending); @@ -429,26 +446,42 @@ bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask) return kvm_riscv_vcpu_aia_has_interrupts(vcpu, mask); } -void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu) +void __kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu) { - vcpu->arch.power_off = true; + WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED); kvm_make_request(KVM_REQ_SLEEP, vcpu); kvm_vcpu_kick(vcpu); } -void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu) +void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu) +{ + spin_lock(&vcpu->arch.mp_state_lock); + __kvm_riscv_vcpu_power_off(vcpu); + spin_unlock(&vcpu->arch.mp_state_lock); +} + +void __kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu) { - vcpu->arch.power_off = false; + WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE); kvm_vcpu_wake_up(vcpu); } +void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu) +{ + spin_lock(&vcpu->arch.mp_state_lock); + __kvm_riscv_vcpu_power_on(vcpu); + spin_unlock(&vcpu->arch.mp_state_lock); +} + +bool kvm_riscv_vcpu_stopped(struct kvm_vcpu *vcpu) +{ + return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_STOPPED; +} + int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - if (vcpu->arch.power_off) - mp_state->mp_state = KVM_MP_STATE_STOPPED; - else - mp_state->mp_state = KVM_MP_STATE_RUNNABLE; + *mp_state = READ_ONCE(vcpu->arch.mp_state); return 0; } @@ -458,25 +491,36 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, { int ret = 0; + spin_lock(&vcpu->arch.mp_state_lock); + switch (mp_state->mp_state) { case KVM_MP_STATE_RUNNABLE: - vcpu->arch.power_off = false; + WRITE_ONCE(vcpu->arch.mp_state, *mp_state); break; case KVM_MP_STATE_STOPPED: - kvm_riscv_vcpu_power_off(vcpu); + __kvm_riscv_vcpu_power_off(vcpu); break; default: ret = -EINVAL; } + spin_unlock(&vcpu->arch.mp_state_lock); + return ret; } int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - /* TODO; To be implemented later. */ - return -EINVAL; + if (dbg->control & KVM_GUESTDBG_ENABLE) { + vcpu->guest_debug = dbg->control; + vcpu->arch.cfg.hedeleg &= ~BIT(EXC_BREAKPOINT); + } else { + vcpu->guest_debug = 0; + vcpu->arch.cfg.hedeleg |= BIT(EXC_BREAKPOINT); + } + + return 0; } static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu) @@ -505,6 +549,10 @@ static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu) if (riscv_isa_extension_available(isa, SMSTATEEN)) cfg->hstateen0 |= SMSTATEEN0_SSTATEEN0; } + + cfg->hedeleg = KVM_HEDELEG_DEFAULT; + if (vcpu->guest_debug) + cfg->hedeleg &= ~BIT(EXC_BREAKPOINT); } void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -519,6 +567,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) csr_write(CSR_VSEPC, csr->vsepc); csr_write(CSR_VSCAUSE, csr->vscause); csr_write(CSR_VSTVAL, csr->vstval); + csr_write(CSR_HEDELEG, cfg->hedeleg); csr_write(CSR_HVIP, csr->hvip); csr_write(CSR_VSATP, csr->vsatp); csr_write(CSR_HENVCFG, cfg->henvcfg); @@ -584,11 +633,11 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) { kvm_vcpu_srcu_read_unlock(vcpu); rcuwait_wait_event(wait, - (!vcpu->arch.power_off) && (!vcpu->arch.pause), + (!kvm_riscv_vcpu_stopped(vcpu)) && (!vcpu->arch.pause), TASK_INTERRUPTIBLE); kvm_vcpu_srcu_read_lock(vcpu); - if (vcpu->arch.power_off || vcpu->arch.pause) { + if (kvm_riscv_vcpu_stopped(vcpu) || vcpu->arch.pause) { /* * Awaken to handle a signal, request to * sleep again later. diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c index 2415722c01b8..5761f95abb60 100644 --- a/arch/riscv/kvm/vcpu_exit.c +++ b/arch/riscv/kvm/vcpu_exit.c @@ -204,6 +204,10 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV) ret = kvm_riscv_vcpu_sbi_ecall(vcpu, run); break; + case EXC_BREAKPOINT: + run->exit_reason = KVM_EXIT_DEBUG; + ret = 0; + break; default: break; } diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index 994adc26db4b..c676275ea0a0 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -36,6 +36,7 @@ static const unsigned long kvm_isa_ext_arr[] = { /* Multi letter extensions (alphabetically sorted) */ KVM_ISA_EXT_ARR(SMSTATEEN), KVM_ISA_EXT_ARR(SSAIA), + KVM_ISA_EXT_ARR(SSCOFPMF), KVM_ISA_EXT_ARR(SSTC), KVM_ISA_EXT_ARR(SVINVAL), KVM_ISA_EXT_ARR(SVNAPOT), @@ -99,6 +100,9 @@ static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext) switch (ext) { case KVM_RISCV_ISA_EXT_H: return false; + case KVM_RISCV_ISA_EXT_SSCOFPMF: + /* Sscofpmf depends on interrupt filtering defined in ssaia */ + return __riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSAIA); case KVM_RISCV_ISA_EXT_V: return riscv_v_vstate_ctrl_user_allowed(); default: @@ -116,6 +120,8 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_C: case KVM_RISCV_ISA_EXT_I: case KVM_RISCV_ISA_EXT_M: + /* There is not architectural config bit to disable sscofpmf completely */ + case KVM_RISCV_ISA_EXT_SSCOFPMF: case KVM_RISCV_ISA_EXT_SSTC: case KVM_RISCV_ISA_EXT_SVINVAL: case KVM_RISCV_ISA_EXT_SVNAPOT: diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c index 86391a5061dd..04db1f993c47 100644 --- a/arch/riscv/kvm/vcpu_pmu.c +++ b/arch/riscv/kvm/vcpu_pmu.c @@ -14,6 +14,7 @@ #include <asm/csr.h> #include <asm/kvm_vcpu_sbi.h> #include <asm/kvm_vcpu_pmu.h> +#include <asm/sbi.h> #include <linux/bitops.h> #define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs) @@ -39,7 +40,7 @@ static u64 kvm_pmu_get_sample_period(struct kvm_pmc *pmc) u64 sample_period; if (!pmc->counter_val) - sample_period = counter_val_mask + 1; + sample_period = counter_val_mask; else sample_period = (-pmc->counter_val) & counter_val_mask; @@ -196,6 +197,36 @@ static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx, return kvm_pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask); } +static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx, + unsigned long *out_val) +{ + struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); + struct kvm_pmc *pmc; + int fevent_code; + + if (!IS_ENABLED(CONFIG_32BIT)) { + pr_warn("%s: should be invoked for only RV32\n", __func__); + return -EINVAL; + } + + if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) { + pr_warn("Invalid counter id [%ld]during read\n", cidx); + return -EINVAL; + } + + pmc = &kvpmu->pmc[cidx]; + + if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW) + return -EINVAL; + + fevent_code = get_event_code(pmc->event_idx); + pmc->counter_val = kvpmu->fw_event[fevent_code].value; + + *out_val = pmc->counter_val >> 32; + + return 0; +} + static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, unsigned long *out_val) { @@ -204,6 +235,11 @@ static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, u64 enabled, running; int fevent_code; + if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) { + pr_warn("Invalid counter id [%ld] during read\n", cidx); + return -EINVAL; + } + pmc = &kvpmu->pmc[cidx]; if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) { @@ -229,8 +265,50 @@ static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ct return 0; } -static int kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr, - unsigned long flags, unsigned long eidx, unsigned long evtdata) +static void kvm_riscv_pmu_overflow(struct perf_event *perf_event, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct kvm_pmc *pmc = perf_event->overflow_handler_context; + struct kvm_vcpu *vcpu = pmc->vcpu; + struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); + struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu); + u64 period; + + /* + * Stop the event counting by directly accessing the perf_event. + * Otherwise, this needs to deferred via a workqueue. + * That will introduce skew in the counter value because the actual + * physical counter would start after returning from this function. + * It will be stopped again once the workqueue is scheduled + */ + rpmu->pmu.stop(perf_event, PERF_EF_UPDATE); + + /* + * The hw counter would start automatically when this function returns. + * Thus, the host may continue to interrupt and inject it to the guest + * even without the guest configuring the next event. Depending on the hardware + * the host may have some sluggishness only if privilege mode filtering is not + * available. In an ideal world, where qemu is not the only capable hardware, + * this can be removed. + * FYI: ARM64 does this way while x86 doesn't do anything as such. + * TODO: Should we keep it for RISC-V ? + */ + period = -(local64_read(&perf_event->count)); + + local64_set(&perf_event->hw.period_left, 0); + perf_event->attr.sample_period = period; + perf_event->hw.sample_period = period; + + set_bit(pmc->idx, kvpmu->pmc_overflown); + kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF); + + rpmu->pmu.start(perf_event, PERF_EF_RELOAD); +} + +static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr, + unsigned long flags, unsigned long eidx, + unsigned long evtdata) { struct perf_event *event; @@ -247,7 +325,7 @@ static int kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr */ attr->sample_period = kvm_pmu_get_sample_period(pmc); - event = perf_event_create_kernel_counter(attr, -1, current, NULL, pmc); + event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc); if (IS_ERR(event)) { pr_err("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event)); return PTR_ERR(event); @@ -310,6 +388,80 @@ int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num, return ret; } +static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu) +{ + struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); + int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data); + + if (kvpmu->sdata) { + if (kvpmu->snapshot_addr != INVALID_GPA) { + memset(kvpmu->sdata, 0, snapshot_area_size); + kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, + kvpmu->sdata, snapshot_area_size); + } else { + pr_warn("snapshot address invalid\n"); + } + kfree(kvpmu->sdata); + kvpmu->sdata = NULL; + } + kvpmu->snapshot_addr = INVALID_GPA; +} + +int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low, + unsigned long saddr_high, unsigned long flags, + struct kvm_vcpu_sbi_return *retdata) +{ + struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); + int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data); + int sbiret = 0; + gpa_t saddr; + unsigned long hva; + bool writable; + + if (!kvpmu || flags) { + sbiret = SBI_ERR_INVALID_PARAM; + goto out; + } + + if (saddr_low == SBI_SHMEM_DISABLE && saddr_high == SBI_SHMEM_DISABLE) { + kvm_pmu_clear_snapshot_area(vcpu); + return 0; + } + + saddr = saddr_low; + + if (saddr_high != 0) { + if (IS_ENABLED(CONFIG_32BIT)) + saddr |= ((gpa_t)saddr_high << 32); + else + sbiret = SBI_ERR_INVALID_ADDRESS; + goto out; + } + + hva = kvm_vcpu_gfn_to_hva_prot(vcpu, saddr >> PAGE_SHIFT, &writable); + if (kvm_is_error_hva(hva) || !writable) { + sbiret = SBI_ERR_INVALID_ADDRESS; + goto out; + } + + kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC); + if (!kvpmu->sdata) + return -ENOMEM; + + if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) { + kfree(kvpmu->sdata); + sbiret = SBI_ERR_FAILURE; + goto out; + } + + kvpmu->snapshot_addr = saddr; + +out: + retdata->err_val = sbiret; + + return 0; +} + int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu, struct kvm_vcpu_sbi_return *retdata) { @@ -343,20 +495,40 @@ int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base, int i, pmc_index, sbiret = 0; struct kvm_pmc *pmc; int fevent_code; + bool snap_flag_set = flags & SBI_PMU_START_FLAG_INIT_SNAPSHOT; if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) { sbiret = SBI_ERR_INVALID_PARAM; goto out; } + if (snap_flag_set) { + if (kvpmu->snapshot_addr == INVALID_GPA) { + sbiret = SBI_ERR_NO_SHMEM; + goto out; + } + if (kvm_vcpu_read_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata, + sizeof(struct riscv_pmu_snapshot_data))) { + pr_warn("Unable to read snapshot shared memory while starting counters\n"); + sbiret = SBI_ERR_FAILURE; + goto out; + } + } /* Start the counters that have been configured and requested by the guest */ for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) { pmc_index = i + ctr_base; if (!test_bit(pmc_index, kvpmu->pmc_in_use)) continue; + /* The guest started the counter again. Reset the overflow status */ + clear_bit(pmc_index, kvpmu->pmc_overflown); pmc = &kvpmu->pmc[pmc_index]; - if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) + if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) { pmc->counter_val = ival; + } else if (snap_flag_set) { + /* The counter index in the snapshot are relative to the counter base */ + pmc->counter_val = kvpmu->sdata->ctr_values[i]; + } + if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) { fevent_code = get_event_code(pmc->event_idx); if (fevent_code >= SBI_PMU_FW_MAX) { @@ -400,12 +572,19 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base, u64 enabled, running; struct kvm_pmc *pmc; int fevent_code; + bool snap_flag_set = flags & SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT; + bool shmem_needs_update = false; if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) { sbiret = SBI_ERR_INVALID_PARAM; goto out; } + if (snap_flag_set && kvpmu->snapshot_addr == INVALID_GPA) { + sbiret = SBI_ERR_NO_SHMEM; + goto out; + } + /* Stop the counters that have been configured and requested by the guest */ for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) { pmc_index = i + ctr_base; @@ -432,21 +611,49 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base, sbiret = SBI_ERR_ALREADY_STOPPED; } - if (flags & SBI_PMU_STOP_FLAG_RESET) { - /* Relase the counter if this is a reset request */ - pmc->counter_val += perf_event_read_value(pmc->perf_event, - &enabled, &running); + if (flags & SBI_PMU_STOP_FLAG_RESET) + /* Release the counter if this is a reset request */ kvm_pmu_release_perf_event(pmc); - } } else { sbiret = SBI_ERR_INVALID_PARAM; } + + if (snap_flag_set && !sbiret) { + if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) + pmc->counter_val = kvpmu->fw_event[fevent_code].value; + else if (pmc->perf_event) + pmc->counter_val += perf_event_read_value(pmc->perf_event, + &enabled, &running); + /* + * The counter and overflow indicies in the snapshot region are w.r.to + * cbase. Modify the set bit in the counter mask instead of the pmc_index + * which indicates the absolute counter index. + */ + if (test_bit(pmc_index, kvpmu->pmc_overflown)) + kvpmu->sdata->ctr_overflow_mask |= BIT(i); + kvpmu->sdata->ctr_values[i] = pmc->counter_val; + shmem_needs_update = true; + } + if (flags & SBI_PMU_STOP_FLAG_RESET) { pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID; clear_bit(pmc_index, kvpmu->pmc_in_use); + clear_bit(pmc_index, kvpmu->pmc_overflown); + if (snap_flag_set) { + /* + * Only clear the given counter as the caller is responsible to + * validate both the overflow mask and configured counters. + */ + kvpmu->sdata->ctr_overflow_mask &= ~BIT(i); + shmem_needs_update = true; + } } } + if (shmem_needs_update) + kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata, + sizeof(struct riscv_pmu_snapshot_data)); + out: retdata->err_val = sbiret; @@ -458,7 +665,8 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba unsigned long eidx, u64 evtdata, struct kvm_vcpu_sbi_return *retdata) { - int ctr_idx, ret, sbiret = 0; + int ctr_idx, sbiret = 0; + long ret; bool is_fevent; unsigned long event_code; u32 etype = kvm_pmu_get_perf_event_type(eidx); @@ -517,8 +725,10 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba kvpmu->fw_event[event_code].started = true; } else { ret = kvm_pmu_create_perf_event(pmc, &attr, flags, eidx, evtdata); - if (ret) - return ret; + if (ret) { + sbiret = SBI_ERR_NOT_SUPPORTED; + goto out; + } } set_bit(ctr_idx, kvpmu->pmc_in_use); @@ -530,7 +740,19 @@ out: return 0; } -int kvm_riscv_vcpu_pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, +int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx, + struct kvm_vcpu_sbi_return *retdata) +{ + int ret; + + ret = pmu_fw_ctr_read_hi(vcpu, cidx, &retdata->out_val); + if (ret == -EINVAL) + retdata->err_val = SBI_ERR_INVALID_PARAM; + + return 0; +} + +int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, struct kvm_vcpu_sbi_return *retdata) { int ret; @@ -566,6 +788,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu) kvpmu->num_hw_ctrs = num_hw_ctrs + 1; kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX; memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event)); + kvpmu->snapshot_addr = INVALID_GPA; if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) { pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA"); @@ -585,6 +808,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu) pmc = &kvpmu->pmc[i]; pmc->idx = i; pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID; + pmc->vcpu = vcpu; if (i < kvpmu->num_hw_ctrs) { pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW; if (i < 3) @@ -601,7 +825,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu) pmc->cinfo.csr = CSR_CYCLE + i; } else { pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW; - pmc->cinfo.width = BITS_PER_LONG - 1; + pmc->cinfo.width = 63; } } @@ -617,14 +841,16 @@ void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu) if (!kvpmu) return; - for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_MAX_COUNTERS) { + for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) { pmc = &kvpmu->pmc[i]; pmc->counter_val = 0; kvm_pmu_release_perf_event(pmc); pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID; } - bitmap_zero(kvpmu->pmc_in_use, RISCV_MAX_COUNTERS); + bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS); + bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS); memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event)); + kvm_pmu_clear_snapshot_area(vcpu); } void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu) diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c index 72a2ffb8dcd1..62f409d4176e 100644 --- a/arch/riscv/kvm/vcpu_sbi.c +++ b/arch/riscv/kvm/vcpu_sbi.c @@ -138,8 +138,11 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu, unsigned long i; struct kvm_vcpu *tmp; - kvm_for_each_vcpu(i, tmp, vcpu->kvm) - tmp->arch.power_off = true; + kvm_for_each_vcpu(i, tmp, vcpu->kvm) { + spin_lock(&vcpu->arch.mp_state_lock); + WRITE_ONCE(tmp->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED); + spin_unlock(&vcpu->arch.mp_state_lock); + } kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP); memset(&run->system_event, 0, sizeof(run->system_event)); diff --git a/arch/riscv/kvm/vcpu_sbi_hsm.c b/arch/riscv/kvm/vcpu_sbi_hsm.c index 7dca0e9381d9..dce667f4b6ab 100644 --- a/arch/riscv/kvm/vcpu_sbi_hsm.c +++ b/arch/riscv/kvm/vcpu_sbi_hsm.c @@ -18,13 +18,20 @@ static int kvm_sbi_hsm_vcpu_start(struct kvm_vcpu *vcpu) struct kvm_cpu_context *cp = &vcpu->arch.guest_context; struct kvm_vcpu *target_vcpu; unsigned long target_vcpuid = cp->a0; + int ret = 0; target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid); if (!target_vcpu) return SBI_ERR_INVALID_PARAM; - if (!target_vcpu->arch.power_off) - return SBI_ERR_ALREADY_AVAILABLE; + spin_lock(&target_vcpu->arch.mp_state_lock); + + if (!kvm_riscv_vcpu_stopped(target_vcpu)) { + ret = SBI_ERR_ALREADY_AVAILABLE; + goto out; + } + + spin_lock(&target_vcpu->arch.reset_cntx_lock); reset_cntx = &target_vcpu->arch.guest_reset_context; /* start address */ reset_cntx->sepc = cp->a1; @@ -32,21 +39,35 @@ static int kvm_sbi_hsm_vcpu_start(struct kvm_vcpu *vcpu) reset_cntx->a0 = target_vcpuid; /* private data passed from kernel */ reset_cntx->a1 = cp->a2; + spin_unlock(&target_vcpu->arch.reset_cntx_lock); + kvm_make_request(KVM_REQ_VCPU_RESET, target_vcpu); - kvm_riscv_vcpu_power_on(target_vcpu); + __kvm_riscv_vcpu_power_on(target_vcpu); - return 0; +out: + spin_unlock(&target_vcpu->arch.mp_state_lock); + + return ret; } static int kvm_sbi_hsm_vcpu_stop(struct kvm_vcpu *vcpu) { - if (vcpu->arch.power_off) - return SBI_ERR_FAILURE; + int ret = 0; - kvm_riscv_vcpu_power_off(vcpu); + spin_lock(&vcpu->arch.mp_state_lock); - return 0; + if (kvm_riscv_vcpu_stopped(vcpu)) { + ret = SBI_ERR_FAILURE; + goto out; + } + + __kvm_riscv_vcpu_power_off(vcpu); + +out: + spin_unlock(&vcpu->arch.mp_state_lock); + + return ret; } static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu) @@ -58,7 +79,7 @@ static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu) target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid); if (!target_vcpu) return SBI_ERR_INVALID_PARAM; - if (!target_vcpu->arch.power_off) + if (!kvm_riscv_vcpu_stopped(target_vcpu)) return SBI_HSM_STATE_STARTED; else if (vcpu->stat.generic.blocking) return SBI_HSM_STATE_SUSPENDED; @@ -71,14 +92,11 @@ static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, { int ret = 0; struct kvm_cpu_context *cp = &vcpu->arch.guest_context; - struct kvm *kvm = vcpu->kvm; unsigned long funcid = cp->a6; switch (funcid) { case SBI_EXT_HSM_HART_START: - mutex_lock(&kvm->lock); ret = kvm_sbi_hsm_vcpu_start(vcpu); - mutex_unlock(&kvm->lock); break; case SBI_EXT_HSM_HART_STOP: ret = kvm_sbi_hsm_vcpu_stop(vcpu); diff --git a/arch/riscv/kvm/vcpu_sbi_pmu.c b/arch/riscv/kvm/vcpu_sbi_pmu.c index 7eca72df2cbd..e4be34e03e83 100644 --- a/arch/riscv/kvm/vcpu_sbi_pmu.c +++ b/arch/riscv/kvm/vcpu_sbi_pmu.c @@ -42,9 +42,9 @@ static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, #endif /* * This can fail if perf core framework fails to create an event. - * Forward the error to userspace because it's an error which - * happened within the host kernel. The other option would be - * to convert to an SBI error and forward to the guest. + * No need to forward the error to userspace and exit the guest. + * The operation can continue without profiling. Forward the + * appropriate SBI error to the guest. */ ret = kvm_riscv_vcpu_pmu_ctr_cfg_match(vcpu, cp->a0, cp->a1, cp->a2, cp->a3, temp, retdata); @@ -62,7 +62,16 @@ static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, ret = kvm_riscv_vcpu_pmu_ctr_stop(vcpu, cp->a0, cp->a1, cp->a2, retdata); break; case SBI_EXT_PMU_COUNTER_FW_READ: - ret = kvm_riscv_vcpu_pmu_ctr_read(vcpu, cp->a0, retdata); + ret = kvm_riscv_vcpu_pmu_fw_ctr_read(vcpu, cp->a0, retdata); + break; + case SBI_EXT_PMU_COUNTER_FW_READ_HI: + if (IS_ENABLED(CONFIG_32BIT)) + ret = kvm_riscv_vcpu_pmu_fw_ctr_read_hi(vcpu, cp->a0, retdata); + else + retdata->out_val = 0; + break; + case SBI_EXT_PMU_SNAPSHOT_SET_SHMEM: + ret = kvm_riscv_vcpu_pmu_snapshot_set_shmem(vcpu, cp->a0, cp->a1, cp->a2, retdata); break; default: retdata->err_val = SBI_ERR_NOT_SUPPORTED; diff --git a/arch/riscv/kvm/vcpu_sbi_sta.c b/arch/riscv/kvm/vcpu_sbi_sta.c index d8cf9ca28c61..5f35427114c1 100644 --- a/arch/riscv/kvm/vcpu_sbi_sta.c +++ b/arch/riscv/kvm/vcpu_sbi_sta.c @@ -93,8 +93,8 @@ static int kvm_sbi_sta_steal_time_set_shmem(struct kvm_vcpu *vcpu) if (flags != 0) return SBI_ERR_INVALID_PARAM; - if (shmem_phys_lo == SBI_STA_SHMEM_DISABLE && - shmem_phys_hi == SBI_STA_SHMEM_DISABLE) { + if (shmem_phys_lo == SBI_SHMEM_DISABLE && + shmem_phys_hi == SBI_SHMEM_DISABLE) { vcpu->arch.sta.shmem = INVALID_GPA; return 0; } diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c index ce58bc48e5b8..7396b8654f45 100644 --- a/arch/riscv/kvm/vm.c +++ b/arch/riscv/kvm/vm.c @@ -186,6 +186,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_READONLY_MEM: case KVM_CAP_MP_STATE: case KVM_CAP_IMMEDIATE_EXIT: + case KVM_CAP_SET_GUEST_DEBUG: r = 1; break; case KVM_CAP_NR_VCPUS: diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 968761843203..9940171c79f0 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -24,6 +24,7 @@ #include <linux/elf.h> #endif #include <linux/kfence.h> +#include <linux/execmem.h> #include <asm/fixmap.h> #include <asm/io.h> @@ -1481,3 +1482,37 @@ void __init pgtable_cache_init(void) preallocate_pgd_pages_range(MODULES_VADDR, MODULES_END, "bpf/modules"); } #endif + +#ifdef CONFIG_EXECMEM +#ifdef CONFIG_MMU +static struct execmem_info execmem_info __ro_after_init; + +struct execmem_info __init *execmem_arch_setup(void) +{ + execmem_info = (struct execmem_info){ + .ranges = { + [EXECMEM_DEFAULT] = { + .start = MODULES_VADDR, + .end = MODULES_END, + .pgprot = PAGE_KERNEL, + .alignment = 1, + }, + [EXECMEM_KPROBES] = { + .start = VMALLOC_START, + .end = VMALLOC_END, + .pgprot = PAGE_KERNEL_READ_EXEC, + .alignment = 1, + }, + [EXECMEM_BPF] = { + .start = BPF_JIT_REGION_START, + .end = BPF_JIT_REGION_END, + .pgprot = PAGE_KERNEL, + .alignment = PAGE_SIZE, + }, + }, + }; + + return &execmem_info; +} +#endif /* CONFIG_MMU */ +#endif /* CONFIG_EXECMEM */ diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c index 8a69d6d81e32..0a96abdaca65 100644 --- a/arch/riscv/net/bpf_jit_core.c +++ b/arch/riscv/net/bpf_jit_core.c @@ -221,19 +221,6 @@ u64 bpf_jit_alloc_exec_limit(void) return BPF_JIT_REGION_SIZE; } -void *bpf_jit_alloc_exec(unsigned long size) -{ - return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, - BPF_JIT_REGION_END, GFP_KERNEL, - PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); -} - -void bpf_jit_free_exec(void *addr) -{ - return vfree(addr); -} - void *bpf_arch_text_copy(void *dst, void *src, size_t len) { int ret; diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index c46381ea04ec..ddf2ee47cb87 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -7,13 +7,13 @@ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> */ -#include <linux/moduleloader.h> #include <linux/hardirq.h> #include <linux/uaccess.h> #include <linux/ftrace.h> #include <linux/kernel.h> #include <linux/types.h> #include <linux/kprobes.h> +#include <linux/execmem.h> #include <trace/syscall.h> #include <asm/asm-offsets.h> #include <asm/text-patching.h> @@ -220,7 +220,7 @@ static int __init ftrace_plt_init(void) { const char *start, *end; - ftrace_plt = module_alloc(PAGE_SIZE); + ftrace_plt = execmem_alloc(EXECMEM_FTRACE, PAGE_SIZE); if (!ftrace_plt) panic("cannot allocate ftrace plt\n"); @@ -296,6 +296,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct kprobe *p; int bit; + if (unlikely(kprobe_ftrace_disabled)) + return; + bit = ftrace_test_recursion_trylock(ip, parent_ip); if (bit < 0) return; diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index f0cf20d4b3c5..3c1b1be744de 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -9,7 +9,6 @@ #define pr_fmt(fmt) "kprobes: " fmt -#include <linux/moduleloader.h> #include <linux/kprobes.h> #include <linux/ptrace.h> #include <linux/preempt.h> @@ -21,6 +20,7 @@ #include <linux/slab.h> #include <linux/hardirq.h> #include <linux/ftrace.h> +#include <linux/execmem.h> #include <asm/set_memory.h> #include <asm/sections.h> #include <asm/dis.h> @@ -38,7 +38,7 @@ void *alloc_insn_page(void) { void *page; - page = module_alloc(PAGE_SIZE); + page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE); if (!page) return NULL; set_memory_rox((unsigned long)page, 1); diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 42215f9404af..91e207b50394 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -21,6 +21,7 @@ #include <linux/moduleloader.h> #include <linux/bug.h> #include <linux/memory.h> +#include <linux/execmem.h> #include <asm/alternative.h> #include <asm/nospec-branch.h> #include <asm/facility.h> @@ -36,47 +37,10 @@ #define PLT_ENTRY_SIZE 22 -static unsigned long get_module_load_offset(void) -{ - static DEFINE_MUTEX(module_kaslr_mutex); - static unsigned long module_load_offset; - - if (!kaslr_enabled()) - return 0; - /* - * Calculate the module_load_offset the first time this code - * is called. Once calculated it stays the same until reboot. - */ - mutex_lock(&module_kaslr_mutex); - if (!module_load_offset) - module_load_offset = get_random_u32_inclusive(1, 1024) * PAGE_SIZE; - mutex_unlock(&module_kaslr_mutex); - return module_load_offset; -} - -void *module_alloc(unsigned long size) -{ - gfp_t gfp_mask = GFP_KERNEL; - void *p; - - if (PAGE_ALIGN(size) > MODULES_LEN) - return NULL; - p = __vmalloc_node_range(size, MODULE_ALIGN, - MODULES_VADDR + get_module_load_offset(), - MODULES_END, gfp_mask, PAGE_KERNEL, - VM_FLUSH_RESET_PERMS | VM_DEFER_KMEMLEAK, - NUMA_NO_NODE, __builtin_return_address(0)); - if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) { - vfree(p); - return NULL; - } - return p; -} - #ifdef CONFIG_FUNCTION_TRACER void module_arch_cleanup(struct module *mod) { - module_memfree(mod->arch.trampolines_start); + execmem_free(mod->arch.trampolines_start); } #endif @@ -510,7 +474,7 @@ static int module_alloc_ftrace_hotpatch_trampolines(struct module *me, size = FTRACE_HOTPATCH_TRAMPOLINES_SIZE(s->sh_size); numpages = DIV_ROUND_UP(size, PAGE_SIZE); - start = module_alloc(numpages * PAGE_SIZE); + start = execmem_alloc(EXECMEM_FTRACE, numpages * PAGE_SIZE); if (!start) return -ENOMEM; set_memory_rox((unsigned long)start, numpages); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index f6391442c0c2..e769d2726f4e 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -49,6 +49,7 @@ #include <asm/uv.h> #include <linux/virtio_anchor.h> #include <linux/virtio_config.h> +#include <linux/execmem.h> pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(".bss..swapper_pg_dir"); pgd_t invalid_pg_dir[PTRS_PER_PGD] __section(".bss..invalid_pg_dir"); @@ -302,3 +303,32 @@ void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) vmem_remove_mapping(start, size); } #endif /* CONFIG_MEMORY_HOTPLUG */ + +#ifdef CONFIG_EXECMEM +static struct execmem_info execmem_info __ro_after_init; + +struct execmem_info __init *execmem_arch_setup(void) +{ + unsigned long module_load_offset = 0; + unsigned long start; + + if (kaslr_enabled()) + module_load_offset = get_random_u32_inclusive(1, 1024) * PAGE_SIZE; + + start = MODULES_VADDR + module_load_offset; + + execmem_info = (struct execmem_info){ + .ranges = { + [EXECMEM_DEFAULT] = { + .flags = EXECMEM_KASAN_SHADOW, + .start = start, + .end = MODULES_END, + .pgprot = PAGE_KERNEL, + .alignment = MODULE_ALIGN, + }, + }, + }; + + return &execmem_info; +} +#endif /* CONFIG_EXECMEM */ diff --git a/arch/sh/configs/edosk7705_defconfig b/arch/sh/configs/edosk7705_defconfig index 9ee35269bee2..ab3bf72264df 100644 --- a/arch/sh/configs/edosk7705_defconfig +++ b/arch/sh/configs/edosk7705_defconfig @@ -6,7 +6,7 @@ # CONFIG_PRINTK is not set # CONFIG_BUG is not set # CONFIG_ELF_CORE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set diff --git a/arch/sh/configs/se7619_defconfig b/arch/sh/configs/se7619_defconfig index 14d0f5ead502..4765966fec99 100644 --- a/arch/sh/configs/se7619_defconfig +++ b/arch/sh/configs/se7619_defconfig @@ -4,7 +4,7 @@ CONFIG_LOG_BUF_SHIFT=14 # CONFIG_KALLSYMS is not set # CONFIG_HOTPLUG is not set # CONFIG_ELF_CORE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/sh/configs/se7712_defconfig b/arch/sh/configs/se7712_defconfig index dc854293da43..20f07aee5bde 100644 --- a/arch/sh/configs/se7712_defconfig +++ b/arch/sh/configs/se7712_defconfig @@ -7,7 +7,7 @@ CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_KALLSYMS_ALL=y # CONFIG_BUG is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_SHMEM is not set CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/sh/configs/se7721_defconfig b/arch/sh/configs/se7721_defconfig index c891945b8a90..00862d3c030d 100644 --- a/arch/sh/configs/se7721_defconfig +++ b/arch/sh/configs/se7721_defconfig @@ -7,7 +7,7 @@ CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_KALLSYMS_ALL=y # CONFIG_BUG is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_SHMEM is not set CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/sh/configs/shmin_defconfig b/arch/sh/configs/shmin_defconfig index e078b193a78a..bfeb004f130e 100644 --- a/arch/sh/configs/shmin_defconfig +++ b/arch/sh/configs/shmin_defconfig @@ -5,7 +5,7 @@ CONFIG_LOG_BUF_SHIFT=14 # CONFIG_HOTPLUG is not set # CONFIG_BUG is not set # CONFIG_ELF_CORE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SHMEM is not set diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h index 9e85d57ac3f2..62bcafe38b1f 100644 --- a/arch/sparc/include/asm/pgtable_32.h +++ b/arch/sparc/include/asm/pgtable_32.h @@ -432,6 +432,8 @@ static inline int io_remap_pfn_range(struct vm_area_struct *vma, #define VMALLOC_START _AC(0xfe600000,UL) #define VMALLOC_END _AC(0xffc00000,UL) +#define MODULES_VADDR VMALLOC_START +#define MODULES_END VMALLOC_END /* We provide our own get_unmapped_area to cope with VA holes for userland */ #define HAVE_ARCH_UNMAPPED_AREA diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c index 66c45a2764bc..b8c51cc23d96 100644 --- a/arch/sparc/kernel/module.c +++ b/arch/sparc/kernel/module.c @@ -21,36 +21,6 @@ #include "entry.h" -#ifdef CONFIG_SPARC64 - -#include <linux/jump_label.h> - -static void *module_map(unsigned long size) -{ - if (PAGE_ALIGN(size) > MODULES_LEN) - return NULL; - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); -} -#else -static void *module_map(unsigned long size) -{ - return vmalloc(size); -} -#endif /* CONFIG_SPARC64 */ - -void *module_alloc(unsigned long size) -{ - void *ret; - - ret = module_map(size); - if (ret) - memset(ret, 0, size); - - return ret; -} - /* Make generic code ignore STT_REGISTER dummy undefined symbols. */ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, diff --git a/arch/sparc/mm/Makefile b/arch/sparc/mm/Makefile index 809d993f6d88..2d1752108d77 100644 --- a/arch/sparc/mm/Makefile +++ b/arch/sparc/mm/Makefile @@ -14,3 +14,5 @@ obj-$(CONFIG_SPARC32) += leon_mm.o # Only used by sparc64 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o + +obj-$(CONFIG_EXECMEM) += execmem.o diff --git a/arch/sparc/mm/execmem.c b/arch/sparc/mm/execmem.c new file mode 100644 index 000000000000..0fac97dd5728 --- /dev/null +++ b/arch/sparc/mm/execmem.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/mm.h> +#include <linux/execmem.h> + +static struct execmem_info execmem_info __ro_after_init; + +struct execmem_info __init *execmem_arch_setup(void) +{ + execmem_info = (struct execmem_info){ + .ranges = { + [EXECMEM_DEFAULT] = { + .start = MODULES_VADDR, + .end = MODULES_END, + .pgprot = PAGE_KERNEL, + .alignment = 1, + }, + }, + }; + + return &execmem_info; +} diff --git a/arch/sparc/net/bpf_jit_comp_32.c b/arch/sparc/net/bpf_jit_comp_32.c index da2df1e84ed4..bda2dbd3f4c5 100644 --- a/arch/sparc/net/bpf_jit_comp_32.c +++ b/arch/sparc/net/bpf_jit_comp_32.c @@ -1,10 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/moduleloader.h> #include <linux/workqueue.h> #include <linux/netdevice.h> #include <linux/filter.h> #include <linux/cache.h> #include <linux/if_vlan.h> +#include <linux/execmem.h> #include <asm/cacheflush.h> #include <asm/ptrace.h> @@ -713,7 +713,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf]; if (unlikely(proglen + ilen > oldproglen)) { pr_err("bpb_jit_compile fatal error\n"); kfree(addrs); - module_memfree(image); + execmem_free(image); return; } memcpy(image + proglen, temp, ilen); @@ -736,7 +736,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf]; break; } if (proglen == oldproglen) { - image = module_alloc(proglen); + image = execmem_alloc(EXECMEM_BPF, proglen); if (!image) goto out; } @@ -758,7 +758,7 @@ out: void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) - module_memfree(fp->bpf_func); + execmem_free(fp->bpf_func); bpf_prog_unlock_free(fp); } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9d16fee6bdb8..bc47bc9841ff 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -34,6 +34,7 @@ config X86_64 select SWIOTLB select ARCH_HAS_ELFCORE_COMPAT select ZONE_DMA32 + select EXECMEM if DYNAMIC_FTRACE config FORCE_DYNAMIC_FTRACE def_bool y diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index a2be3aefff9f..f86ad3335529 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -143,6 +143,9 @@ extern void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfe extern u64 xstate_get_guest_group_perm(void); +extern void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); + + /* KVM specific functions */ extern bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu); extern void fpu_free_guest_fpstate(struct fpu_guest *gfpu); diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 91ca9a9ee3a2..ae5482a2f0ca 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -207,18 +207,11 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image); extern void kdump_nmi_shootdown_cpus(void); #ifdef CONFIG_CRASH_HOTPLUG -void arch_crash_handle_hotplug_event(struct kimage *image); +void arch_crash_handle_hotplug_event(struct kimage *image, void *arg); #define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event -#ifdef CONFIG_HOTPLUG_CPU -int arch_crash_hotplug_cpu_support(void); -#define crash_hotplug_cpu_support arch_crash_hotplug_cpu_support -#endif - -#ifdef CONFIG_MEMORY_HOTPLUG -int arch_crash_hotplug_memory_support(void); -#define crash_hotplug_memory_support arch_crash_hotplug_memory_support -#endif +int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags); +#define arch_crash_hotplug_support arch_crash_hotplug_support unsigned int arch_crash_get_elfcorehdr_size(void); #define crash_get_elfcorehdr_size arch_crash_get_elfcorehdr_size diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 110d7f29ca9a..5187fcf4b610 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -121,6 +121,7 @@ KVM_X86_OP(enter_smm) KVM_X86_OP(leave_smm) KVM_X86_OP(enable_smi_window) #endif +KVM_X86_OP_OPTIONAL(dev_get_attr) KVM_X86_OP_OPTIONAL(mem_enc_ioctl) KVM_X86_OP_OPTIONAL(mem_enc_register_region) KVM_X86_OP_OPTIONAL(mem_enc_unregister_region) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 6efd1497b026..ece45b3f6f20 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -254,28 +254,31 @@ enum x86_intercept_stage; KVM_GUESTDBG_INJECT_DB | \ KVM_GUESTDBG_BLOCKIRQ) +#define PFERR_PRESENT_MASK BIT(0) +#define PFERR_WRITE_MASK BIT(1) +#define PFERR_USER_MASK BIT(2) +#define PFERR_RSVD_MASK BIT(3) +#define PFERR_FETCH_MASK BIT(4) +#define PFERR_PK_MASK BIT(5) +#define PFERR_SGX_MASK BIT(15) +#define PFERR_GUEST_RMP_MASK BIT_ULL(31) +#define PFERR_GUEST_FINAL_MASK BIT_ULL(32) +#define PFERR_GUEST_PAGE_MASK BIT_ULL(33) +#define PFERR_GUEST_ENC_MASK BIT_ULL(34) +#define PFERR_GUEST_SIZEM_MASK BIT_ULL(35) +#define PFERR_GUEST_VMPL_MASK BIT_ULL(36) -#define PFERR_PRESENT_BIT 0 -#define PFERR_WRITE_BIT 1 -#define PFERR_USER_BIT 2 -#define PFERR_RSVD_BIT 3 -#define PFERR_FETCH_BIT 4 -#define PFERR_PK_BIT 5 -#define PFERR_SGX_BIT 15 -#define PFERR_GUEST_FINAL_BIT 32 -#define PFERR_GUEST_PAGE_BIT 33 -#define PFERR_IMPLICIT_ACCESS_BIT 48 - -#define PFERR_PRESENT_MASK BIT(PFERR_PRESENT_BIT) -#define PFERR_WRITE_MASK BIT(PFERR_WRITE_BIT) -#define PFERR_USER_MASK BIT(PFERR_USER_BIT) -#define PFERR_RSVD_MASK BIT(PFERR_RSVD_BIT) -#define PFERR_FETCH_MASK BIT(PFERR_FETCH_BIT) -#define PFERR_PK_MASK BIT(PFERR_PK_BIT) -#define PFERR_SGX_MASK BIT(PFERR_SGX_BIT) -#define PFERR_GUEST_FINAL_MASK BIT_ULL(PFERR_GUEST_FINAL_BIT) -#define PFERR_GUEST_PAGE_MASK BIT_ULL(PFERR_GUEST_PAGE_BIT) -#define PFERR_IMPLICIT_ACCESS BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT) +/* + * IMPLICIT_ACCESS is a KVM-defined flag used to correctly perform SMAP checks + * when emulating instructions that triggers implicit access. + */ +#define PFERR_IMPLICIT_ACCESS BIT_ULL(48) +/* + * PRIVATE_ACCESS is a KVM-defined flag us to indicate that a fault occurred + * when the guest was accessing private memory. + */ +#define PFERR_PRIVATE_ACCESS BIT_ULL(49) +#define PFERR_SYNTHETIC_MASK (PFERR_IMPLICIT_ACCESS | PFERR_PRIVATE_ACCESS) #define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \ PFERR_WRITE_MASK | \ @@ -994,9 +997,6 @@ struct kvm_vcpu_arch { u64 msr_kvm_poll_control; - /* set at EPT violation at this point */ - unsigned long exit_qualification; - /* pv related host specific info */ struct { bool pv_unhalted; @@ -1280,12 +1280,14 @@ enum kvm_apicv_inhibit { }; struct kvm_arch { - unsigned long vm_type; unsigned long n_used_mmu_pages; unsigned long n_requested_mmu_pages; unsigned long n_max_mmu_pages; unsigned int indirect_shadow_pages; u8 mmu_valid_gen; + u8 vm_type; + bool has_private_mem; + bool has_protected_state; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; struct list_head active_mmu_pages; struct list_head zapped_obsolete_pages; @@ -1312,6 +1314,8 @@ struct kvm_arch { */ spinlock_t mmu_unsync_pages_lock; + u64 shadow_mmio_value; + struct iommu_domain *iommu_domain; bool iommu_noncoherent; #define __KVM_HAVE_ARCH_NONCOHERENT_DMA @@ -1779,6 +1783,7 @@ struct kvm_x86_ops { void (*enable_smi_window)(struct kvm_vcpu *vcpu); #endif + int (*dev_get_attr)(u32 group, u64 attr, u64 *val); int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp); int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp); int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp); @@ -1844,6 +1849,7 @@ struct kvm_arch_async_pf { gfn_t gfn; unsigned long cr3; bool direct_map; + u64 error_code; }; extern u32 __read_mostly kvm_nr_uret_msrs; @@ -2140,6 +2146,10 @@ static inline void kvm_clear_apicv_inhibit(struct kvm *kvm, kvm_set_or_clear_apicv_inhibit(kvm, reason, false); } +unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, + unsigned long a0, unsigned long a1, + unsigned long a2, unsigned long a3, + int op_64_bit, int cpl); int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, @@ -2153,8 +2163,9 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd); void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, int tdp_max_root_level, int tdp_huge_page_level); + #ifdef CONFIG_KVM_PRIVATE_MEM -#define kvm_arch_has_private_mem(kvm) ((kvm)->arch.vm_type != KVM_X86_DEFAULT_VM) +#define kvm_arch_has_private_mem(kvm) ((kvm)->arch.has_private_mem) #else #define kvm_arch_has_private_mem(kvm) false #endif diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index c72c7ff78fcd..d593e52e6635 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -16,10 +16,10 @@ extern int pic_mode; * Summit or generic (i.e. installer) kernels need lots of bus entries. * Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets. */ -#if CONFIG_BASE_SMALL == 0 -# define MAX_MP_BUSSES 260 -#else +#ifdef CONFIG_BASE_SMALL # define MAX_MP_BUSSES 32 +#else +# define MAX_MP_BUSSES 260 #endif #define MAX_IRQ_SOURCES 256 diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index b463fcbd4b90..5a8246dd532f 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -54,8 +54,10 @@ (((unsigned long)fn) << 32)) /* AP Reset Hold */ -#define GHCB_MSR_AP_RESET_HOLD_REQ 0x006 -#define GHCB_MSR_AP_RESET_HOLD_RESP 0x007 +#define GHCB_MSR_AP_RESET_HOLD_REQ 0x006 +#define GHCB_MSR_AP_RESET_HOLD_RESP 0x007 +#define GHCB_MSR_AP_RESET_HOLD_RESULT_POS 12 +#define GHCB_MSR_AP_RESET_HOLD_RESULT_MASK GENMASK_ULL(51, 0) /* GHCB GPA Register */ #define GHCB_MSR_REG_GPA_REQ 0x012 @@ -99,6 +101,8 @@ enum psc_op { /* GHCB Hypervisor Feature Request/Response */ #define GHCB_MSR_HV_FT_REQ 0x080 #define GHCB_MSR_HV_FT_RESP 0x081 +#define GHCB_MSR_HV_FT_POS 12 +#define GHCB_MSR_HV_FT_MASK GENMASK_ULL(51, 0) #define GHCB_MSR_HV_FT_RESP_VAL(v) \ /* GHCBData[63:12] */ \ (((u64)(v) & GENMASK_ULL(63, 12)) >> 12) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 4dba17363008..d77a31039f24 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -71,6 +71,7 @@ #define SECONDARY_EXEC_ENCLS_EXITING VMCS_CONTROL_BIT(ENCLS_EXITING) #define SECONDARY_EXEC_RDSEED_EXITING VMCS_CONTROL_BIT(RDSEED_EXITING) #define SECONDARY_EXEC_ENABLE_PML VMCS_CONTROL_BIT(PAGE_MOD_LOGGING) +#define SECONDARY_EXEC_EPT_VIOLATION_VE VMCS_CONTROL_BIT(EPT_VIOLATION_VE) #define SECONDARY_EXEC_PT_CONCEAL_VMX VMCS_CONTROL_BIT(PT_CONCEAL_VMX) #define SECONDARY_EXEC_ENABLE_XSAVES VMCS_CONTROL_BIT(XSAVES) #define SECONDARY_EXEC_MODE_BASED_EPT_EXEC VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC) @@ -226,6 +227,8 @@ enum vmcs_field { VMREAD_BITMAP_HIGH = 0x00002027, VMWRITE_BITMAP = 0x00002028, VMWRITE_BITMAP_HIGH = 0x00002029, + VE_INFORMATION_ADDRESS = 0x0000202A, + VE_INFORMATION_ADDRESS_HIGH = 0x0000202B, XSS_EXIT_BITMAP = 0x0000202C, XSS_EXIT_BITMAP_HIGH = 0x0000202D, ENCLS_EXITING_BITMAP = 0x0000202E, @@ -514,6 +517,7 @@ enum vmcs_field { #define VMX_EPT_IPAT_BIT (1ull << 6) #define VMX_EPT_ACCESS_BIT (1ull << 8) #define VMX_EPT_DIRTY_BIT (1ull << 9) +#define VMX_EPT_SUPPRESS_VE_BIT (1ull << 63) #define VMX_EPT_RWX_MASK (VMX_EPT_READABLE_MASK | \ VMX_EPT_WRITABLE_MASK | \ VMX_EPT_EXECUTABLE_MASK) @@ -630,4 +634,13 @@ enum vmx_l1d_flush_state { extern enum vmx_l1d_flush_state l1tf_vmx_mitigation; +struct vmx_ve_information { + u32 exit_reason; + u32 delivery; + u64 exit_qualification; + u64 guest_linear_address; + u64 guest_physical_address; + u16 eptp_index; +}; + #endif diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index ef11aa4cab42..9fae1b73b529 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -457,8 +457,13 @@ struct kvm_sync_regs { #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 -/* attributes for system fd (group 0) */ -#define KVM_X86_XCOMP_GUEST_SUPP 0 +/* vendor-independent attributes for system fd (group 0) */ +#define KVM_X86_GRP_SYSTEM 0 +# define KVM_X86_XCOMP_GUEST_SUPP 0 + +/* vendor-specific groups and attributes for system fd */ +#define KVM_X86_GRP_SEV 1 +# define KVM_X86_SEV_VMSA_FEATURES 0 struct kvm_vmx_nested_state_data { __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; @@ -689,6 +694,9 @@ enum sev_cmd_id { /* Guest Migration Extension */ KVM_SEV_SEND_CANCEL, + /* Second time is the charm; improved versions of the above ioctls. */ + KVM_SEV_INIT2, + KVM_SEV_NR_MAX, }; @@ -700,6 +708,14 @@ struct kvm_sev_cmd { __u32 sev_fd; }; +struct kvm_sev_init { + __u64 vmsa_features; + __u32 flags; + __u16 ghcb_version; + __u16 pad1; + __u32 pad2[8]; +}; + struct kvm_sev_launch_start { __u32 handle; __u32 policy; @@ -856,5 +872,7 @@ struct kvm_hyperv_eventfd { #define KVM_X86_DEFAULT_VM 0 #define KVM_X86_SW_PROTECTED_VM 1 +#define KVM_X86_SEV_VM 2 +#define KVM_X86_SEV_ES_VM 3 #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index e74d0c4286c1..f06501445cd9 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -402,20 +402,26 @@ int crash_load_segments(struct kimage *image) #undef pr_fmt #define pr_fmt(fmt) "crash hp: " fmt -/* These functions provide the value for the sysfs crash_hotplug nodes */ -#ifdef CONFIG_HOTPLUG_CPU -int arch_crash_hotplug_cpu_support(void) +int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags) { - return crash_check_update_elfcorehdr(); -} -#endif -#ifdef CONFIG_MEMORY_HOTPLUG -int arch_crash_hotplug_memory_support(void) -{ - return crash_check_update_elfcorehdr(); -} +#ifdef CONFIG_KEXEC_FILE + if (image->file_mode) + return 1; #endif + /* + * Initially, crash hotplug support for kexec_load was added + * with the KEXEC_UPDATE_ELFCOREHDR flag. Later, this + * functionality was expanded to accommodate multiple kexec + * segment updates, leading to the introduction of the + * KEXEC_CRASH_HOTPLUG_SUPPORT kexec flag bit. Consequently, + * when the kexec tool sends either of these flags, it indicates + * that the required kexec segment (elfcorehdr) is excluded from + * the SHA calculation. + */ + return (kexec_flags & KEXEC_UPDATE_ELFCOREHDR || + kexec_flags & KEXEC_CRASH_HOTPLUG_SUPPORT); +} unsigned int arch_crash_get_elfcorehdr_size(void) { @@ -432,10 +438,12 @@ unsigned int arch_crash_get_elfcorehdr_size(void) /** * arch_crash_handle_hotplug_event() - Handle hotplug elfcorehdr changes * @image: a pointer to kexec_crash_image + * @arg: struct memory_notify handler for memory hotplug case and + * NULL for CPU hotplug case. * * Prepare the new elfcorehdr and replace the existing elfcorehdr. */ -void arch_crash_handle_hotplug_event(struct kimage *image) +void arch_crash_handle_hotplug_event(struct kimage *image, void *arg) { void *elfbuf = NULL, *old_elfcorehdr; unsigned long nr_mem_ranges; diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 6276329f5e66..c5a026fee5e0 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -991,6 +991,7 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr) return __raw_xsave_addr(xsave, xfeature_nr); } +EXPORT_SYMBOL_GPL(get_xsave_addr); #ifdef CONFIG_ARCH_HAS_PKEYS diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 19ca623ffa2a..05df04f39628 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -54,8 +54,6 @@ extern int copy_sigframe_from_user_to_xstate(struct task_struct *tsk, const void extern void fpu__init_cpu_xstate(void); extern void fpu__init_system_xstate(unsigned int legacy_size); -extern void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); - static inline u64 xfeatures_mask_supervisor(void) { return fpu_kernel_cfg.max_features & XFEATURE_MASK_SUPERVISOR_SUPPORTED; diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 70139d9d2e01..8da0e66ca22d 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -25,6 +25,7 @@ #include <linux/memory.h> #include <linux/vmalloc.h> #include <linux/set_memory.h> +#include <linux/execmem.h> #include <trace/syscall.h> @@ -260,25 +261,14 @@ void arch_ftrace_update_code(int command) /* Currently only x86_64 supports dynamic trampolines */ #ifdef CONFIG_X86_64 -#ifdef CONFIG_MODULES -#include <linux/moduleloader.h> -/* Module allocation simplifies allocating memory for code */ static inline void *alloc_tramp(unsigned long size) { - return module_alloc(size); + return execmem_alloc(EXECMEM_FTRACE, size); } static inline void tramp_free(void *tramp) { - module_memfree(tramp); + execmem_free(tramp); } -#else -/* Trampolines can only be created if modules are supported */ -static inline void *alloc_tramp(unsigned long size) -{ - return NULL; -} -static inline void tramp_free(void *tramp) { } -#endif /* Defined as markers to the end of the ftrace default trampolines */ extern void ftrace_regs_caller_end(void); diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index d0e49bd7c6f3..72e6a45e7ec2 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -40,12 +40,12 @@ #include <linux/kgdb.h> #include <linux/ftrace.h> #include <linux/kasan.h> -#include <linux/moduleloader.h> #include <linux/objtool.h> #include <linux/vmalloc.h> #include <linux/pgtable.h> #include <linux/set_memory.h> #include <linux/cfi.h> +#include <linux/execmem.h> #include <asm/text-patching.h> #include <asm/cacheflush.h> @@ -495,7 +495,7 @@ void *alloc_insn_page(void) { void *page; - page = module_alloc(PAGE_SIZE); + page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE); if (!page) return NULL; diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index dd2ec14adb77..15af7e98e161 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -21,6 +21,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct kprobe_ctlblk *kcb; int bit; + if (unlikely(kprobe_ftrace_disabled)) + return; + bit = ftrace_test_recursion_trylock(ip, parent_ip); if (bit < 0) return; diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index e18914c0e38a..837450b6e882 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -36,57 +36,6 @@ do { \ } while (0) #endif -#ifdef CONFIG_RANDOMIZE_BASE -static unsigned long module_load_offset; - -/* Mutex protects the module_load_offset. */ -static DEFINE_MUTEX(module_kaslr_mutex); - -static unsigned long int get_module_load_offset(void) -{ - if (kaslr_enabled()) { - mutex_lock(&module_kaslr_mutex); - /* - * Calculate the module_load_offset the first time this - * code is called. Once calculated it stays the same until - * reboot. - */ - if (module_load_offset == 0) - module_load_offset = - get_random_u32_inclusive(1, 1024) * PAGE_SIZE; - mutex_unlock(&module_kaslr_mutex); - } - return module_load_offset; -} -#else -static unsigned long int get_module_load_offset(void) -{ - return 0; -} -#endif - -void *module_alloc(unsigned long size) -{ - gfp_t gfp_mask = GFP_KERNEL; - void *p; - - if (PAGE_ALIGN(size) > MODULES_LEN) - return NULL; - - p = __vmalloc_node_range(size, MODULE_ALIGN, - MODULES_VADDR + get_module_load_offset(), - MODULES_END, gfp_mask, PAGE_KERNEL, - VM_FLUSH_RESET_PERMS | VM_DEFER_KMEMLEAK, - NUMA_NO_NODE, __builtin_return_address(0)); - - if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) { - vfree(p); - return NULL; - } - - return p; -} - #ifdef CONFIG_X86_32 int apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 0ebdd088f28b..d64fb2b3eb69 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -95,6 +95,19 @@ config KVM_INTEL To compile this as a module, choose M here: the module will be called kvm-intel. +config KVM_INTEL_PROVE_VE + bool "Check that guests do not receive #VE exceptions" + default KVM_PROVE_MMU || DEBUG_KERNEL + depends on KVM_INTEL + help + + Checks that KVM's page table management code will not incorrectly + let guests receive a virtualization exception. Virtualization + exceptions will be trapped by the hypervisor rather than injected + in the guest. + + If unsure, say N. + config X86_SGX_KVM bool "Software Guard eXtensions (SGX) Virtualization" depends on X86_SGX && KVM_INTEL diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index addc44fc7187..5494669a055a 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -16,14 +16,15 @@ kvm-$(CONFIG_KVM_XEN) += xen.o kvm-$(CONFIG_KVM_SMM) += smm.o kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \ - vmx/nested.o vmx/posted_intr.o + vmx/nested.o vmx/posted_intr.o vmx/main.o kvm-intel-$(CONFIG_X86_SGX_KVM) += vmx/sgx.o kvm-intel-$(CONFIG_KVM_HYPERV) += vmx/hyperv.o vmx/hyperv_evmcs.o -kvm-amd-y += svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o \ - svm/sev.o -kvm-amd-$(CONFIG_KVM_HYPERV) += svm/hyperv.o +kvm-amd-y += svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o + +kvm-amd-$(CONFIG_KVM_AMD_SEV) += svm/sev.o +kvm-amd-$(CONFIG_KVM_HYPERV) += svm/hyperv.o ifdef CONFIG_HYPERV kvm-y += kvm_onhyperv.o diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 77352a4abd87..f2f2be5d1141 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -772,7 +772,7 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_mask(CPUID_8000_000A_EDX, 0); kvm_cpu_cap_mask(CPUID_8000_001F_EAX, - 0 /* SME */ | F(SEV) | 0 /* VM_PAGE_FLUSH */ | F(SEV_ES) | + 0 /* SME */ | 0 /* SEV */ | 0 /* VM_PAGE_FLUSH */ | 0 /* SEV_ES */ | F(SME_COHERENT)); kvm_cpu_cap_mask(CPUID_8000_0021_EAX, @@ -1232,9 +1232,22 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->eax = entry->ebx = entry->ecx = 0; break; case 0x80000008: { - unsigned g_phys_as = (entry->eax >> 16) & 0xff; - unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U); - unsigned phys_as = entry->eax & 0xff; + /* + * GuestPhysAddrSize (EAX[23:16]) is intended for software + * use. + * + * KVM's ABI is to report the effective MAXPHYADDR for the + * guest in PhysAddrSize (phys_as), and the maximum + * *addressable* GPA in GuestPhysAddrSize (g_phys_as). + * + * GuestPhysAddrSize is valid if and only if TDP is enabled, + * in which case the max GPA that can be addressed by KVM may + * be less than the max GPA that can be legally generated by + * the guest, e.g. if MAXPHYADDR>48 but the CPU doesn't + * support 5-level TDP. + */ + unsigned int virt_as = max((entry->eax >> 8) & 0xff, 48U); + unsigned int phys_as, g_phys_as; /* * If TDP (NPT) is disabled use the adjusted host MAXPHYADDR as @@ -1242,16 +1255,24 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) * reductions in MAXPHYADDR for memory encryption affect shadow * paging, too. * - * If TDP is enabled but an explicit guest MAXPHYADDR is not - * provided, use the raw bare metal MAXPHYADDR as reductions to - * the HPAs do not affect GPAs. + * If TDP is enabled, use the raw bare metal MAXPHYADDR as + * reductions to the HPAs do not affect GPAs. The max + * addressable GPA is the same as the max effective GPA, except + * that it's capped at 48 bits if 5-level TDP isn't supported + * (hardware processes bits 51:48 only when walking the fifth + * level page table). */ - if (!tdp_enabled) - g_phys_as = boot_cpu_data.x86_phys_bits; - else if (!g_phys_as) + if (!tdp_enabled) { + phys_as = boot_cpu_data.x86_phys_bits; + g_phys_as = 0; + } else { + phys_as = entry->eax & 0xff; g_phys_as = phys_as; + if (kvm_mmu_get_max_tdp_level() < 5) + g_phys_as = min(g_phys_as, 48); + } - entry->eax = g_phys_as | (virt_as << 8); + entry->eax = phys_as | (virt_as << 8) | (g_phys_as << 16); entry->ecx &= ~(GENMASK(31, 16) | GENMASK(11, 8)); entry->edx = 0; cpuid_entry_override(entry, CPUID_8000_0008_EBX); diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h index 5382646162a3..29ea4313e1bb 100644 --- a/arch/x86/kvm/kvm_emulate.h +++ b/arch/x86/kvm/kvm_emulate.h @@ -26,6 +26,7 @@ struct x86_exception { bool nested_page_fault; u64 address; /* cr2 or nested page fault gpa */ u8 async_page_fault; + unsigned long exit_qualification; }; /* diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 60f21bb4c27b..2e454316f2a2 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -100,6 +100,8 @@ static inline u8 kvm_get_shadow_phys_bits(void) return boot_cpu_data.x86_phys_bits; } +u8 kvm_mmu_get_max_tdp_level(void); + void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask); void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask); void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only); @@ -213,7 +215,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, */ u64 implicit_access = access & PFERR_IMPLICIT_ACCESS; bool not_smap = ((rflags & X86_EFLAGS_AC) | implicit_access) == X86_EFLAGS_AC; - int index = (pfec + (not_smap << PFERR_RSVD_BIT)) >> 1; + int index = (pfec | (not_smap ? PFERR_RSVD_MASK : 0)) >> 1; u32 errcode = PFERR_PRESENT_MASK; bool fault; @@ -234,8 +236,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, pkru_bits = (vcpu->arch.pkru >> (pte_pkey * 2)) & 3; /* clear present bit, replace PFEC.RSVD with ACC_USER_MASK. */ - offset = (pfec & ~1) + - ((pte_access & PT_USER_MASK) << (PFERR_RSVD_BIT - PT_USER_SHIFT)); + offset = (pfec & ~1) | ((pte_access & PT_USER_MASK) ? PFERR_RSVD_MASK : 0); pkru_bits &= mmu->pkru_mask >> offset; errcode |= -pkru_bits & PFERR_PK_MASK; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index db007a4dffa2..662f62dfb2aa 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -432,8 +432,8 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte) * The idea using the light way get the spte on x86_32 guest is from * gup_get_pte (mm/gup.c). * - * An spte tlb flush may be pending, because kvm_set_pte_rmap - * coalesces them and we are running out of the MMU lock. Therefore + * An spte tlb flush may be pending, because they are coalesced and + * we are running out of the MMU lock. Therefore * we need to protect against in-progress updates of the spte. * * Reading the spte while an update is in progress may get the old value @@ -567,9 +567,9 @@ static u64 mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep) if (!is_shadow_present_pte(old_spte) || !spte_has_volatile_bits(old_spte)) - __update_clear_spte_fast(sptep, 0ull); + __update_clear_spte_fast(sptep, SHADOW_NONPRESENT_VALUE); else - old_spte = __update_clear_spte_slow(sptep, 0ull); + old_spte = __update_clear_spte_slow(sptep, SHADOW_NONPRESENT_VALUE); if (!is_shadow_present_pte(old_spte)) return old_spte; @@ -603,7 +603,7 @@ static u64 mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep) */ static void mmu_spte_clear_no_track(u64 *sptep) { - __update_clear_spte_fast(sptep, 0ull); + __update_clear_spte_fast(sptep, SHADOW_NONPRESENT_VALUE); } static u64 mmu_spte_get_lockless(u64 *sptep) @@ -831,6 +831,15 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) gfn_t gfn; kvm->arch.indirect_shadow_pages++; + /* + * Ensure indirect_shadow_pages is elevated prior to re-reading guest + * child PTEs in FNAME(gpte_changed), i.e. guarantee either in-flight + * emulated writes are visible before re-reading guest PTEs, or that + * an emulated write will see the elevated count and acquire mmu_lock + * to update SPTEs. Pairs with the smp_mb() in kvm_mmu_track_write(). + */ + smp_mb(); + gfn = sp->gfn; slots = kvm_memslots_for_spte_role(kvm, sp->role); slot = __gfn_to_memslot(slots, gfn); @@ -1448,49 +1457,11 @@ static bool __kvm_zap_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head, } static bool kvm_zap_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot, gfn_t gfn, int level, - pte_t unused) + struct kvm_memory_slot *slot, gfn_t gfn, int level) { return __kvm_zap_rmap(kvm, rmap_head, slot); } -static bool kvm_set_pte_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot, gfn_t gfn, int level, - pte_t pte) -{ - u64 *sptep; - struct rmap_iterator iter; - bool need_flush = false; - u64 new_spte; - kvm_pfn_t new_pfn; - - WARN_ON_ONCE(pte_huge(pte)); - new_pfn = pte_pfn(pte); - -restart: - for_each_rmap_spte(rmap_head, &iter, sptep) { - need_flush = true; - - if (pte_write(pte)) { - kvm_zap_one_rmap_spte(kvm, rmap_head, sptep); - goto restart; - } else { - new_spte = kvm_mmu_changed_pte_notifier_make_spte( - *sptep, new_pfn); - - mmu_spte_clear_track_bits(kvm, sptep); - mmu_spte_set(sptep, new_spte); - } - } - - if (need_flush && kvm_available_flush_remote_tlbs_range()) { - kvm_flush_remote_tlbs_gfn(kvm, gfn, level); - return false; - } - - return need_flush; -} - struct slot_rmap_walk_iterator { /* input fields. */ const struct kvm_memory_slot *slot; @@ -1562,7 +1533,7 @@ static void slot_rmap_walk_next(struct slot_rmap_walk_iterator *iterator) typedef bool (*rmap_handler_t)(struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, - int level, pte_t pte); + int level); static __always_inline bool kvm_handle_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range, @@ -1574,7 +1545,7 @@ static __always_inline bool kvm_handle_gfn_range(struct kvm *kvm, for_each_slot_rmap_range(range->slot, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL, range->start, range->end - 1, &iterator) ret |= handler(kvm, iterator.rmap, range->slot, iterator.gfn, - iterator.level, range->arg.pte); + iterator.level); return ret; } @@ -1596,22 +1567,8 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) return flush; } -bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) -{ - bool flush = false; - - if (kvm_memslots_have_rmaps(kvm)) - flush = kvm_handle_gfn_range(kvm, range, kvm_set_pte_rmap); - - if (tdp_mmu_enabled) - flush |= kvm_tdp_mmu_set_spte_gfn(kvm, range); - - return flush; -} - static bool kvm_age_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot, gfn_t gfn, int level, - pte_t unused) + struct kvm_memory_slot *slot, gfn_t gfn, int level) { u64 *sptep; struct rmap_iterator iter; @@ -1624,8 +1581,7 @@ static bool kvm_age_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head, } static bool kvm_test_age_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot, gfn_t gfn, - int level, pte_t unused) + struct kvm_memory_slot *slot, gfn_t gfn, int level) { u64 *sptep; struct rmap_iterator iter; @@ -1950,7 +1906,8 @@ static bool kvm_sync_page_check(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) static int kvm_sync_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, int i) { - if (!sp->spt[i]) + /* sp->spt[i] has initial value of shadow page table allocation */ + if (sp->spt[i] == SHADOW_NONPRESENT_VALUE) return 0; return vcpu->arch.mmu->sync_spte(vcpu, sp, i); @@ -2514,7 +2471,7 @@ static int mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp, return kvm_mmu_prepare_zap_page(kvm, child, invalid_list); } - } else if (is_mmio_spte(pte)) { + } else if (is_mmio_spte(kvm, pte)) { mmu_spte_clear_no_track(spte); } return 0; @@ -3314,9 +3271,19 @@ static int kvm_handle_noslot_fault(struct kvm_vcpu *vcpu, { gva_t gva = fault->is_tdp ? 0 : fault->addr; + if (fault->is_private) { + kvm_mmu_prepare_memory_fault_exit(vcpu, fault); + return -EFAULT; + } + vcpu_cache_mmio_info(vcpu, gva, fault->gfn, access & shadow_mmio_access_mask); + fault->slot = NULL; + fault->pfn = KVM_PFN_NOSLOT; + fault->map_writable = false; + fault->hva = KVM_HVA_ERR_BAD; + /* * If MMIO caching is disabled, emulate immediately without * touching the shadow page tables as attempting to install an @@ -4196,7 +4163,7 @@ static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct) if (WARN_ON_ONCE(reserved)) return -EINVAL; - if (is_mmio_spte(spte)) { + if (is_mmio_spte(vcpu->kvm, spte)) { gfn_t gfn = get_mmio_spte_gfn(spte); unsigned int access = get_mmio_spte_access(spte); @@ -4259,24 +4226,28 @@ static u32 alloc_apf_token(struct kvm_vcpu *vcpu) return (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id; } -static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, - gfn_t gfn) +static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, + struct kvm_page_fault *fault) { struct kvm_arch_async_pf arch; arch.token = alloc_apf_token(vcpu); - arch.gfn = gfn; + arch.gfn = fault->gfn; + arch.error_code = fault->error_code; arch.direct_map = vcpu->arch.mmu->root_role.direct; arch.cr3 = kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu); - return kvm_setup_async_pf(vcpu, cr2_or_gpa, - kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); + return kvm_setup_async_pf(vcpu, fault->addr, + kvm_vcpu_gfn_to_hva(vcpu, fault->gfn), &arch); } void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) { int r; + if (WARN_ON_ONCE(work->arch.error_code & PFERR_PRIVATE_ACCESS)) + return; + if ((vcpu->arch.mmu->root_role.direct != work->arch.direct_map) || work->wakeup_all) return; @@ -4289,7 +4260,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) work->arch.cr3 != kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu)) return; - kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true, NULL); + kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, work->arch.error_code, true, NULL); } static inline u8 kvm_max_level_for_order(int order) @@ -4309,14 +4280,6 @@ static inline u8 kvm_max_level_for_order(int order) return PG_LEVEL_4K; } -static void kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu, - struct kvm_page_fault *fault) -{ - kvm_prepare_memory_fault_exit(vcpu, fault->gfn << PAGE_SHIFT, - PAGE_SIZE, fault->write, fault->exec, - fault->is_private); -} - static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { @@ -4343,48 +4306,15 @@ static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu, static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { - struct kvm_memory_slot *slot = fault->slot; bool async; - /* - * Retry the page fault if the gfn hit a memslot that is being deleted - * or moved. This ensures any existing SPTEs for the old memslot will - * be zapped before KVM inserts a new MMIO SPTE for the gfn. - */ - if (slot && (slot->flags & KVM_MEMSLOT_INVALID)) - return RET_PF_RETRY; - - if (!kvm_is_visible_memslot(slot)) { - /* Don't expose private memslots to L2. */ - if (is_guest_mode(vcpu)) { - fault->slot = NULL; - fault->pfn = KVM_PFN_NOSLOT; - fault->map_writable = false; - return RET_PF_CONTINUE; - } - /* - * If the APIC access page exists but is disabled, go directly - * to emulation without caching the MMIO access or creating a - * MMIO SPTE. That way the cache doesn't need to be purged - * when the AVIC is re-enabled. - */ - if (slot && slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT && - !kvm_apicv_activated(vcpu->kvm)) - return RET_PF_EMULATE; - } - - if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) { - kvm_mmu_prepare_memory_fault_exit(vcpu, fault); - return -EFAULT; - } - if (fault->is_private) return kvm_faultin_pfn_private(vcpu, fault); async = false; - fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, false, &async, - fault->write, &fault->map_writable, - &fault->hva); + fault->pfn = __gfn_to_pfn_memslot(fault->slot, fault->gfn, false, false, + &async, fault->write, + &fault->map_writable, &fault->hva); if (!async) return RET_PF_CONTINUE; /* *pfn has correct page already */ @@ -4394,7 +4324,7 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault trace_kvm_async_pf_repeated_fault(fault->addr, fault->gfn); kvm_make_request(KVM_REQ_APF_HALT, vcpu); return RET_PF_RETRY; - } else if (kvm_arch_setup_async_pf(vcpu, fault->addr, fault->gfn)) { + } else if (kvm_arch_setup_async_pf(vcpu, fault)) { return RET_PF_RETRY; } } @@ -4404,17 +4334,72 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault * to wait for IO. Note, gup always bails if it is unable to quickly * get a page and a fatal signal, i.e. SIGKILL, is pending. */ - fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, true, NULL, - fault->write, &fault->map_writable, - &fault->hva); + fault->pfn = __gfn_to_pfn_memslot(fault->slot, fault->gfn, false, true, + NULL, fault->write, + &fault->map_writable, &fault->hva); return RET_PF_CONTINUE; } static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, unsigned int access) { + struct kvm_memory_slot *slot = fault->slot; int ret; + /* + * Note that the mmu_invalidate_seq also serves to detect a concurrent + * change in attributes. is_page_fault_stale() will detect an + * invalidation relate to fault->fn and resume the guest without + * installing a mapping in the page tables. + */ + fault->mmu_seq = vcpu->kvm->mmu_invalidate_seq; + smp_rmb(); + + /* + * Now that we have a snapshot of mmu_invalidate_seq we can check for a + * private vs. shared mismatch. + */ + if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) { + kvm_mmu_prepare_memory_fault_exit(vcpu, fault); + return -EFAULT; + } + + if (unlikely(!slot)) + return kvm_handle_noslot_fault(vcpu, fault, access); + + /* + * Retry the page fault if the gfn hit a memslot that is being deleted + * or moved. This ensures any existing SPTEs for the old memslot will + * be zapped before KVM inserts a new MMIO SPTE for the gfn. + */ + if (slot->flags & KVM_MEMSLOT_INVALID) + return RET_PF_RETRY; + + if (slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT) { + /* + * Don't map L1's APIC access page into L2, KVM doesn't support + * using APICv/AVIC to accelerate L2 accesses to L1's APIC, + * i.e. the access needs to be emulated. Emulating access to + * L1's APIC is also correct if L1 is accelerating L2's own + * virtual APIC, but for some reason L1 also maps _L1's_ APIC + * into L2. Note, vcpu_is_mmio_gpa() always treats access to + * the APIC as MMIO. Allow an MMIO SPTE to be created, as KVM + * uses different roots for L1 vs. L2, i.e. there is no danger + * of breaking APICv/AVIC for L1. + */ + if (is_guest_mode(vcpu)) + return kvm_handle_noslot_fault(vcpu, fault, access); + + /* + * If the APIC access page exists but is disabled, go directly + * to emulation without caching the MMIO access or creating a + * MMIO SPTE. That way the cache doesn't need to be purged + * when the AVIC is re-enabled. + */ + if (!kvm_apicv_activated(vcpu->kvm)) + return RET_PF_EMULATE; + } + fault->mmu_seq = vcpu->kvm->mmu_invalidate_seq; smp_rmb(); @@ -4439,8 +4424,7 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, * *guaranteed* to need to retry, i.e. waiting until mmu_lock is held * to detect retry guarantees the worst case latency for the vCPU. */ - if (fault->slot && - mmu_invalidate_retry_gfn_unsafe(vcpu->kvm, fault->mmu_seq, fault->gfn)) + if (mmu_invalidate_retry_gfn_unsafe(vcpu->kvm, fault->mmu_seq, fault->gfn)) return RET_PF_RETRY; ret = __kvm_faultin_pfn(vcpu, fault); @@ -4450,7 +4434,7 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, if (unlikely(is_error_pfn(fault->pfn))) return kvm_handle_error_pfn(vcpu, fault); - if (unlikely(!fault->slot)) + if (WARN_ON_ONCE(!fault->slot || is_noslot_pfn(fault->pfn))) return kvm_handle_noslot_fault(vcpu, fault, access); /* @@ -4561,6 +4545,16 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, if (WARN_ON_ONCE(fault_address >> 32)) return -EFAULT; #endif + /* + * Legacy #PF exception only have a 32-bit error code. Simply drop the + * upper bits as KVM doesn't use them for #PF (because they are never + * set), and to ensure there are no collisions with KVM-defined bits. + */ + if (WARN_ON_ONCE(error_code >> 32)) + error_code = lower_32_bits(error_code); + + /* Ensure the above sanity check also covers KVM-defined flags. */ + BUILD_BUG_ON(lower_32_bits(PFERR_SYNTHETIC_MASK)); vcpu->arch.l1tf_flush_l1d = true; if (!flags) { @@ -4812,7 +4806,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_new_pgd); static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, unsigned int access) { - if (unlikely(is_mmio_spte(*sptep))) { + if (unlikely(is_mmio_spte(vcpu->kvm, *sptep))) { if (gfn != get_mmio_spte_gfn(*sptep)) { mmu_spte_clear_no_track(sptep); return true; @@ -5322,6 +5316,11 @@ static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu) return max_tdp_level; } +u8 kvm_mmu_get_max_tdp_level(void) +{ + return tdp_root_level ? tdp_root_level : max_tdp_level; +} + static union kvm_mmu_page_role kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu, union kvm_cpu_role cpu_role) @@ -5802,10 +5801,15 @@ void kvm_mmu_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, bool flush = false; /* - * If we don't have indirect shadow pages, it means no page is - * write-protected, so we can exit simply. + * When emulating guest writes, ensure the written value is visible to + * any task that is handling page faults before checking whether or not + * KVM is shadowing a guest PTE. This ensures either KVM will create + * the correct SPTE in the page fault handler, or this task will see + * a non-zero indirect_shadow_pages. Pairs with the smp_mb() in + * account_shadowed(). */ - if (!READ_ONCE(vcpu->kvm->arch.indirect_shadow_pages)) + smp_mb(); + if (!vcpu->kvm->arch.indirect_shadow_pages) return; write_lock(&vcpu->kvm->mmu_lock); @@ -5846,30 +5850,35 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err int r, emulation_type = EMULTYPE_PF; bool direct = vcpu->arch.mmu->root_role.direct; - /* - * IMPLICIT_ACCESS is a KVM-defined flag used to correctly perform SMAP - * checks when emulating instructions that triggers implicit access. - * WARN if hardware generates a fault with an error code that collides - * with the KVM-defined value. Clear the flag and continue on, i.e. - * don't terminate the VM, as KVM can't possibly be relying on a flag - * that KVM doesn't know about. - */ - if (WARN_ON_ONCE(error_code & PFERR_IMPLICIT_ACCESS)) - error_code &= ~PFERR_IMPLICIT_ACCESS; - if (WARN_ON_ONCE(!VALID_PAGE(vcpu->arch.mmu->root.hpa))) return RET_PF_RETRY; + /* + * Except for reserved faults (emulated MMIO is shared-only), set the + * PFERR_PRIVATE_ACCESS flag for software-protected VMs based on the gfn's + * current attributes, which are the source of truth for such VMs. Note, + * this wrong for nested MMUs as the GPA is an L2 GPA, but KVM doesn't + * currently supported nested virtualization (among many other things) + * for software-protected VMs. + */ + if (IS_ENABLED(CONFIG_KVM_SW_PROTECTED_VM) && + !(error_code & PFERR_RSVD_MASK) && + vcpu->kvm->arch.vm_type == KVM_X86_SW_PROTECTED_VM && + kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(cr2_or_gpa))) + error_code |= PFERR_PRIVATE_ACCESS; + r = RET_PF_INVALID; if (unlikely(error_code & PFERR_RSVD_MASK)) { + if (WARN_ON_ONCE(error_code & PFERR_PRIVATE_ACCESS)) + return -EFAULT; + r = handle_mmio_page_fault(vcpu, cr2_or_gpa, direct); if (r == RET_PF_EMULATE) goto emulate; } if (r == RET_PF_INVALID) { - r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa, - lower_32_bits(error_code), false, + r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa, error_code, false, &emulation_type); if (KVM_BUG_ON(r == RET_PF_INVALID, vcpu->kvm)) return -EIO; @@ -6173,7 +6182,10 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu) vcpu->arch.mmu_page_header_cache.kmem_cache = mmu_page_header_cache; vcpu->arch.mmu_page_header_cache.gfp_zero = __GFP_ZERO; - vcpu->arch.mmu_shadow_page_cache.gfp_zero = __GFP_ZERO; + vcpu->arch.mmu_shadow_page_cache.init_value = + SHADOW_NONPRESENT_VALUE; + if (!vcpu->arch.mmu_shadow_page_cache.init_value) + vcpu->arch.mmu_shadow_page_cache.gfp_zero = __GFP_ZERO; vcpu->arch.mmu = &vcpu->arch.root_mmu; vcpu->arch.walk_mmu = &vcpu->arch.root_mmu; @@ -6316,6 +6328,7 @@ static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm) void kvm_mmu_init_vm(struct kvm *kvm) { + kvm->arch.shadow_mmio_value = shadow_mmio_value; INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages); diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index 5390a591a571..ce2fcd19ba6b 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -190,7 +190,7 @@ static inline bool is_nx_huge_page_enabled(struct kvm *kvm) struct kvm_page_fault { /* arguments to kvm_mmu_do_page_fault. */ const gpa_t addr; - const u32 error_code; + const u64 error_code; const bool prefetch; /* Derived from error_code. */ @@ -279,8 +279,16 @@ enum { RET_PF_SPURIOUS, }; +static inline void kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu, + struct kvm_page_fault *fault) +{ + kvm_prepare_memory_fault_exit(vcpu, fault->gfn << PAGE_SHIFT, + PAGE_SIZE, fault->write, fault->exec, + fault->is_private); +} + static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, - u32 err, bool prefetch, int *emulation_type) + u64 err, bool prefetch, int *emulation_type) { struct kvm_page_fault fault = { .addr = cr2_or_gpa, @@ -298,7 +306,10 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, .max_level = KVM_MAX_HUGEPAGE_LEVEL, .req_level = PG_LEVEL_4K, .goal_level = PG_LEVEL_4K, - .is_private = kvm_mem_is_private(vcpu->kvm, cr2_or_gpa >> PAGE_SHIFT), + .is_private = err & PFERR_PRIVATE_ACCESS, + + .pfn = KVM_PFN_ERR_FAULT, + .hva = KVM_HVA_ERR_BAD, }; int r; @@ -320,6 +331,17 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, else r = vcpu->arch.mmu->page_fault(vcpu, &fault); + /* + * Not sure what's happening, but punt to userspace and hope that + * they can fix it by changing memory to shared, or they can + * provide a better error. + */ + if (r == RET_PF_EMULATE && fault.is_private) { + pr_warn_ratelimited("kvm: unexpected emulation request on private memory\n"); + kvm_mmu_prepare_memory_fault_exit(vcpu, &fault); + return -EFAULT; + } + if (fault.write_fault_to_shadow_pgtable && emulation_type) *emulation_type |= EMULTYPE_WRITE_PF_TO_SP; diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h index ae86820cef69..195d98bc8de8 100644 --- a/arch/x86/kvm/mmu/mmutrace.h +++ b/arch/x86/kvm/mmu/mmutrace.h @@ -260,7 +260,7 @@ TRACE_EVENT( TP_STRUCT__entry( __field(int, vcpu_id) __field(gpa_t, cr2_or_gpa) - __field(u32, error_code) + __field(u64, error_code) __field(u64 *, sptep) __field(u64, old_spte) __field(u64, new_spte) diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c index f6448284c18e..561c331fd6ec 100644 --- a/arch/x86/kvm/mmu/page_track.c +++ b/arch/x86/kvm/mmu/page_track.c @@ -41,7 +41,7 @@ bool kvm_page_track_write_tracking_enabled(struct kvm *kvm) void kvm_page_track_free_memslot(struct kvm_memory_slot *slot) { - kvfree(slot->arch.gfn_write_track); + vfree(slot->arch.gfn_write_track); slot->arch.gfn_write_track = NULL; } diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 4d4e98fe4f35..d3dbcf382ed2 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -497,21 +497,21 @@ error: * The other bits are set to 0. */ if (!(errcode & PFERR_RSVD_MASK)) { - vcpu->arch.exit_qualification &= (EPT_VIOLATION_GVA_IS_VALID | - EPT_VIOLATION_GVA_TRANSLATED); + walker->fault.exit_qualification = 0; + if (write_fault) - vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_WRITE; + walker->fault.exit_qualification |= EPT_VIOLATION_ACC_WRITE; if (user_fault) - vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_READ; + walker->fault.exit_qualification |= EPT_VIOLATION_ACC_READ; if (fetch_fault) - vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_INSTR; + walker->fault.exit_qualification |= EPT_VIOLATION_ACC_INSTR; /* * Note, pte_access holds the raw RWX bits from the EPTE, not * ACC_*_MASK flags! */ - vcpu->arch.exit_qualification |= (pte_access & VMX_EPT_RWX_MASK) << - EPT_VIOLATION_RWX_SHIFT; + walker->fault.exit_qualification |= (pte_access & VMX_EPT_RWX_MASK) << + EPT_VIOLATION_RWX_SHIFT; } #endif walker->fault.address = addr; @@ -911,7 +911,7 @@ static int FNAME(sync_spte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, int gpa_t pte_gpa; gfn_t gfn; - if (WARN_ON_ONCE(!sp->spt[i])) + if (WARN_ON_ONCE(sp->spt[i] == SHADOW_NONPRESENT_VALUE)) return 0; first_pte_gpa = FNAME(get_level1_sp_gpa)(sp); @@ -933,13 +933,13 @@ static int FNAME(sync_spte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, int return 0; /* - * Drop the SPTE if the new protections would result in a RWX=0 - * SPTE or if the gfn is changing. The RWX=0 case only affects - * EPT with execute-only support, i.e. EPT without an effective - * "present" bit, as all other paging modes will create a - * read-only SPTE if pte_access is zero. + * Drop the SPTE if the new protections result in no effective + * "present" bit or if the gfn is changing. The former case + * only affects EPT with execute-only support with pte_access==0; + * all other paging modes will create a read-only SPTE if + * pte_access is zero. */ - if ((!pte_access && !shadow_present_mask) || + if ((pte_access | shadow_present_mask) == SHADOW_NONPRESENT_VALUE || gfn != kvm_mmu_page_get_gfn(sp, i)) { drop_spte(vcpu->kvm, &sp->spt[i]); return 1; diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index 4a599130e9c9..a5e014d7bc62 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -74,10 +74,10 @@ u64 make_mmio_spte(struct kvm_vcpu *vcpu, u64 gfn, unsigned int access) u64 spte = generation_mmio_spte_mask(gen); u64 gpa = gfn << PAGE_SHIFT; - WARN_ON_ONCE(!shadow_mmio_value); + WARN_ON_ONCE(!vcpu->kvm->arch.shadow_mmio_value); access &= shadow_mmio_access_mask; - spte |= shadow_mmio_value | access; + spte |= vcpu->kvm->arch.shadow_mmio_value | access; spte |= gpa | shadow_nonpresent_or_rsvd_mask; spte |= (gpa & shadow_nonpresent_or_rsvd_mask) << SHADOW_NONPRESENT_OR_RSVD_MASK_LEN; @@ -144,19 +144,19 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 spte = SPTE_MMU_PRESENT_MASK; bool wrprot = false; - WARN_ON_ONCE(!pte_access && !shadow_present_mask); + /* + * For the EPT case, shadow_present_mask has no RWX bits set if + * exec-only page table entries are supported. In that case, + * ACC_USER_MASK and shadow_user_mask are used to represent + * read access. See FNAME(gpte_access) in paging_tmpl.h. + */ + WARN_ON_ONCE((pte_access | shadow_present_mask) == SHADOW_NONPRESENT_VALUE); if (sp->role.ad_disabled) spte |= SPTE_TDP_AD_DISABLED; else if (kvm_mmu_page_ad_need_write_protect(sp)) spte |= SPTE_TDP_AD_WRPROT_ONLY; - /* - * For the EPT case, shadow_present_mask is 0 if hardware - * supports exec-only page table entries. In that case, - * ACC_USER_MASK and shadow_user_mask are used to represent - * read access. See FNAME(gpte_access) in paging_tmpl.h. - */ spte |= shadow_present_mask; if (!prefetch) spte |= spte_shadow_accessed_mask(spte); @@ -322,22 +322,6 @@ u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled) return spte; } -u64 kvm_mmu_changed_pte_notifier_make_spte(u64 old_spte, kvm_pfn_t new_pfn) -{ - u64 new_spte; - - new_spte = old_spte & ~SPTE_BASE_ADDR_MASK; - new_spte |= (u64)new_pfn << PAGE_SHIFT; - - new_spte &= ~PT_WRITABLE_MASK; - new_spte &= ~shadow_host_writable_mask; - new_spte &= ~shadow_mmu_writable_mask; - - new_spte = mark_spte_for_access_track(new_spte); - - return new_spte; -} - u64 mark_spte_for_access_track(u64 spte) { if (spte_ad_enabled(spte)) @@ -429,7 +413,9 @@ void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only) shadow_dirty_mask = has_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull; shadow_nx_mask = 0ull; shadow_x_mask = VMX_EPT_EXECUTABLE_MASK; - shadow_present_mask = has_exec_only ? 0ull : VMX_EPT_READABLE_MASK; + /* VMX_EPT_SUPPRESS_VE_BIT is needed for W or X violation. */ + shadow_present_mask = + (has_exec_only ? 0ull : VMX_EPT_READABLE_MASK) | VMX_EPT_SUPPRESS_VE_BIT; /* * EPT overrides the host MTRRs, and so KVM must program the desired * memtype directly into the SPTEs. Note, this mask is just the mask @@ -446,7 +432,7 @@ void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only) * of an EPT paging-structure entry is 110b (write/execute). */ kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE, - VMX_EPT_RWX_MASK, 0); + VMX_EPT_RWX_MASK | VMX_EPT_SUPPRESS_VE_BIT, 0); } EXPORT_SYMBOL_GPL(kvm_mmu_set_ept_masks); diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index a129951c9a88..5dd5405fa07a 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -149,6 +149,22 @@ static_assert(MMIO_SPTE_GEN_LOW_BITS == 8 && MMIO_SPTE_GEN_HIGH_BITS == 11); #define MMIO_SPTE_GEN_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_BITS + MMIO_SPTE_GEN_HIGH_BITS - 1, 0) +/* + * Non-present SPTE value needs to set bit 63 for TDX, in order to suppress + * #VE and get EPT violations on non-present PTEs. We can use the + * same value also without TDX for both VMX and SVM: + * + * For SVM NPT, for non-present spte (bit 0 = 0), other bits are ignored. + * For VMX EPT, bit 63 is ignored if #VE is disabled. (EPT_VIOLATION_VE=0) + * bit 63 is #VE suppress if #VE is enabled. (EPT_VIOLATION_VE=1) + */ +#ifdef CONFIG_X86_64 +#define SHADOW_NONPRESENT_VALUE BIT_ULL(63) +static_assert(!(SHADOW_NONPRESENT_VALUE & SPTE_MMU_PRESENT_MASK)); +#else +#define SHADOW_NONPRESENT_VALUE 0ULL +#endif + extern u64 __read_mostly shadow_host_writable_mask; extern u64 __read_mostly shadow_mmu_writable_mask; extern u64 __read_mostly shadow_nx_mask; @@ -190,11 +206,11 @@ extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask; * * Use a semi-arbitrary value that doesn't set RWX bits, i.e. is not-present on * both AMD and Intel CPUs, and doesn't set PFN bits, i.e. doesn't create a L1TF - * vulnerability. Use only low bits to avoid 64-bit immediates. + * vulnerability. * * Only used by the TDP MMU. */ -#define REMOVED_SPTE 0x5a0ULL +#define REMOVED_SPTE (SHADOW_NONPRESENT_VALUE | 0x5a0ULL) /* Removed SPTEs must not be misconstrued as shadow present PTEs. */ static_assert(!(REMOVED_SPTE & SPTE_MMU_PRESENT_MASK)); @@ -249,9 +265,9 @@ static inline struct kvm_mmu_page *root_to_sp(hpa_t root) return spte_to_child_sp(root); } -static inline bool is_mmio_spte(u64 spte) +static inline bool is_mmio_spte(struct kvm *kvm, u64 spte) { - return (spte & shadow_mmio_mask) == shadow_mmio_value && + return (spte & shadow_mmio_mask) == kvm->arch.shadow_mmio_value && likely(enable_mmio_caching); } @@ -496,8 +512,6 @@ static inline u64 restore_acc_track_spte(u64 spte) return spte; } -u64 kvm_mmu_changed_pte_notifier_make_spte(u64 old_spte, kvm_pfn_t new_pfn); - void __init kvm_mmu_spte_module_init(void); void kvm_mmu_reset_all_pte_masks(void); diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 04c1f0957fea..1259dd63defc 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -495,8 +495,8 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, * impact the guest since both the former and current SPTEs * are nonpresent. */ - if (WARN_ON_ONCE(!is_mmio_spte(old_spte) && - !is_mmio_spte(new_spte) && + if (WARN_ON_ONCE(!is_mmio_spte(kvm, old_spte) && + !is_mmio_spte(kvm, new_spte) && !is_removed_spte(new_spte))) pr_err("Unexpected SPTE change! Nonpresent SPTEs\n" "should not be replaced with another,\n" @@ -530,6 +530,31 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, kvm_set_pfn_accessed(spte_to_pfn(old_spte)); } +static inline int __tdp_mmu_set_spte_atomic(struct tdp_iter *iter, u64 new_spte) +{ + u64 *sptep = rcu_dereference(iter->sptep); + + /* + * The caller is responsible for ensuring the old SPTE is not a REMOVED + * SPTE. KVM should never attempt to zap or manipulate a REMOVED SPTE, + * and pre-checking before inserting a new SPTE is advantageous as it + * avoids unnecessary work. + */ + WARN_ON_ONCE(iter->yielded || is_removed_spte(iter->old_spte)); + + /* + * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs and + * does not hold the mmu_lock. On failure, i.e. if a different logical + * CPU modified the SPTE, try_cmpxchg64() updates iter->old_spte with + * the current value, so the caller operates on fresh data, e.g. if it + * retries tdp_mmu_set_spte_atomic() + */ + if (!try_cmpxchg64(sptep, &iter->old_spte, new_spte)) + return -EBUSY; + + return 0; +} + /* * tdp_mmu_set_spte_atomic - Set a TDP MMU SPTE atomically * and handle the associated bookkeeping. Do not mark the page dirty @@ -551,27 +576,13 @@ static inline int tdp_mmu_set_spte_atomic(struct kvm *kvm, struct tdp_iter *iter, u64 new_spte) { - u64 *sptep = rcu_dereference(iter->sptep); - - /* - * The caller is responsible for ensuring the old SPTE is not a REMOVED - * SPTE. KVM should never attempt to zap or manipulate a REMOVED SPTE, - * and pre-checking before inserting a new SPTE is advantageous as it - * avoids unnecessary work. - */ - WARN_ON_ONCE(iter->yielded || is_removed_spte(iter->old_spte)); + int ret; lockdep_assert_held_read(&kvm->mmu_lock); - /* - * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs and - * does not hold the mmu_lock. On failure, i.e. if a different logical - * CPU modified the SPTE, try_cmpxchg64() updates iter->old_spte with - * the current value, so the caller operates on fresh data, e.g. if it - * retries tdp_mmu_set_spte_atomic() - */ - if (!try_cmpxchg64(sptep, &iter->old_spte, new_spte)) - return -EBUSY; + ret = __tdp_mmu_set_spte_atomic(iter, new_spte); + if (ret) + return ret; handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte, new_spte, iter->level, true); @@ -584,13 +595,17 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm, { int ret; + lockdep_assert_held_read(&kvm->mmu_lock); + /* - * Freeze the SPTE by setting it to a special, - * non-present value. This will stop other threads from - * immediately installing a present entry in its place - * before the TLBs are flushed. + * Freeze the SPTE by setting it to a special, non-present value. This + * will stop other threads from immediately installing a present entry + * in its place before the TLBs are flushed. + * + * Delay processing of the zapped SPTE until after TLBs are flushed and + * the REMOVED_SPTE is replaced (see below). */ - ret = tdp_mmu_set_spte_atomic(kvm, iter, REMOVED_SPTE); + ret = __tdp_mmu_set_spte_atomic(iter, REMOVED_SPTE); if (ret) return ret; @@ -599,11 +614,19 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm, /* * No other thread can overwrite the removed SPTE as they must either * wait on the MMU lock or use tdp_mmu_set_spte_atomic() which will not - * overwrite the special removed SPTE value. No bookkeeping is needed - * here since the SPTE is going from non-present to non-present. Use - * the raw write helper to avoid an unnecessary check on volatile bits. + * overwrite the special removed SPTE value. Use the raw write helper to + * avoid an unnecessary check on volatile bits. */ - __kvm_tdp_mmu_write_spte(iter->sptep, 0); + __kvm_tdp_mmu_write_spte(iter->sptep, SHADOW_NONPRESENT_VALUE); + + /* + * Process the zapped SPTE after flushing TLBs, and after replacing + * REMOVED_SPTE with 0. This minimizes the amount of time vCPUs are + * blocked by the REMOVED_SPTE and reduces contention on the child + * SPTEs. + */ + handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte, + 0, iter->level, true); return 0; } @@ -740,8 +763,8 @@ retry: continue; if (!shared) - tdp_mmu_iter_set_spte(kvm, &iter, 0); - else if (tdp_mmu_set_spte_atomic(kvm, &iter, 0)) + tdp_mmu_iter_set_spte(kvm, &iter, SHADOW_NONPRESENT_VALUE); + else if (tdp_mmu_set_spte_atomic(kvm, &iter, SHADOW_NONPRESENT_VALUE)) goto retry; } } @@ -808,8 +831,8 @@ bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp) if (WARN_ON_ONCE(!is_shadow_present_pte(old_spte))) return false; - tdp_mmu_set_spte(kvm, kvm_mmu_page_as_id(sp), sp->ptep, old_spte, 0, - sp->gfn, sp->role.level + 1); + tdp_mmu_set_spte(kvm, kvm_mmu_page_as_id(sp), sp->ptep, old_spte, + SHADOW_NONPRESENT_VALUE, sp->gfn, sp->role.level + 1); return true; } @@ -843,7 +866,7 @@ static bool tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root, !is_last_spte(iter.old_spte, iter.level)) continue; - tdp_mmu_iter_set_spte(kvm, &iter, 0); + tdp_mmu_iter_set_spte(kvm, &iter, SHADOW_NONPRESENT_VALUE); /* * Zappings SPTEs in invalid roots doesn't require a TLB flush, @@ -1028,7 +1051,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, } /* If a MMIO SPTE is installed, the MMIO will need to be emulated. */ - if (unlikely(is_mmio_spte(new_spte))) { + if (unlikely(is_mmio_spte(vcpu->kvm, new_spte))) { vcpu->stat.pf_mmio_spte_created++; trace_mark_mmio_spte(rcu_dereference(iter->sptep), iter->gfn, new_spte); @@ -1258,52 +1281,6 @@ bool kvm_tdp_mmu_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) return kvm_tdp_mmu_handle_gfn(kvm, range, test_age_gfn); } -static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter, - struct kvm_gfn_range *range) -{ - u64 new_spte; - - /* Huge pages aren't expected to be modified without first being zapped. */ - WARN_ON_ONCE(pte_huge(range->arg.pte) || range->start + 1 != range->end); - - if (iter->level != PG_LEVEL_4K || - !is_shadow_present_pte(iter->old_spte)) - return false; - - /* - * Note, when changing a read-only SPTE, it's not strictly necessary to - * zero the SPTE before setting the new PFN, but doing so preserves the - * invariant that the PFN of a present * leaf SPTE can never change. - * See handle_changed_spte(). - */ - tdp_mmu_iter_set_spte(kvm, iter, 0); - - if (!pte_write(range->arg.pte)) { - new_spte = kvm_mmu_changed_pte_notifier_make_spte(iter->old_spte, - pte_pfn(range->arg.pte)); - - tdp_mmu_iter_set_spte(kvm, iter, new_spte); - } - - return true; -} - -/* - * Handle the changed_pte MMU notifier for the TDP MMU. - * data is a pointer to the new pte_t mapping the HVA specified by the MMU - * notifier. - * Returns non-zero if a flush is needed before releasing the MMU lock. - */ -bool kvm_tdp_mmu_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) -{ - /* - * No need to handle the remote TLB flush under RCU protection, the - * target SPTE _must_ be a leaf SPTE, i.e. cannot result in freeing a - * shadow page. See the WARN on pfn_changed in handle_changed_spte(). - */ - return kvm_tdp_mmu_handle_gfn(kvm, range, set_spte_gfn); -} - /* * Remove write access from all SPTEs at or above min_level that map GFNs * [start, end). Returns true if an SPTE has been changed and the TLBs need to diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h index 6e1ea04ca885..58b55e61bd33 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.h +++ b/arch/x86/kvm/mmu/tdp_mmu.h @@ -31,7 +31,6 @@ bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range, bool flush); bool kvm_tdp_mmu_age_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_tdp_mmu_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range); -bool kvm_tdp_mmu_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, const struct kvm_memory_slot *slot, int min_level); diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 759581bb2128..0623cfaa7bb0 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -23,6 +23,7 @@ #include <asm/pkru.h> #include <asm/trapnr.h> #include <asm/fpu/xcr.h> +#include <asm/fpu/xstate.h> #include <asm/debugreg.h> #include "mmu.h" @@ -32,22 +33,12 @@ #include "cpuid.h" #include "trace.h" -#ifndef CONFIG_KVM_AMD_SEV -/* - * When this config is not defined, SEV feature is not supported and APIs in - * this file are not used but this file still gets compiled into the KVM AMD - * module. - * - * We will not have MISC_CG_RES_SEV and MISC_CG_RES_SEV_ES entries in the enum - * misc_res_type {} defined in linux/misc_cgroup.h. - * - * Below macros allow compilation to succeed. - */ -#define MISC_CG_RES_SEV MISC_CG_RES_TYPES -#define MISC_CG_RES_SEV_ES MISC_CG_RES_TYPES -#endif +#define GHCB_VERSION_MAX 2ULL +#define GHCB_VERSION_DEFAULT 2ULL +#define GHCB_VERSION_MIN 1ULL + +#define GHCB_HV_FT_SUPPORTED GHCB_HV_FT_SNP -#ifdef CONFIG_KVM_AMD_SEV /* enable/disable SEV support */ static bool sev_enabled = true; module_param_named(sev, sev_enabled, bool, 0444); @@ -57,13 +48,13 @@ static bool sev_es_enabled = true; module_param_named(sev_es, sev_es_enabled, bool, 0444); /* enable/disable SEV-ES DebugSwap support */ -static bool sev_es_debug_swap_enabled = false; +static bool sev_es_debug_swap_enabled = true; module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0444); -#else -#define sev_enabled false -#define sev_es_enabled false -#define sev_es_debug_swap_enabled false -#endif /* CONFIG_KVM_AMD_SEV */ +static u64 sev_supported_vmsa_features; + +#define AP_RESET_HOLD_NONE 0 +#define AP_RESET_HOLD_NAE_EVENT 1 +#define AP_RESET_HOLD_MSR_PROTO 2 static u8 sev_enc_bit; static DECLARE_RWSEM(sev_deactivate_lock); @@ -113,7 +104,15 @@ static int sev_flush_asids(unsigned int min_asid, unsigned int max_asid) static inline bool is_mirroring_enc_context(struct kvm *kvm) { - return !!to_kvm_svm(kvm)->sev_info.enc_context_owner; + return !!to_kvm_sev_info(kvm)->enc_context_owner; +} + +static bool sev_vcpu_has_debug_swap(struct vcpu_svm *svm) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info; + + return sev->vmsa_features & SVM_SEV_FEAT_DEBUG_SWAP; } /* Must be called with the sev_bitmap_lock held */ @@ -251,20 +250,44 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) sev_decommission(handle); } -static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) +static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp, + struct kvm_sev_init *data, + unsigned long vm_type) { struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct sev_platform_init_args init_args = {0}; + bool es_active = vm_type != KVM_X86_SEV_VM; + u64 valid_vmsa_features = es_active ? sev_supported_vmsa_features : 0; int ret; if (kvm->created_vcpus) return -EINVAL; + if (data->flags) + return -EINVAL; + + if (data->vmsa_features & ~valid_vmsa_features) + return -EINVAL; + + if (data->ghcb_version > GHCB_VERSION_MAX || (!es_active && data->ghcb_version)) + return -EINVAL; + if (unlikely(sev->active)) return -EINVAL; sev->active = true; - sev->es_active = argp->id == KVM_SEV_ES_INIT; + sev->es_active = es_active; + sev->vmsa_features = data->vmsa_features; + sev->ghcb_version = data->ghcb_version; + + /* + * Currently KVM supports the full range of mandatory features defined + * by version 2 of the GHCB protocol, so default to that for SEV-ES + * guests created via KVM_SEV_INIT2. + */ + if (sev->es_active && !sev->ghcb_version) + sev->ghcb_version = GHCB_VERSION_DEFAULT; + ret = sev_asid_new(sev); if (ret) goto e_no_asid; @@ -276,6 +299,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) INIT_LIST_HEAD(&sev->regions_list); INIT_LIST_HEAD(&sev->mirror_vms); + sev->need_init = false; kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_SEV); @@ -286,11 +310,53 @@ e_free: sev_asid_free(sev); sev->asid = 0; e_no_asid: + sev->vmsa_features = 0; sev->es_active = false; sev->active = false; return ret; } +static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) +{ + struct kvm_sev_init data = { + .vmsa_features = 0, + .ghcb_version = 0, + }; + unsigned long vm_type; + + if (kvm->arch.vm_type != KVM_X86_DEFAULT_VM) + return -EINVAL; + + vm_type = (argp->id == KVM_SEV_INIT ? KVM_X86_SEV_VM : KVM_X86_SEV_ES_VM); + + /* + * KVM_SEV_ES_INIT has been deprecated by KVM_SEV_INIT2, so it will + * continue to only ever support the minimal GHCB protocol version. + */ + if (vm_type == KVM_X86_SEV_ES_VM) + data.ghcb_version = GHCB_VERSION_MIN; + + return __sev_guest_init(kvm, argp, &data, vm_type); +} + +static int sev_guest_init2(struct kvm *kvm, struct kvm_sev_cmd *argp) +{ + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; + struct kvm_sev_init data; + + if (!sev->need_init) + return -EINVAL; + + if (kvm->arch.vm_type != KVM_X86_SEV_VM && + kvm->arch.vm_type != KVM_X86_SEV_ES_VM) + return -EINVAL; + + if (copy_from_user(&data, u64_to_user_ptr(argp->data), sizeof(data))) + return -EFAULT; + + return __sev_guest_init(kvm, argp, &data, kvm->arch.vm_type); +} + static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error) { unsigned int asid = sev_get_asid(kvm); @@ -339,7 +405,7 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) if (!sev_guest(kvm)) return -ENOTTY; - if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) + if (copy_from_user(¶ms, u64_to_user_ptr(argp->data), sizeof(params))) return -EFAULT; memset(&start, 0, sizeof(start)); @@ -383,7 +449,7 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) /* return handle to userspace */ params.handle = start.handle; - if (copy_to_user((void __user *)(uintptr_t)argp->data, ¶ms, sizeof(params))) { + if (copy_to_user(u64_to_user_ptr(argp->data), ¶ms, sizeof(params))) { sev_unbind_asid(kvm, start.handle); ret = -EFAULT; goto e_free_session; @@ -522,7 +588,7 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) if (!sev_guest(kvm)) return -ENOTTY; - if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) + if (copy_from_user(¶ms, u64_to_user_ptr(argp->data), sizeof(params))) return -EFAULT; vaddr = params.uaddr; @@ -580,7 +646,13 @@ e_unpin: static int sev_es_sync_vmsa(struct vcpu_svm *svm) { + struct kvm_vcpu *vcpu = &svm->vcpu; + struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info; struct sev_es_save_area *save = svm->sev_es.vmsa; + struct xregs_state *xsave; + const u8 *s; + u8 *d; + int i; /* Check some debug related fields before encrypting the VMSA */ if (svm->vcpu.guest_debug || (svm->vmcb->save.dr7 & ~DR7_FIXED_1)) @@ -621,10 +693,44 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm) save->xss = svm->vcpu.arch.ia32_xss; save->dr6 = svm->vcpu.arch.dr6; - if (sev_es_debug_swap_enabled) { - save->sev_features |= SVM_SEV_FEAT_DEBUG_SWAP; - pr_warn_once("Enabling DebugSwap with KVM_SEV_ES_INIT. " - "This will not work starting with Linux 6.10\n"); + save->sev_features = sev->vmsa_features; + + /* + * Skip FPU and AVX setup with KVM_SEV_ES_INIT to avoid + * breaking older measurements. + */ + if (vcpu->kvm->arch.vm_type != KVM_X86_DEFAULT_VM) { + xsave = &vcpu->arch.guest_fpu.fpstate->regs.xsave; + save->x87_dp = xsave->i387.rdp; + save->mxcsr = xsave->i387.mxcsr; + save->x87_ftw = xsave->i387.twd; + save->x87_fsw = xsave->i387.swd; + save->x87_fcw = xsave->i387.cwd; + save->x87_fop = xsave->i387.fop; + save->x87_ds = 0; + save->x87_cs = 0; + save->x87_rip = xsave->i387.rip; + + for (i = 0; i < 8; i++) { + /* + * The format of the x87 save area is undocumented and + * definitely not what you would expect. It consists of + * an 8*8 bytes area with bytes 0-7, and an 8*2 bytes + * area with bytes 8-9 of each register. + */ + d = save->fpreg_x87 + i * 8; + s = ((u8 *)xsave->i387.st_space) + i * 16; + memcpy(d, s, 8); + save->fpreg_x87[64 + i * 2] = s[8]; + save->fpreg_x87[64 + i * 2 + 1] = s[9]; + } + memcpy(save->fpreg_xmm, xsave->i387.xmm_space, 256); + + s = get_xsave_addr(xsave, XFEATURE_YMM); + if (s) + memcpy(save->fpreg_ymm, s, 256); + else + memset(save->fpreg_ymm, 0, 256); } pr_debug("Virtual Machine Save Area (VMSA):\n"); @@ -658,13 +764,20 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu, clflush_cache_range(svm->sev_es.vmsa, PAGE_SIZE); vmsa.reserved = 0; - vmsa.handle = to_kvm_svm(kvm)->sev_info.handle; + vmsa.handle = to_kvm_sev_info(kvm)->handle; vmsa.address = __sme_pa(svm->sev_es.vmsa); vmsa.len = PAGE_SIZE; ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error); if (ret) return ret; + /* + * SEV-ES guests maintain an encrypted version of their FPU + * state which is restored and saved on VMRUN and VMEXIT. + * Mark vcpu->arch.guest_fpu->fpstate as scratch so it won't + * do xsave/xrstor on it. + */ + fpstate_set_confidential(&vcpu->arch.guest_fpu); vcpu->arch.guest_state_protected = true; return 0; } @@ -695,7 +808,7 @@ static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp) static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) { - void __user *measure = (void __user *)(uintptr_t)argp->data; + void __user *measure = u64_to_user_ptr(argp->data); struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct sev_data_launch_measure data; struct kvm_sev_launch_measure params; @@ -715,7 +828,7 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) if (!params.len) goto cmd; - p = (void __user *)(uintptr_t)params.uaddr; + p = u64_to_user_ptr(params.uaddr); if (p) { if (params.len > SEV_FW_BLOB_MAX_SIZE) return -EINVAL; @@ -788,7 +901,7 @@ static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp) params.state = data.state; params.handle = data.handle; - if (copy_to_user((void __user *)(uintptr_t)argp->data, ¶ms, sizeof(params))) + if (copy_to_user(u64_to_user_ptr(argp->data), ¶ms, sizeof(params))) ret = -EFAULT; return ret; @@ -953,7 +1066,7 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec) if (!sev_guest(kvm)) return -ENOTTY; - if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug))) + if (copy_from_user(&debug, u64_to_user_ptr(argp->data), sizeof(debug))) return -EFAULT; if (!debug.len || debug.src_uaddr + debug.len < debug.src_uaddr) @@ -1037,7 +1150,7 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp) if (!sev_guest(kvm)) return -ENOTTY; - if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) + if (copy_from_user(¶ms, u64_to_user_ptr(argp->data), sizeof(params))) return -EFAULT; pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1); @@ -1101,7 +1214,7 @@ e_unpin_memory: static int sev_get_attestation_report(struct kvm *kvm, struct kvm_sev_cmd *argp) { - void __user *report = (void __user *)(uintptr_t)argp->data; + void __user *report = u64_to_user_ptr(argp->data); struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct sev_data_attestation_report data; struct kvm_sev_attestation_report params; @@ -1112,7 +1225,7 @@ static int sev_get_attestation_report(struct kvm *kvm, struct kvm_sev_cmd *argp) if (!sev_guest(kvm)) return -ENOTTY; - if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) + if (copy_from_user(¶ms, u64_to_user_ptr(argp->data), sizeof(params))) return -EFAULT; memset(&data, 0, sizeof(data)); @@ -1121,7 +1234,7 @@ static int sev_get_attestation_report(struct kvm *kvm, struct kvm_sev_cmd *argp) if (!params.len) goto cmd; - p = (void __user *)(uintptr_t)params.uaddr; + p = u64_to_user_ptr(params.uaddr); if (p) { if (params.len > SEV_FW_BLOB_MAX_SIZE) return -EINVAL; @@ -1174,7 +1287,7 @@ __sev_send_start_query_session_length(struct kvm *kvm, struct kvm_sev_cmd *argp, ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error); params->session_len = data.session_len; - if (copy_to_user((void __user *)(uintptr_t)argp->data, params, + if (copy_to_user(u64_to_user_ptr(argp->data), params, sizeof(struct kvm_sev_send_start))) ret = -EFAULT; @@ -1193,7 +1306,7 @@ static int sev_send_start(struct kvm *kvm, struct kvm_sev_cmd *argp) if (!sev_guest(kvm)) return -ENOTTY; - if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, + if (copy_from_user(¶ms, u64_to_user_ptr(argp->data), sizeof(struct kvm_sev_send_start))) return -EFAULT; @@ -1248,7 +1361,7 @@ static int sev_send_start(struct kvm *kvm, struct kvm_sev_cmd *argp) ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error); - if (!ret && copy_to_user((void __user *)(uintptr_t)params.session_uaddr, + if (!ret && copy_to_user(u64_to_user_ptr(params.session_uaddr), session_data, params.session_len)) { ret = -EFAULT; goto e_free_amd_cert; @@ -1256,7 +1369,7 @@ static int sev_send_start(struct kvm *kvm, struct kvm_sev_cmd *argp) params.policy = data.policy; params.session_len = data.session_len; - if (copy_to_user((void __user *)(uintptr_t)argp->data, ¶ms, + if (copy_to_user(u64_to_user_ptr(argp->data), ¶ms, sizeof(struct kvm_sev_send_start))) ret = -EFAULT; @@ -1287,7 +1400,7 @@ __sev_send_update_data_query_lengths(struct kvm *kvm, struct kvm_sev_cmd *argp, params->hdr_len = data.hdr_len; params->trans_len = data.trans_len; - if (copy_to_user((void __user *)(uintptr_t)argp->data, params, + if (copy_to_user(u64_to_user_ptr(argp->data), params, sizeof(struct kvm_sev_send_update_data))) ret = -EFAULT; @@ -1307,7 +1420,7 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) if (!sev_guest(kvm)) return -ENOTTY; - if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, + if (copy_from_user(¶ms, u64_to_user_ptr(argp->data), sizeof(struct kvm_sev_send_update_data))) return -EFAULT; @@ -1358,14 +1471,14 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) goto e_free_trans_data; /* copy transport buffer to user space */ - if (copy_to_user((void __user *)(uintptr_t)params.trans_uaddr, + if (copy_to_user(u64_to_user_ptr(params.trans_uaddr), trans_data, params.trans_len)) { ret = -EFAULT; goto e_free_trans_data; } /* Copy packet header to userspace. */ - if (copy_to_user((void __user *)(uintptr_t)params.hdr_uaddr, hdr, + if (copy_to_user(u64_to_user_ptr(params.hdr_uaddr), hdr, params.hdr_len)) ret = -EFAULT; @@ -1417,7 +1530,7 @@ static int sev_receive_start(struct kvm *kvm, struct kvm_sev_cmd *argp) return -ENOTTY; /* Get parameter from the userspace */ - if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, + if (copy_from_user(¶ms, u64_to_user_ptr(argp->data), sizeof(struct kvm_sev_receive_start))) return -EFAULT; @@ -1459,7 +1572,7 @@ static int sev_receive_start(struct kvm *kvm, struct kvm_sev_cmd *argp) } params.handle = start.handle; - if (copy_to_user((void __user *)(uintptr_t)argp->data, + if (copy_to_user(u64_to_user_ptr(argp->data), ¶ms, sizeof(struct kvm_sev_receive_start))) { ret = -EFAULT; sev_unbind_asid(kvm, start.handle); @@ -1490,7 +1603,7 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) if (!sev_guest(kvm)) return -EINVAL; - if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, + if (copy_from_user(¶ms, u64_to_user_ptr(argp->data), sizeof(struct kvm_sev_receive_update_data))) return -EFAULT; @@ -1705,6 +1818,7 @@ static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm) dst->pages_locked = src->pages_locked; dst->enc_context_owner = src->enc_context_owner; dst->es_active = src->es_active; + dst->vmsa_features = src->vmsa_features; src->asid = 0; src->active = false; @@ -1812,7 +1926,8 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd) if (ret) goto out_fput; - if (sev_guest(kvm) || !sev_guest(source_kvm)) { + if (kvm->arch.vm_type != source_kvm->arch.vm_type || + sev_guest(kvm) || !sev_guest(source_kvm)) { ret = -EINVAL; goto out_unlock; } @@ -1861,6 +1976,21 @@ out_fput: return ret; } +int sev_dev_get_attr(u32 group, u64 attr, u64 *val) +{ + if (group != KVM_X86_GRP_SEV) + return -ENXIO; + + switch (attr) { + case KVM_X86_SEV_VMSA_FEATURES: + *val = sev_supported_vmsa_features; + return 0; + + default: + return -ENXIO; + } +} + int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp) { struct kvm_sev_cmd sev_cmd; @@ -1894,6 +2024,9 @@ int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp) case KVM_SEV_INIT: r = sev_guest_init(kvm, &sev_cmd); break; + case KVM_SEV_INIT2: + r = sev_guest_init2(kvm, &sev_cmd); + break; case KVM_SEV_LAUNCH_START: r = sev_launch_start(kvm, &sev_cmd); break; @@ -2121,6 +2254,7 @@ int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd) mirror_sev->asid = source_sev->asid; mirror_sev->fd = source_sev->fd; mirror_sev->es_active = source_sev->es_active; + mirror_sev->need_init = false; mirror_sev->handle = source_sev->handle; INIT_LIST_HEAD(&mirror_sev->regions_list); INIT_LIST_HEAD(&mirror_sev->mirror_vms); @@ -2186,15 +2320,18 @@ void sev_vm_destroy(struct kvm *kvm) void __init sev_set_cpu_caps(void) { - if (!sev_enabled) - kvm_cpu_cap_clear(X86_FEATURE_SEV); - if (!sev_es_enabled) - kvm_cpu_cap_clear(X86_FEATURE_SEV_ES); + if (sev_enabled) { + kvm_cpu_cap_set(X86_FEATURE_SEV); + kvm_caps.supported_vm_types |= BIT(KVM_X86_SEV_VM); + } + if (sev_es_enabled) { + kvm_cpu_cap_set(X86_FEATURE_SEV_ES); + kvm_caps.supported_vm_types |= BIT(KVM_X86_SEV_ES_VM); + } } void __init sev_hardware_setup(void) { -#ifdef CONFIG_KVM_AMD_SEV unsigned int eax, ebx, ecx, edx, sev_asid_count, sev_es_asid_count; bool sev_es_supported = false; bool sev_supported = false; @@ -2294,7 +2431,10 @@ out: if (!sev_es_enabled || !cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP) || !cpu_feature_enabled(X86_FEATURE_NO_NESTED_DATA_BP)) sev_es_debug_swap_enabled = false; -#endif + + sev_supported_vmsa_features = 0; + if (sev_es_debug_swap_enabled) + sev_supported_vmsa_features |= SVM_SEV_FEAT_DEBUG_SWAP; } void sev_hardware_unsetup(void) @@ -2585,6 +2725,8 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm) case SVM_VMGEXIT_AP_HLT_LOOP: case SVM_VMGEXIT_AP_JUMP_TABLE: case SVM_VMGEXIT_UNSUPPORTED_EVENT: + case SVM_VMGEXIT_HV_FEATURES: + case SVM_VMGEXIT_TERM_REQUEST: break; default: reason = GHCB_ERR_INVALID_EVENT; @@ -2615,6 +2757,9 @@ vmgexit_err: void sev_es_unmap_ghcb(struct vcpu_svm *svm) { + /* Clear any indication that the vCPU is in a type of AP Reset Hold */ + svm->sev_es.ap_reset_hold_type = AP_RESET_HOLD_NONE; + if (!svm->sev_es.ghcb) return; @@ -2774,6 +2919,7 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; struct kvm_vcpu *vcpu = &svm->vcpu; + struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info; u64 ghcb_info; int ret = 1; @@ -2784,7 +2930,7 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) switch (ghcb_info) { case GHCB_MSR_SEV_INFO_REQ: - set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX, + set_ghcb_msr(svm, GHCB_MSR_SEV_INFO((__u64)sev->ghcb_version, GHCB_VERSION_MIN, sev_enc_bit)); break; @@ -2826,6 +2972,28 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) GHCB_MSR_INFO_POS); break; } + case GHCB_MSR_AP_RESET_HOLD_REQ: + svm->sev_es.ap_reset_hold_type = AP_RESET_HOLD_MSR_PROTO; + ret = kvm_emulate_ap_reset_hold(&svm->vcpu); + + /* + * Preset the result to a non-SIPI return and then only set + * the result to non-zero when delivering a SIPI. + */ + set_ghcb_msr_bits(svm, 0, + GHCB_MSR_AP_RESET_HOLD_RESULT_MASK, + GHCB_MSR_AP_RESET_HOLD_RESULT_POS); + + set_ghcb_msr_bits(svm, GHCB_MSR_AP_RESET_HOLD_RESP, + GHCB_MSR_INFO_MASK, + GHCB_MSR_INFO_POS); + break; + case GHCB_MSR_HV_FT_REQ: + set_ghcb_msr_bits(svm, GHCB_HV_FT_SUPPORTED, + GHCB_MSR_HV_FT_MASK, GHCB_MSR_HV_FT_POS); + set_ghcb_msr_bits(svm, GHCB_MSR_HV_FT_RESP, + GHCB_MSR_INFO_MASK, GHCB_MSR_INFO_POS); + break; case GHCB_MSR_TERM_REQ: { u64 reason_set, reason_code; @@ -2925,6 +3093,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) ret = 1; break; case SVM_VMGEXIT_AP_HLT_LOOP: + svm->sev_es.ap_reset_hold_type = AP_RESET_HOLD_NAE_EVENT; ret = kvm_emulate_ap_reset_hold(vcpu); break; case SVM_VMGEXIT_AP_JUMP_TABLE: { @@ -2949,6 +3118,19 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) ret = 1; break; } + case SVM_VMGEXIT_HV_FEATURES: + ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_HV_FT_SUPPORTED); + + ret = 1; + break; + case SVM_VMGEXIT_TERM_REQUEST: + pr_info("SEV-ES guest requested termination: reason %#llx info %#llx\n", + control->exit_info_1, control->exit_info_2); + vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; + vcpu->run->system_event.type = KVM_SYSTEM_EVENT_SEV_TERM; + vcpu->run->system_event.ndata = 1; + vcpu->run->system_event.data[0] = control->ghcb_gpa; + break; case SVM_VMGEXIT_UNSUPPORTED_EVENT: vcpu_unimpl(vcpu, "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n", @@ -3063,7 +3245,7 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm) svm_set_intercept(svm, TRAP_CR8_WRITE); vmcb->control.intercepts[INTERCEPT_DR] = 0; - if (!sev_es_debug_swap_enabled) { + if (!sev_vcpu_has_debug_swap(svm)) { vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ); vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE); recalc_intercepts(svm); @@ -3109,16 +3291,19 @@ void sev_init_vmcb(struct vcpu_svm *svm) void sev_es_vcpu_reset(struct vcpu_svm *svm) { + struct kvm_vcpu *vcpu = &svm->vcpu; + struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info; + /* * Set the GHCB MSR value as per the GHCB specification when emulating * vCPU RESET for an SEV-ES guest. */ - set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX, + set_ghcb_msr(svm, GHCB_MSR_SEV_INFO((__u64)sev->ghcb_version, GHCB_VERSION_MIN, sev_enc_bit)); } -void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa) +void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_area *hostsa) { /* * All host state for SEV-ES guests is categorized into three swap types @@ -3146,7 +3331,7 @@ void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa) * the CPU (Type-B). If DebugSwap is disabled/unsupported, the CPU both * saves and loads debug registers (Type-A). */ - if (sev_es_debug_swap_enabled) { + if (sev_vcpu_has_debug_swap(svm)) { hostsa->dr0 = native_get_debugreg(0); hostsa->dr1 = native_get_debugreg(1); hostsa->dr2 = native_get_debugreg(2); @@ -3168,15 +3353,31 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) return; } - /* - * Subsequent SIPI: Return from an AP Reset Hold VMGEXIT, where - * the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a - * non-zero value. - */ - if (!svm->sev_es.ghcb) - return; + /* Subsequent SIPI */ + switch (svm->sev_es.ap_reset_hold_type) { + case AP_RESET_HOLD_NAE_EVENT: + /* + * Return from an AP Reset Hold VMGEXIT, where the guest will + * set the CS and RIP. Set SW_EXIT_INFO_2 to a non-zero value. + */ + ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 1); + break; + case AP_RESET_HOLD_MSR_PROTO: + /* + * Return from an AP Reset Hold VMGEXIT, where the guest will + * set the CS and RIP. Set GHCB data field to a non-zero value. + */ + set_ghcb_msr_bits(svm, 1, + GHCB_MSR_AP_RESET_HOLD_RESULT_MASK, + GHCB_MSR_AP_RESET_HOLD_RESULT_POS); - ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 1); + set_ghcb_msr_bits(svm, GHCB_MSR_AP_RESET_HOLD_RESP, + GHCB_MSR_INFO_MASK, + GHCB_MSR_INFO_POS); + break; + default: + break; + } } struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 9aaf83c8d57d..c8dc25886c16 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1433,14 +1433,6 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu) vmsa_page = snp_safe_alloc_page(vcpu); if (!vmsa_page) goto error_free_vmcb_page; - - /* - * SEV-ES guests maintain an encrypted version of their FPU - * state which is restored and saved on VMRUN and VMEXIT. - * Mark vcpu->arch.guest_fpu->fpstate as scratch so it won't - * do xsave/xrstor on it. - */ - fpstate_set_confidential(&vcpu->arch.guest_fpu); } err = avic_init_vcpu(svm); @@ -1525,7 +1517,7 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) */ vmsave(sd->save_area_pa); if (sev_es_guest(vcpu->kvm)) - sev_es_prepare_switch_to_guest(sev_es_host_save_area(sd)); + sev_es_prepare_switch_to_guest(svm, sev_es_host_save_area(sd)); if (tsc_scaling) __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio); @@ -2056,6 +2048,15 @@ static int npf_interception(struct kvm_vcpu *vcpu) u64 fault_address = svm->vmcb->control.exit_info_2; u64 error_code = svm->vmcb->control.exit_info_1; + /* + * WARN if hardware generates a fault with an error code that collides + * with KVM-defined sythentic flags. Clear the flags and continue on, + * i.e. don't terminate the VM, as KVM can't possibly be relying on a + * flag that KVM doesn't know about. + */ + if (WARN_ON_ONCE(error_code & PFERR_SYNTHETIC_MASK)) + error_code &= ~PFERR_SYNTHETIC_MASK; + trace_kvm_page_fault(vcpu, fault_address, error_code); return kvm_mmu_page_fault(vcpu, fault_address, error_code, static_cpu_has(X86_FEATURE_DECODEASSISTS) ? @@ -3304,7 +3305,9 @@ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = { [SVM_EXIT_RSM] = rsm_interception, [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception, [SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception, +#ifdef CONFIG_KVM_AMD_SEV [SVM_EXIT_VMGEXIT] = sev_handle_vmgexit, +#endif }; static void dump_vmcb(struct kvm_vcpu *vcpu) @@ -4085,6 +4088,9 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu) static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu) { + if (to_kvm_sev_info(vcpu->kvm)->need_init) + return -EINVAL; + return 1; } @@ -4892,6 +4898,14 @@ static void svm_vm_destroy(struct kvm *kvm) static int svm_vm_init(struct kvm *kvm) { + int type = kvm->arch.vm_type; + + if (type != KVM_X86_DEFAULT_VM && + type != KVM_X86_SW_PROTECTED_VM) { + kvm->arch.has_protected_state = (type == KVM_X86_SEV_ES_VM); + to_kvm_sev_info(kvm)->need_init = true; + } + if (!pause_filter_count || !pause_filter_thresh) kvm->arch.pause_in_guest = true; @@ -5026,6 +5040,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .enable_smi_window = svm_enable_smi_window, #endif +#ifdef CONFIG_KVM_AMD_SEV + .dev_get_attr = sev_dev_get_attr, .mem_enc_ioctl = sev_mem_enc_ioctl, .mem_enc_register_region = sev_mem_enc_register_region, .mem_enc_unregister_region = sev_mem_enc_unregister_region, @@ -5033,7 +5049,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .vm_copy_enc_context_from = sev_vm_copy_enc_context_from, .vm_move_enc_context_from = sev_vm_move_enc_context_from, - +#endif .check_emulate_instruction = svm_check_emulate_instruction, .apic_init_signal_blocked = svm_apic_init_signal_blocked, diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 33878efdebc8..be57213cd295 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -79,12 +79,15 @@ enum { struct kvm_sev_info { bool active; /* SEV enabled guest */ bool es_active; /* SEV-ES enabled guest */ + bool need_init; /* waiting for SEV_INIT2 */ unsigned int asid; /* ASID used for this guest */ unsigned int handle; /* SEV firmware handle */ int fd; /* SEV device fd */ unsigned long pages_locked; /* Number of pages locked */ struct list_head regions_list; /* List of registered regions */ u64 ap_jump_table; /* SEV-ES AP Jump Table address */ + u64 vmsa_features; + u16 ghcb_version; /* Highest guest GHCB protocol version allowed */ struct kvm *enc_context_owner; /* Owner of copied encryption context */ struct list_head mirror_vms; /* List of VMs mirroring */ struct list_head mirror_entry; /* Use as a list entry of mirrors */ @@ -197,6 +200,7 @@ struct vcpu_sev_es_state { u8 valid_bitmap[16]; struct kvm_host_map ghcb_map; bool received_first_sipi; + unsigned int ap_reset_hold_type; /* SEV-ES scratch area support */ u64 sw_scratch; @@ -318,6 +322,11 @@ static __always_inline struct kvm_svm *to_kvm_svm(struct kvm *kvm) return container_of(kvm, struct kvm_svm, kvm); } +static __always_inline struct kvm_sev_info *to_kvm_sev_info(struct kvm *kvm) +{ + return &to_kvm_svm(kvm)->sev_info; +} + static __always_inline bool sev_guest(struct kvm *kvm) { #ifdef CONFIG_KVM_AMD_SEV @@ -664,13 +673,16 @@ void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu); /* sev.c */ -#define GHCB_VERSION_MAX 1ULL -#define GHCB_VERSION_MIN 1ULL - - -extern unsigned int max_sev_asid; +void pre_sev_run(struct vcpu_svm *svm, int cpu); +void sev_init_vmcb(struct vcpu_svm *svm); +void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm); +int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in); +void sev_es_vcpu_reset(struct vcpu_svm *svm); +void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); +void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_area *hostsa); +void sev_es_unmap_ghcb(struct vcpu_svm *svm); -void sev_vm_destroy(struct kvm *kvm); +#ifdef CONFIG_KVM_AMD_SEV int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp); int sev_mem_enc_register_region(struct kvm *kvm, struct kvm_enc_region *range); @@ -679,22 +691,32 @@ int sev_mem_enc_unregister_region(struct kvm *kvm, int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd); int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd); void sev_guest_memory_reclaimed(struct kvm *kvm); +int sev_handle_vmgexit(struct kvm_vcpu *vcpu); -void pre_sev_run(struct vcpu_svm *svm, int cpu); +/* These symbols are used in common code and are stubbed below. */ +struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu); +void sev_free_vcpu(struct kvm_vcpu *vcpu); +void sev_vm_destroy(struct kvm *kvm); void __init sev_set_cpu_caps(void); void __init sev_hardware_setup(void); void sev_hardware_unsetup(void); int sev_cpu_init(struct svm_cpu_data *sd); -void sev_init_vmcb(struct vcpu_svm *svm); -void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm); -void sev_free_vcpu(struct kvm_vcpu *vcpu); -int sev_handle_vmgexit(struct kvm_vcpu *vcpu); -int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in); -void sev_es_vcpu_reset(struct vcpu_svm *svm); -void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); -void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa); -void sev_es_unmap_ghcb(struct vcpu_svm *svm); -struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu); +int sev_dev_get_attr(u32 group, u64 attr, u64 *val); +extern unsigned int max_sev_asid; +#else +static inline struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu) { + return alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); +} + +static inline void sev_free_vcpu(struct kvm_vcpu *vcpu) {} +static inline void sev_vm_destroy(struct kvm *kvm) {} +static inline void __init sev_set_cpu_caps(void) {} +static inline void __init sev_hardware_setup(void) {} +static inline void sev_hardware_unsetup(void) {} +static inline int sev_cpu_init(struct svm_cpu_data *sd) { return 0; } +static inline int sev_dev_get_attr(u32 group, u64 attr, u64 *val) { return -ENXIO; } +#define max_sev_asid 0 +#endif /* vmenter.S */ diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index c6b4b1728006..9d0b02ef307e 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -1074,7 +1074,7 @@ TRACE_EVENT(kvm_smm_transition, ); /* - * Tracepoint for VT-d posted-interrupts. + * Tracepoint for VT-d posted-interrupts and AMD-Vi Guest Virtual APIC. */ TRACE_EVENT(kvm_pi_irte_update, TP_PROTO(unsigned int host_irq, unsigned int vcpu_id, @@ -1100,7 +1100,7 @@ TRACE_EVENT(kvm_pi_irte_update, __entry->set = set; ), - TP_printk("VT-d PI is %s for irq %u, vcpu %u, gsi: 0x%x, " + TP_printk("PI is %s for irq %u, vcpu %u, gsi: 0x%x, " "gvec: 0x%x, pi_desc_addr: 0x%llx", __entry->set ? "enabled and being updated" : "disabled", __entry->host_irq, diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c new file mode 100644 index 000000000000..d4ed681785fd --- /dev/null +++ b/arch/x86/kvm/vmx/main.c @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/moduleparam.h> + +#include "x86_ops.h" +#include "vmx.h" +#include "nested.h" +#include "pmu.h" +#include "posted_intr.h" + +#define VMX_REQUIRED_APICV_INHIBITS \ + (BIT(APICV_INHIBIT_REASON_DISABLE)| \ + BIT(APICV_INHIBIT_REASON_ABSENT) | \ + BIT(APICV_INHIBIT_REASON_HYPERV) | \ + BIT(APICV_INHIBIT_REASON_BLOCKIRQ) | \ + BIT(APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED) | \ + BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) | \ + BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED)) + +struct kvm_x86_ops vt_x86_ops __initdata = { + .name = KBUILD_MODNAME, + + .check_processor_compatibility = vmx_check_processor_compat, + + .hardware_unsetup = vmx_hardware_unsetup, + + .hardware_enable = vmx_hardware_enable, + .hardware_disable = vmx_hardware_disable, + .has_emulated_msr = vmx_has_emulated_msr, + + .vm_size = sizeof(struct kvm_vmx), + .vm_init = vmx_vm_init, + .vm_destroy = vmx_vm_destroy, + + .vcpu_precreate = vmx_vcpu_precreate, + .vcpu_create = vmx_vcpu_create, + .vcpu_free = vmx_vcpu_free, + .vcpu_reset = vmx_vcpu_reset, + + .prepare_switch_to_guest = vmx_prepare_switch_to_guest, + .vcpu_load = vmx_vcpu_load, + .vcpu_put = vmx_vcpu_put, + + .update_exception_bitmap = vmx_update_exception_bitmap, + .get_msr_feature = vmx_get_msr_feature, + .get_msr = vmx_get_msr, + .set_msr = vmx_set_msr, + .get_segment_base = vmx_get_segment_base, + .get_segment = vmx_get_segment, + .set_segment = vmx_set_segment, + .get_cpl = vmx_get_cpl, + .get_cs_db_l_bits = vmx_get_cs_db_l_bits, + .is_valid_cr0 = vmx_is_valid_cr0, + .set_cr0 = vmx_set_cr0, + .is_valid_cr4 = vmx_is_valid_cr4, + .set_cr4 = vmx_set_cr4, + .set_efer = vmx_set_efer, + .get_idt = vmx_get_idt, + .set_idt = vmx_set_idt, + .get_gdt = vmx_get_gdt, + .set_gdt = vmx_set_gdt, + .set_dr7 = vmx_set_dr7, + .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs, + .cache_reg = vmx_cache_reg, + .get_rflags = vmx_get_rflags, + .set_rflags = vmx_set_rflags, + .get_if_flag = vmx_get_if_flag, + + .flush_tlb_all = vmx_flush_tlb_all, + .flush_tlb_current = vmx_flush_tlb_current, + .flush_tlb_gva = vmx_flush_tlb_gva, + .flush_tlb_guest = vmx_flush_tlb_guest, + + .vcpu_pre_run = vmx_vcpu_pre_run, + .vcpu_run = vmx_vcpu_run, + .handle_exit = vmx_handle_exit, + .skip_emulated_instruction = vmx_skip_emulated_instruction, + .update_emulated_instruction = vmx_update_emulated_instruction, + .set_interrupt_shadow = vmx_set_interrupt_shadow, + .get_interrupt_shadow = vmx_get_interrupt_shadow, + .patch_hypercall = vmx_patch_hypercall, + .inject_irq = vmx_inject_irq, + .inject_nmi = vmx_inject_nmi, + .inject_exception = vmx_inject_exception, + .cancel_injection = vmx_cancel_injection, + .interrupt_allowed = vmx_interrupt_allowed, + .nmi_allowed = vmx_nmi_allowed, + .get_nmi_mask = vmx_get_nmi_mask, + .set_nmi_mask = vmx_set_nmi_mask, + .enable_nmi_window = vmx_enable_nmi_window, + .enable_irq_window = vmx_enable_irq_window, + .update_cr8_intercept = vmx_update_cr8_intercept, + .set_virtual_apic_mode = vmx_set_virtual_apic_mode, + .set_apic_access_page_addr = vmx_set_apic_access_page_addr, + .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, + .load_eoi_exitmap = vmx_load_eoi_exitmap, + .apicv_pre_state_restore = vmx_apicv_pre_state_restore, + .required_apicv_inhibits = VMX_REQUIRED_APICV_INHIBITS, + .hwapic_irr_update = vmx_hwapic_irr_update, + .hwapic_isr_update = vmx_hwapic_isr_update, + .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt, + .sync_pir_to_irr = vmx_sync_pir_to_irr, + .deliver_interrupt = vmx_deliver_interrupt, + .dy_apicv_has_pending_interrupt = pi_has_pending_interrupt, + + .set_tss_addr = vmx_set_tss_addr, + .set_identity_map_addr = vmx_set_identity_map_addr, + .get_mt_mask = vmx_get_mt_mask, + + .get_exit_info = vmx_get_exit_info, + + .vcpu_after_set_cpuid = vmx_vcpu_after_set_cpuid, + + .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, + + .get_l2_tsc_offset = vmx_get_l2_tsc_offset, + .get_l2_tsc_multiplier = vmx_get_l2_tsc_multiplier, + .write_tsc_offset = vmx_write_tsc_offset, + .write_tsc_multiplier = vmx_write_tsc_multiplier, + + .load_mmu_pgd = vmx_load_mmu_pgd, + + .check_intercept = vmx_check_intercept, + .handle_exit_irqoff = vmx_handle_exit_irqoff, + + .sched_in = vmx_sched_in, + + .cpu_dirty_log_size = PML_ENTITY_NUM, + .update_cpu_dirty_logging = vmx_update_cpu_dirty_logging, + + .nested_ops = &vmx_nested_ops, + + .pi_update_irte = vmx_pi_update_irte, + .pi_start_assignment = vmx_pi_start_assignment, + +#ifdef CONFIG_X86_64 + .set_hv_timer = vmx_set_hv_timer, + .cancel_hv_timer = vmx_cancel_hv_timer, +#endif + + .setup_mce = vmx_setup_mce, + +#ifdef CONFIG_KVM_SMM + .smi_allowed = vmx_smi_allowed, + .enter_smm = vmx_enter_smm, + .leave_smm = vmx_leave_smm, + .enable_smi_window = vmx_enable_smi_window, +#endif + + .check_emulate_instruction = vmx_check_emulate_instruction, + .apic_init_signal_blocked = vmx_apic_init_signal_blocked, + .migrate_timers = vmx_migrate_timers, + + .msr_filter_changed = vmx_msr_filter_changed, + .complete_emulated_msr = kvm_complete_insn_gp, + + .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector, + + .get_untagged_addr = vmx_get_untagged_addr, +}; + +struct kvm_x86_init_ops vt_init_ops __initdata = { + .hardware_setup = vmx_hardware_setup, + .handle_intel_pt_intr = NULL, + + .runtime_ops = &vt_x86_ops, + .pmu_ops = &intel_pmu_ops, +}; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index d05ddf751491..d5b832126e34 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -409,18 +409,40 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long exit_qualification; u32 vm_exit_reason; - unsigned long exit_qualification = vcpu->arch.exit_qualification; if (vmx->nested.pml_full) { vm_exit_reason = EXIT_REASON_PML_FULL; vmx->nested.pml_full = false; - exit_qualification &= INTR_INFO_UNBLOCK_NMI; + + /* + * It should be impossible to trigger a nested PML Full VM-Exit + * for anything other than an EPT Violation from L2. KVM *can* + * trigger nEPT page fault injection in response to an EPT + * Misconfig, e.g. if the MMIO SPTE was stale and L1's EPT + * tables also changed, but KVM should not treat EPT Misconfig + * VM-Exits as writes. + */ + WARN_ON_ONCE(vmx->exit_reason.basic != EXIT_REASON_EPT_VIOLATION); + + /* + * PML Full and EPT Violation VM-Exits both use bit 12 to report + * "NMI unblocking due to IRET", i.e. the bit can be propagated + * as-is from the original EXIT_QUALIFICATION. + */ + exit_qualification = vmx_get_exit_qual(vcpu) & INTR_INFO_UNBLOCK_NMI; } else { - if (fault->error_code & PFERR_RSVD_MASK) + if (fault->error_code & PFERR_RSVD_MASK) { vm_exit_reason = EXIT_REASON_EPT_MISCONFIG; - else + exit_qualification = 0; + } else { + exit_qualification = fault->exit_qualification; + exit_qualification |= vmx_get_exit_qual(vcpu) & + (EPT_VIOLATION_GVA_IS_VALID | + EPT_VIOLATION_GVA_TRANSLATED); vm_exit_reason = EXIT_REASON_EPT_VIOLATION; + } /* * Although the caller (kvm_inject_emulated_page_fault) would diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h index 7c1996b433e2..b25625314658 100644 --- a/arch/x86/kvm/vmx/vmcs.h +++ b/arch/x86/kvm/vmx/vmcs.h @@ -140,6 +140,11 @@ static inline bool is_nm_fault(u32 intr_info) return is_exception_n(intr_info, NM_VECTOR); } +static inline bool is_ve_fault(u32 intr_info) +{ + return is_exception_n(intr_info, VE_VECTOR); +} + /* Undocumented: icebp/int1 */ static inline bool is_icebp(u32 intr_info) { diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index becefaf95cab..6051fad5945f 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -68,6 +68,7 @@ #include "vmcs12.h" #include "vmx.h" #include "x86.h" +#include "x86_ops.h" #include "smm.h" #include "vmx_onhyperv.h" #include "posted_intr.h" @@ -531,8 +532,6 @@ static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx) static unsigned long host_idt_base; #if IS_ENABLED(CONFIG_HYPERV) -static struct kvm_x86_ops vmx_x86_ops __initdata; - static bool __read_mostly enlightened_vmcs = true; module_param(enlightened_vmcs, bool, 0444); @@ -582,9 +581,8 @@ static __init void hv_init_evmcs(void) } if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH) - vmx_x86_ops.enable_l2_tlb_flush + vt_x86_ops.enable_l2_tlb_flush = hv_enable_l2_tlb_flush; - } else { enlightened_vmcs = false; } @@ -875,6 +873,12 @@ void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu) eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | (1u << DB_VECTOR) | (1u << AC_VECTOR); /* + * #VE isn't used for VMX. To test against unexpected changes + * related to #VE for VMX, intercept unexpected #VE and warn on it. + */ + if (IS_ENABLED(CONFIG_KVM_INTEL_PROVE_VE)) + eb |= 1u << VE_VECTOR; + /* * Guest access to VMware backdoor ports could legitimately * trigger #GP because of TSS I/O permission bitmap. * We intercept those #GP and allow access to them anyway @@ -1478,7 +1482,7 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu, * Switches to specified vcpu, until a matching vcpu_put(), but assumes * vcpu mutex is already taken. */ -static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -1489,7 +1493,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vmx->host_debugctlmsr = get_debugctlmsr(); } -static void vmx_vcpu_put(struct kvm_vcpu *vcpu) +void vmx_vcpu_put(struct kvm_vcpu *vcpu) { vmx_vcpu_pi_put(vcpu); @@ -1548,7 +1552,7 @@ void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) vmx->emulation_required = vmx_emulation_required(vcpu); } -static bool vmx_get_if_flag(struct kvm_vcpu *vcpu) +bool vmx_get_if_flag(struct kvm_vcpu *vcpu) { return vmx_get_rflags(vcpu) & X86_EFLAGS_IF; } @@ -1654,8 +1658,8 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data) return 0; } -static int vmx_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, - void *insn, int insn_len) +int vmx_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, + void *insn, int insn_len) { /* * Emulation of instructions in SGX enclaves is impossible as RIP does @@ -1739,7 +1743,7 @@ rip_updated: * Recognizes a pending MTF VM-exit and records the nested state for later * delivery. */ -static void vmx_update_emulated_instruction(struct kvm_vcpu *vcpu) +void vmx_update_emulated_instruction(struct kvm_vcpu *vcpu) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -1770,7 +1774,7 @@ static void vmx_update_emulated_instruction(struct kvm_vcpu *vcpu) } } -static int vmx_skip_emulated_instruction(struct kvm_vcpu *vcpu) +int vmx_skip_emulated_instruction(struct kvm_vcpu *vcpu) { vmx_update_emulated_instruction(vcpu); return skip_emulated_instruction(vcpu); @@ -1789,7 +1793,7 @@ static void vmx_clear_hlt(struct kvm_vcpu *vcpu) vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); } -static void vmx_inject_exception(struct kvm_vcpu *vcpu) +void vmx_inject_exception(struct kvm_vcpu *vcpu) { struct kvm_queued_exception *ex = &vcpu->arch.exception; u32 intr_info = ex->vector | INTR_INFO_VALID_MASK; @@ -1910,12 +1914,12 @@ u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu) return kvm_caps.default_tsc_scaling_ratio; } -static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu) +void vmx_write_tsc_offset(struct kvm_vcpu *vcpu) { vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); } -static void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu) +void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu) { vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio); } @@ -1958,7 +1962,7 @@ static inline bool is_vmx_feature_control_msr_valid(struct vcpu_vmx *vmx, return !(msr->data & ~valid_bits); } -static int vmx_get_msr_feature(struct kvm_msr_entry *msr) +int vmx_get_msr_feature(struct kvm_msr_entry *msr) { switch (msr->index) { case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR: @@ -1975,7 +1979,7 @@ static int vmx_get_msr_feature(struct kvm_msr_entry *msr) * Returns 0 on success, non-0 otherwise. * Assumes vcpu_load() was already called. */ -static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) +int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmx_uret_msr *msr; @@ -2156,7 +2160,7 @@ static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated * Returns 0 on success, non-0 otherwise. * Assumes vcpu_load() was already called. */ -static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) +int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmx_uret_msr *msr; @@ -2459,7 +2463,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return ret; } -static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) +void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) { unsigned long guest_owned_bits; @@ -2607,6 +2611,9 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf, &_cpu_based_2nd_exec_control)) return -EIO; } + if (!IS_ENABLED(CONFIG_KVM_INTEL_PROVE_VE)) + _cpu_based_2nd_exec_control &= ~SECONDARY_EXEC_EPT_VIOLATION_VE; + #ifndef CONFIG_X86_64 if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) @@ -2631,6 +2638,7 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf, return -EIO; vmx_cap->ept = 0; + _cpu_based_2nd_exec_control &= ~SECONDARY_EXEC_EPT_VIOLATION_VE; } if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_VPID) && vmx_cap->vpid) { @@ -2760,7 +2768,7 @@ static bool kvm_is_vmx_supported(void) return supported; } -static int vmx_check_processor_compat(void) +int vmx_check_processor_compat(void) { int cpu = raw_smp_processor_id(); struct vmcs_config vmcs_conf; @@ -2802,7 +2810,7 @@ fault: return -EFAULT; } -static int vmx_hardware_enable(void) +int vmx_hardware_enable(void) { int cpu = raw_smp_processor_id(); u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); @@ -2842,7 +2850,7 @@ static void vmclear_local_loaded_vmcss(void) __loaded_vmcs_clear(v); } -static void vmx_hardware_disable(void) +void vmx_hardware_disable(void) { vmclear_local_loaded_vmcss(); @@ -3156,7 +3164,7 @@ static void exit_lmode(struct kvm_vcpu *vcpu) #endif -static void vmx_flush_tlb_all(struct kvm_vcpu *vcpu) +void vmx_flush_tlb_all(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -3186,7 +3194,7 @@ static inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu) return to_vmx(vcpu)->vpid; } -static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu) +void vmx_flush_tlb_current(struct kvm_vcpu *vcpu) { struct kvm_mmu *mmu = vcpu->arch.mmu; u64 root_hpa = mmu->root.hpa; @@ -3202,7 +3210,7 @@ static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu) vpid_sync_context(vmx_get_current_vpid(vcpu)); } -static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr) +void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr) { /* * vpid_sync_vcpu_addr() is a nop if vpid==0, see the comment in @@ -3211,7 +3219,7 @@ static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr) vpid_sync_vcpu_addr(vmx_get_current_vpid(vcpu), addr); } -static void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu) +void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu) { /* * vpid_sync_context() is a nop if vpid==0, e.g. if enable_vpid==0 or a @@ -3256,7 +3264,7 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu) #define CR3_EXITING_BITS (CPU_BASED_CR3_LOAD_EXITING | \ CPU_BASED_CR3_STORE_EXITING) -static bool vmx_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) +bool vmx_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { if (is_guest_mode(vcpu)) return nested_guest_cr0_valid(vcpu, cr0); @@ -3377,8 +3385,7 @@ u64 construct_eptp(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level) return eptp; } -static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, - int root_level) +void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level) { struct kvm *kvm = vcpu->kvm; bool update_guest_cr3 = true; @@ -3407,8 +3414,7 @@ static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, vmcs_writel(GUEST_CR3, guest_cr3); } - -static bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { /* * We operate under the default treatment of SMM, so VMX cannot be @@ -3524,7 +3530,7 @@ void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) var->g = (ar >> 15) & 1; } -static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) +u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) { struct kvm_segment s; @@ -3601,14 +3607,14 @@ void __vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var)); } -static void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) +void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) { __vmx_set_segment(vcpu, var, seg); to_vmx(vcpu)->emulation_required = vmx_emulation_required(vcpu); } -static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) +void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) { u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS); @@ -3616,25 +3622,25 @@ static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) *l = (ar >> 13) & 1; } -static void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) +void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) { dt->size = vmcs_read32(GUEST_IDTR_LIMIT); dt->address = vmcs_readl(GUEST_IDTR_BASE); } -static void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) +void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) { vmcs_write32(GUEST_IDTR_LIMIT, dt->size); vmcs_writel(GUEST_IDTR_BASE, dt->address); } -static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) +void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) { dt->size = vmcs_read32(GUEST_GDTR_LIMIT); dt->address = vmcs_readl(GUEST_GDTR_BASE); } -static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) +void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) { vmcs_write32(GUEST_GDTR_LIMIT, dt->size); vmcs_writel(GUEST_GDTR_BASE, dt->address); @@ -4102,7 +4108,7 @@ void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu) } } -static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) +bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); void *vapic_page; @@ -4122,7 +4128,7 @@ static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) return ((rvi & 0xf0) > (vppr & 0xf0)); } -static void vmx_msr_filter_changed(struct kvm_vcpu *vcpu) +void vmx_msr_filter_changed(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); u32 i; @@ -4266,8 +4272,8 @@ static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) return 0; } -static void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode, - int trig_mode, int vector) +void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode, + int trig_mode, int vector) { struct kvm_vcpu *vcpu = apic->vcpu; @@ -4429,7 +4435,7 @@ static u32 vmx_vmexit_ctrl(void) ~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER); } -static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) +void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -4595,6 +4601,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; if (!enable_ept) { exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; + exec_control &= ~SECONDARY_EXEC_EPT_VIOLATION_VE; enable_unrestricted_guest = 0; } if (!enable_unrestricted_guest) @@ -4693,7 +4700,7 @@ static int vmx_alloc_ipiv_pid_table(struct kvm *kvm) return 0; } -static int vmx_vcpu_precreate(struct kvm *kvm) +int vmx_vcpu_precreate(struct kvm *kvm) { return vmx_alloc_ipiv_pid_table(kvm); } @@ -4718,8 +4725,12 @@ static void init_vmcs(struct vcpu_vmx *vmx) exec_controls_set(vmx, vmx_exec_control(vmx)); - if (cpu_has_secondary_exec_ctrls()) + if (cpu_has_secondary_exec_ctrls()) { secondary_exec_controls_set(vmx, vmx_secondary_exec_control(vmx)); + if (vmx->ve_info) + vmcs_write64(VE_INFORMATION_ADDRESS, + __pa(vmx->ve_info)); + } if (cpu_has_tertiary_exec_ctrls()) tertiary_exec_controls_set(vmx, vmx_tertiary_exec_control(vmx)); @@ -4848,7 +4859,7 @@ static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu) __pi_set_sn(&vmx->pi_desc); } -static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) +void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -4907,12 +4918,12 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmx_update_fb_clear_dis(vcpu, vmx); } -static void vmx_enable_irq_window(struct kvm_vcpu *vcpu) +void vmx_enable_irq_window(struct kvm_vcpu *vcpu) { exec_controls_setbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING); } -static void vmx_enable_nmi_window(struct kvm_vcpu *vcpu) +void vmx_enable_nmi_window(struct kvm_vcpu *vcpu) { if (!enable_vnmi || vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { @@ -4923,7 +4934,7 @@ static void vmx_enable_nmi_window(struct kvm_vcpu *vcpu) exec_controls_setbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING); } -static void vmx_inject_irq(struct kvm_vcpu *vcpu, bool reinjected) +void vmx_inject_irq(struct kvm_vcpu *vcpu, bool reinjected) { struct vcpu_vmx *vmx = to_vmx(vcpu); uint32_t intr; @@ -4951,7 +4962,7 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, bool reinjected) vmx_clear_hlt(vcpu); } -static void vmx_inject_nmi(struct kvm_vcpu *vcpu) +void vmx_inject_nmi(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -5029,7 +5040,7 @@ bool vmx_nmi_blocked(struct kvm_vcpu *vcpu) GUEST_INTR_STATE_NMI)); } -static int vmx_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection) +int vmx_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection) { if (to_vmx(vcpu)->nested.nested_run_pending) return -EBUSY; @@ -5051,7 +5062,7 @@ bool vmx_interrupt_blocked(struct kvm_vcpu *vcpu) (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)); } -static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection) +int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection) { if (to_vmx(vcpu)->nested.nested_run_pending) return -EBUSY; @@ -5066,7 +5077,7 @@ static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection) return !vmx_interrupt_blocked(vcpu); } -static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) +int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) { void __user *ret; @@ -5086,7 +5097,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) return init_rmode_tss(kvm, ret); } -static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr) +int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr) { to_kvm_vmx(kvm)->ept_identity_map_addr = ident_addr; return 0; @@ -5207,6 +5218,9 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) if (is_invalid_opcode(intr_info)) return handle_ud(vcpu); + if (KVM_BUG_ON(is_ve_fault(intr_info), vcpu->kvm)) + return -EIO; + error_code = 0; if (intr_info & INTR_INFO_DELIVER_CODE_MASK) error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); @@ -5372,8 +5386,7 @@ static int handle_io(struct kvm_vcpu *vcpu) return kvm_fast_pio(vcpu, size, port, in); } -static void -vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) +void vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) { /* * Patch in the VMCALL instruction: @@ -5579,7 +5592,7 @@ out: return kvm_complete_insn_gp(vcpu, err); } -static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) +void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) { get_debugreg(vcpu->arch.db[0], 0); get_debugreg(vcpu->arch.db[1], 1); @@ -5598,7 +5611,7 @@ static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) set_debugreg(DR6_RESERVED, 6); } -static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) +void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) { vmcs_writel(GUEST_DR7, val); } @@ -5771,8 +5784,6 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) != 0 ? PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK; - vcpu->arch.exit_qualification = exit_qualification; - /* * Check that the GPA doesn't exceed physical memory limits, as that is * a guest page fault. We have to emulate the instruction here, because @@ -5869,7 +5880,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) return 1; } -static int vmx_vcpu_pre_run(struct kvm_vcpu *vcpu) +int vmx_vcpu_pre_run(struct kvm_vcpu *vcpu) { if (vmx_emulation_required_with_pending_exception(vcpu)) { kvm_prepare_emulation_failure_exit(vcpu); @@ -6157,9 +6168,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { static const int kvm_vmx_max_exit_handlers = ARRAY_SIZE(kvm_vmx_exit_handlers); -static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason, - u64 *info1, u64 *info2, - u32 *intr_info, u32 *error_code) +void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason, + u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -6417,6 +6427,24 @@ void dump_vmcs(struct kvm_vcpu *vcpu) if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID) pr_err("Virtual processor ID = 0x%04x\n", vmcs_read16(VIRTUAL_PROCESSOR_ID)); + if (secondary_exec_control & SECONDARY_EXEC_EPT_VIOLATION_VE) { + struct vmx_ve_information *ve_info = vmx->ve_info; + u64 ve_info_pa = vmcs_read64(VE_INFORMATION_ADDRESS); + + /* + * If KVM is dumping the VMCS, then something has gone wrong + * already. Derefencing an address from the VMCS, which could + * very well be corrupted, is a terrible idea. The virtual + * address is known so use it. + */ + pr_err("VE info address = 0x%016llx%s\n", ve_info_pa, + ve_info_pa == __pa(ve_info) ? "" : "(corrupted!)"); + pr_err("ve_info: 0x%08x 0x%08x 0x%016llx 0x%016llx 0x%016llx 0x%04x\n", + ve_info->exit_reason, ve_info->delivery, + ve_info->exit_qualification, + ve_info->guest_linear_address, + ve_info->guest_physical_address, ve_info->eptp_index); + } } /* @@ -6602,7 +6630,7 @@ unexpected_vmexit: return 0; } -static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) +int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) { int ret = __vmx_handle_exit(vcpu, exit_fastpath); @@ -6690,7 +6718,7 @@ static noinstr void vmx_l1d_flush(struct kvm_vcpu *vcpu) : "eax", "ebx", "ecx", "edx"); } -static void vmx_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) +void vmx_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); int tpr_threshold; @@ -6760,7 +6788,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) vmx_update_msr_bitmap_x2apic(vcpu); } -static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu) +void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu) { const gfn_t gfn = APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT; struct kvm *kvm = vcpu->kvm; @@ -6829,7 +6857,7 @@ out: kvm_release_pfn_clean(pfn); } -static void vmx_hwapic_isr_update(int max_isr) +void vmx_hwapic_isr_update(int max_isr) { u16 status; u8 old; @@ -6863,7 +6891,7 @@ static void vmx_set_rvi(int vector) } } -static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) +void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) { /* * When running L2, updating RVI is only relevant when @@ -6877,7 +6905,7 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) vmx_set_rvi(max_irr); } -static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) +int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); int max_irr; @@ -6923,7 +6951,7 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) return max_irr; } -static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) +void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) { if (!kvm_vcpu_apicv_active(vcpu)) return; @@ -6934,7 +6962,7 @@ static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]); } -static void vmx_apicv_pre_state_restore(struct kvm_vcpu *vcpu) +void vmx_apicv_pre_state_restore(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -6965,24 +6993,22 @@ static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu) rdmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err); } -static void handle_exception_irqoff(struct vcpu_vmx *vmx) +static void handle_exception_irqoff(struct kvm_vcpu *vcpu, u32 intr_info) { - u32 intr_info = vmx_get_intr_info(&vmx->vcpu); - /* if exit due to PF check for async PF */ if (is_page_fault(intr_info)) - vmx->vcpu.arch.apf.host_apf_flags = kvm_read_and_reset_apf_flags(); + vcpu->arch.apf.host_apf_flags = kvm_read_and_reset_apf_flags(); /* if exit due to NM, handle before interrupts are enabled */ else if (is_nm_fault(intr_info)) - handle_nm_fault_irqoff(&vmx->vcpu); + handle_nm_fault_irqoff(vcpu); /* Handle machine checks before interrupts are enabled */ else if (is_machine_check(intr_info)) kvm_machine_check(); } -static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) +static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu, + u32 intr_info) { - u32 intr_info = vmx_get_intr_info(vcpu); unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK; if (KVM_BUG(!is_external_intr(intr_info), vcpu->kvm, @@ -6999,7 +7025,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) vcpu->arch.at_instruction_boundary = true; } -static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu) +void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -7007,16 +7033,16 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu) return; if (vmx->exit_reason.basic == EXIT_REASON_EXTERNAL_INTERRUPT) - handle_external_interrupt_irqoff(vcpu); + handle_external_interrupt_irqoff(vcpu, vmx_get_intr_info(vcpu)); else if (vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI) - handle_exception_irqoff(vmx); + handle_exception_irqoff(vcpu, vmx_get_intr_info(vcpu)); } /* * The kvm parameter can be NULL (module initialization, or invocation before * VM creation). Be sure to check the kvm parameter before using it. */ -static bool vmx_has_emulated_msr(struct kvm *kvm, u32 index) +bool vmx_has_emulated_msr(struct kvm *kvm, u32 index) { switch (index) { case MSR_IA32_SMBASE: @@ -7139,7 +7165,7 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) IDT_VECTORING_ERROR_CODE); } -static void vmx_cancel_injection(struct kvm_vcpu *vcpu) +void vmx_cancel_injection(struct kvm_vcpu *vcpu) { __vmx_complete_interrupts(vcpu, vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), @@ -7309,7 +7335,7 @@ out: guest_state_exit_irqoff(); } -static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit) +fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit) { struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long cr3, cr4; @@ -7464,7 +7490,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit) return vmx_exit_handlers_fastpath(vcpu, force_immediate_exit); } -static void vmx_vcpu_free(struct kvm_vcpu *vcpu) +void vmx_vcpu_free(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -7473,9 +7499,10 @@ static void vmx_vcpu_free(struct kvm_vcpu *vcpu) free_vpid(vmx->vpid); nested_vmx_free_vcpu(vcpu); free_loaded_vmcs(vmx->loaded_vmcs); + free_page((unsigned long)vmx->ve_info); } -static int vmx_vcpu_create(struct kvm_vcpu *vcpu) +int vmx_vcpu_create(struct kvm_vcpu *vcpu) { struct vmx_uret_msr *tsx_ctrl; struct vcpu_vmx *vmx; @@ -7566,6 +7593,20 @@ static int vmx_vcpu_create(struct kvm_vcpu *vcpu) goto free_vmcs; } + err = -ENOMEM; + if (vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_EPT_VIOLATION_VE) { + struct page *page; + + BUILD_BUG_ON(sizeof(*vmx->ve_info) > PAGE_SIZE); + + /* ve_info must be page aligned. */ + page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); + if (!page) + goto free_vmcs; + + vmx->ve_info = page_to_virt(page); + } + if (vmx_can_use_ipiv(vcpu)) WRITE_ONCE(to_kvm_vmx(vcpu->kvm)->pid_table[vcpu->vcpu_id], __pa(&vmx->pi_desc) | PID_TABLE_ENTRY_VALID); @@ -7584,7 +7625,7 @@ free_vpid: #define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" #define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" -static int vmx_vm_init(struct kvm *kvm) +int vmx_vm_init(struct kvm *kvm) { if (!ple_gap) kvm->arch.pause_in_guest = true; @@ -7615,7 +7656,7 @@ static int vmx_vm_init(struct kvm *kvm) return 0; } -static u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) +u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) { /* We wanted to honor guest CD/MTRR/PAT, but doing so could result in * memory aliases with conflicting memory types and sometimes MCEs. @@ -7787,7 +7828,7 @@ static void update_intel_pt_cfg(struct kvm_vcpu *vcpu) vmx->pt_desc.ctl_bitmask &= ~(0xfULL << (32 + i * 4)); } -static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) +void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -8002,10 +8043,10 @@ static int vmx_check_intercept_io(struct kvm_vcpu *vcpu, return intercept ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; } -static int vmx_check_intercept(struct kvm_vcpu *vcpu, - struct x86_instruction_info *info, - enum x86_intercept_stage stage, - struct x86_exception *exception) +int vmx_check_intercept(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info, + enum x86_intercept_stage stage, + struct x86_exception *exception) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); @@ -8085,8 +8126,8 @@ static inline int u64_shl_div_u64(u64 a, unsigned int shift, return 0; } -static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, - bool *expired) +int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, + bool *expired) { struct vcpu_vmx *vmx; u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles; @@ -8125,13 +8166,13 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, return 0; } -static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu) +void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu) { to_vmx(vcpu)->hv_deadline_tsc = -1; } #endif -static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu) +void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu) { if (!kvm_pause_in_guest(vcpu->kvm)) shrink_ple_window(vcpu); @@ -8160,7 +8201,7 @@ void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu) secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_ENABLE_PML); } -static void vmx_setup_mce(struct kvm_vcpu *vcpu) +void vmx_setup_mce(struct kvm_vcpu *vcpu) { if (vcpu->arch.mcg_cap & MCG_LMCE_P) to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= @@ -8171,7 +8212,7 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu) } #ifdef CONFIG_KVM_SMM -static int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection) +int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection) { /* we need a nested vmexit to enter SMM, postpone if run is pending */ if (to_vmx(vcpu)->nested.nested_run_pending) @@ -8179,7 +8220,7 @@ static int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection) return !is_smm(vcpu); } -static int vmx_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram) +int vmx_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -8200,7 +8241,7 @@ static int vmx_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram) return 0; } -static int vmx_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram) +int vmx_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram) { struct vcpu_vmx *vmx = to_vmx(vcpu); int ret; @@ -8221,18 +8262,18 @@ static int vmx_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram) return 0; } -static void vmx_enable_smi_window(struct kvm_vcpu *vcpu) +void vmx_enable_smi_window(struct kvm_vcpu *vcpu) { /* RSM will cause a vmexit anyway. */ } #endif -static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu) +bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu) { return to_vmx(vcpu)->nested.vmxon && !is_guest_mode(vcpu); } -static void vmx_migrate_timers(struct kvm_vcpu *vcpu) +void vmx_migrate_timers(struct kvm_vcpu *vcpu) { if (is_guest_mode(vcpu)) { struct hrtimer *timer = &to_vmx(vcpu)->nested.preemption_timer; @@ -8242,7 +8283,7 @@ static void vmx_migrate_timers(struct kvm_vcpu *vcpu) } } -static void vmx_hardware_unsetup(void) +void vmx_hardware_unsetup(void) { kvm_set_posted_intr_wakeup_handler(NULL); @@ -8252,18 +8293,7 @@ static void vmx_hardware_unsetup(void) free_kvm_area(); } -#define VMX_REQUIRED_APICV_INHIBITS \ -( \ - BIT(APICV_INHIBIT_REASON_DISABLE)| \ - BIT(APICV_INHIBIT_REASON_ABSENT) | \ - BIT(APICV_INHIBIT_REASON_HYPERV) | \ - BIT(APICV_INHIBIT_REASON_BLOCKIRQ) | \ - BIT(APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED) | \ - BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) | \ - BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED) \ -) - -static void vmx_vm_destroy(struct kvm *kvm) +void vmx_vm_destroy(struct kvm *kvm) { struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm); @@ -8314,148 +8344,6 @@ gva_t vmx_get_untagged_addr(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags return (sign_extend64(gva, lam_bit) & ~BIT_ULL(63)) | (gva & BIT_ULL(63)); } -static struct kvm_x86_ops vmx_x86_ops __initdata = { - .name = KBUILD_MODNAME, - - .check_processor_compatibility = vmx_check_processor_compat, - - .hardware_unsetup = vmx_hardware_unsetup, - - .hardware_enable = vmx_hardware_enable, - .hardware_disable = vmx_hardware_disable, - .has_emulated_msr = vmx_has_emulated_msr, - - .vm_size = sizeof(struct kvm_vmx), - .vm_init = vmx_vm_init, - .vm_destroy = vmx_vm_destroy, - - .vcpu_precreate = vmx_vcpu_precreate, - .vcpu_create = vmx_vcpu_create, - .vcpu_free = vmx_vcpu_free, - .vcpu_reset = vmx_vcpu_reset, - - .prepare_switch_to_guest = vmx_prepare_switch_to_guest, - .vcpu_load = vmx_vcpu_load, - .vcpu_put = vmx_vcpu_put, - - .update_exception_bitmap = vmx_update_exception_bitmap, - .get_msr_feature = vmx_get_msr_feature, - .get_msr = vmx_get_msr, - .set_msr = vmx_set_msr, - .get_segment_base = vmx_get_segment_base, - .get_segment = vmx_get_segment, - .set_segment = vmx_set_segment, - .get_cpl = vmx_get_cpl, - .get_cs_db_l_bits = vmx_get_cs_db_l_bits, - .is_valid_cr0 = vmx_is_valid_cr0, - .set_cr0 = vmx_set_cr0, - .is_valid_cr4 = vmx_is_valid_cr4, - .set_cr4 = vmx_set_cr4, - .set_efer = vmx_set_efer, - .get_idt = vmx_get_idt, - .set_idt = vmx_set_idt, - .get_gdt = vmx_get_gdt, - .set_gdt = vmx_set_gdt, - .set_dr7 = vmx_set_dr7, - .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs, - .cache_reg = vmx_cache_reg, - .get_rflags = vmx_get_rflags, - .set_rflags = vmx_set_rflags, - .get_if_flag = vmx_get_if_flag, - - .flush_tlb_all = vmx_flush_tlb_all, - .flush_tlb_current = vmx_flush_tlb_current, - .flush_tlb_gva = vmx_flush_tlb_gva, - .flush_tlb_guest = vmx_flush_tlb_guest, - - .vcpu_pre_run = vmx_vcpu_pre_run, - .vcpu_run = vmx_vcpu_run, - .handle_exit = vmx_handle_exit, - .skip_emulated_instruction = vmx_skip_emulated_instruction, - .update_emulated_instruction = vmx_update_emulated_instruction, - .set_interrupt_shadow = vmx_set_interrupt_shadow, - .get_interrupt_shadow = vmx_get_interrupt_shadow, - .patch_hypercall = vmx_patch_hypercall, - .inject_irq = vmx_inject_irq, - .inject_nmi = vmx_inject_nmi, - .inject_exception = vmx_inject_exception, - .cancel_injection = vmx_cancel_injection, - .interrupt_allowed = vmx_interrupt_allowed, - .nmi_allowed = vmx_nmi_allowed, - .get_nmi_mask = vmx_get_nmi_mask, - .set_nmi_mask = vmx_set_nmi_mask, - .enable_nmi_window = vmx_enable_nmi_window, - .enable_irq_window = vmx_enable_irq_window, - .update_cr8_intercept = vmx_update_cr8_intercept, - .set_virtual_apic_mode = vmx_set_virtual_apic_mode, - .set_apic_access_page_addr = vmx_set_apic_access_page_addr, - .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, - .load_eoi_exitmap = vmx_load_eoi_exitmap, - .apicv_pre_state_restore = vmx_apicv_pre_state_restore, - .required_apicv_inhibits = VMX_REQUIRED_APICV_INHIBITS, - .hwapic_irr_update = vmx_hwapic_irr_update, - .hwapic_isr_update = vmx_hwapic_isr_update, - .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt, - .sync_pir_to_irr = vmx_sync_pir_to_irr, - .deliver_interrupt = vmx_deliver_interrupt, - .dy_apicv_has_pending_interrupt = pi_has_pending_interrupt, - - .set_tss_addr = vmx_set_tss_addr, - .set_identity_map_addr = vmx_set_identity_map_addr, - .get_mt_mask = vmx_get_mt_mask, - - .get_exit_info = vmx_get_exit_info, - - .vcpu_after_set_cpuid = vmx_vcpu_after_set_cpuid, - - .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, - - .get_l2_tsc_offset = vmx_get_l2_tsc_offset, - .get_l2_tsc_multiplier = vmx_get_l2_tsc_multiplier, - .write_tsc_offset = vmx_write_tsc_offset, - .write_tsc_multiplier = vmx_write_tsc_multiplier, - - .load_mmu_pgd = vmx_load_mmu_pgd, - - .check_intercept = vmx_check_intercept, - .handle_exit_irqoff = vmx_handle_exit_irqoff, - - .sched_in = vmx_sched_in, - - .cpu_dirty_log_size = PML_ENTITY_NUM, - .update_cpu_dirty_logging = vmx_update_cpu_dirty_logging, - - .nested_ops = &vmx_nested_ops, - - .pi_update_irte = vmx_pi_update_irte, - .pi_start_assignment = vmx_pi_start_assignment, - -#ifdef CONFIG_X86_64 - .set_hv_timer = vmx_set_hv_timer, - .cancel_hv_timer = vmx_cancel_hv_timer, -#endif - - .setup_mce = vmx_setup_mce, - -#ifdef CONFIG_KVM_SMM - .smi_allowed = vmx_smi_allowed, - .enter_smm = vmx_enter_smm, - .leave_smm = vmx_leave_smm, - .enable_smi_window = vmx_enable_smi_window, -#endif - - .check_emulate_instruction = vmx_check_emulate_instruction, - .apic_init_signal_blocked = vmx_apic_init_signal_blocked, - .migrate_timers = vmx_migrate_timers, - - .msr_filter_changed = vmx_msr_filter_changed, - .complete_emulated_msr = kvm_complete_insn_gp, - - .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector, - - .get_untagged_addr = vmx_get_untagged_addr, -}; - static unsigned int vmx_handle_intel_pt_intr(void) { struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); @@ -8521,9 +8409,7 @@ static void __init vmx_setup_me_spte_mask(void) kvm_mmu_set_me_spte_mask(0, me_mask); } -static struct kvm_x86_init_ops vmx_init_ops __initdata; - -static __init int hardware_setup(void) +__init int vmx_hardware_setup(void) { unsigned long host_bndcfgs; struct desc_ptr dt; @@ -8592,16 +8478,16 @@ static __init int hardware_setup(void) * using the APIC_ACCESS_ADDR VMCS field. */ if (!flexpriority_enabled) - vmx_x86_ops.set_apic_access_page_addr = NULL; + vt_x86_ops.set_apic_access_page_addr = NULL; if (!cpu_has_vmx_tpr_shadow()) - vmx_x86_ops.update_cr8_intercept = NULL; + vt_x86_ops.update_cr8_intercept = NULL; #if IS_ENABLED(CONFIG_HYPERV) if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH && enable_ept) { - vmx_x86_ops.flush_remote_tlbs = hv_flush_remote_tlbs; - vmx_x86_ops.flush_remote_tlbs_range = hv_flush_remote_tlbs_range; + vt_x86_ops.flush_remote_tlbs = hv_flush_remote_tlbs; + vt_x86_ops.flush_remote_tlbs_range = hv_flush_remote_tlbs_range; } #endif @@ -8616,7 +8502,7 @@ static __init int hardware_setup(void) if (!cpu_has_vmx_apicv()) enable_apicv = 0; if (!enable_apicv) - vmx_x86_ops.sync_pir_to_irr = NULL; + vt_x86_ops.sync_pir_to_irr = NULL; if (!enable_apicv || !cpu_has_vmx_ipiv()) enable_ipiv = false; @@ -8652,7 +8538,7 @@ static __init int hardware_setup(void) enable_pml = 0; if (!enable_pml) - vmx_x86_ops.cpu_dirty_log_size = 0; + vt_x86_ops.cpu_dirty_log_size = 0; if (!cpu_has_vmx_preemption_timer()) enable_preemption_timer = false; @@ -8677,8 +8563,8 @@ static __init int hardware_setup(void) } if (!enable_preemption_timer) { - vmx_x86_ops.set_hv_timer = NULL; - vmx_x86_ops.cancel_hv_timer = NULL; + vt_x86_ops.set_hv_timer = NULL; + vt_x86_ops.cancel_hv_timer = NULL; } kvm_caps.supported_mce_cap |= MCG_LMCE_P; @@ -8689,9 +8575,9 @@ static __init int hardware_setup(void) if (!enable_ept || !enable_pmu || !cpu_has_vmx_intel_pt()) pt_mode = PT_MODE_SYSTEM; if (pt_mode == PT_MODE_HOST_GUEST) - vmx_init_ops.handle_intel_pt_intr = vmx_handle_intel_pt_intr; + vt_init_ops.handle_intel_pt_intr = vmx_handle_intel_pt_intr; else - vmx_init_ops.handle_intel_pt_intr = NULL; + vt_init_ops.handle_intel_pt_intr = NULL; setup_default_sgx_lepubkeyhash(); @@ -8714,14 +8600,6 @@ static __init int hardware_setup(void) return r; } -static struct kvm_x86_init_ops vmx_init_ops __initdata = { - .hardware_setup = hardware_setup, - .handle_intel_pt_intr = NULL, - - .runtime_ops = &vmx_x86_ops, - .pmu_ops = &intel_pmu_ops, -}; - static void vmx_cleanup_l1d_flush(void) { if (vmx_l1d_flush_pages) { @@ -8763,7 +8641,7 @@ static int __init vmx_init(void) */ hv_init_evmcs(); - r = kvm_x86_vendor_init(&vmx_init_ops); + r = kvm_x86_vendor_init(&vt_init_ops); if (r) return r; diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 7e483366b31e..7b64e271a931 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -365,6 +365,9 @@ struct vcpu_vmx { DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS); DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS); } shadow_msr_intercept; + + /* ve_info must be page aligned. */ + struct vmx_ve_information *ve_info; }; struct kvm_vmx { @@ -577,7 +580,8 @@ static inline u8 vmx_get_rvi(void) SECONDARY_EXEC_ENABLE_VMFUNC | \ SECONDARY_EXEC_BUS_LOCK_DETECTION | \ SECONDARY_EXEC_NOTIFY_VM_EXITING | \ - SECONDARY_EXEC_ENCLS_EXITING) + SECONDARY_EXEC_ENCLS_EXITING | \ + SECONDARY_EXEC_EPT_VIOLATION_VE) #define KVM_REQUIRED_VMX_TERTIARY_VM_EXEC_CONTROL 0 #define KVM_OPTIONAL_VMX_TERTIARY_VM_EXEC_CONTROL \ diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h new file mode 100644 index 000000000000..502704596c83 --- /dev/null +++ b/arch/x86/kvm/vmx/x86_ops.h @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __KVM_X86_VMX_X86_OPS_H +#define __KVM_X86_VMX_X86_OPS_H + +#include <linux/kvm_host.h> + +#include "x86.h" + +__init int vmx_hardware_setup(void); + +extern struct kvm_x86_ops vt_x86_ops __initdata; +extern struct kvm_x86_init_ops vt_init_ops __initdata; + +void vmx_hardware_unsetup(void); +int vmx_check_processor_compat(void); +int vmx_hardware_enable(void); +void vmx_hardware_disable(void); +int vmx_vm_init(struct kvm *kvm); +void vmx_vm_destroy(struct kvm *kvm); +int vmx_vcpu_precreate(struct kvm *kvm); +int vmx_vcpu_create(struct kvm_vcpu *vcpu); +int vmx_vcpu_pre_run(struct kvm_vcpu *vcpu); +fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit); +void vmx_vcpu_free(struct kvm_vcpu *vcpu); +void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); +void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu); +void vmx_vcpu_put(struct kvm_vcpu *vcpu); +int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath); +void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu); +int vmx_skip_emulated_instruction(struct kvm_vcpu *vcpu); +void vmx_update_emulated_instruction(struct kvm_vcpu *vcpu); +int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info); +#ifdef CONFIG_KVM_SMM +int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection); +int vmx_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram); +int vmx_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram); +void vmx_enable_smi_window(struct kvm_vcpu *vcpu); +#endif +int vmx_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, + void *insn, int insn_len); +int vmx_check_intercept(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info, + enum x86_intercept_stage stage, + struct x86_exception *exception); +bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu); +void vmx_migrate_timers(struct kvm_vcpu *vcpu); +void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); +void vmx_apicv_pre_state_restore(struct kvm_vcpu *vcpu); +bool vmx_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason); +void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr); +void vmx_hwapic_isr_update(int max_isr); +bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu); +int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu); +void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode, + int trig_mode, int vector); +void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu); +bool vmx_has_emulated_msr(struct kvm *kvm, u32 index); +void vmx_msr_filter_changed(struct kvm_vcpu *vcpu); +void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu); +void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu); +int vmx_get_msr_feature(struct kvm_msr_entry *msr); +int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info); +u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg); +void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); +void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); +int vmx_get_cpl(struct kvm_vcpu *vcpu); +void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); +bool vmx_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); +void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); +void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level); +void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); +bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); +int vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer); +void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt); +void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt); +void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt); +void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt); +void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val); +void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu); +void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg); +unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu); +void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); +bool vmx_get_if_flag(struct kvm_vcpu *vcpu); +void vmx_flush_tlb_all(struct kvm_vcpu *vcpu); +void vmx_flush_tlb_current(struct kvm_vcpu *vcpu); +void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr); +void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu); +void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask); +u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu); +void vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall); +void vmx_inject_irq(struct kvm_vcpu *vcpu, bool reinjected); +void vmx_inject_nmi(struct kvm_vcpu *vcpu); +void vmx_inject_exception(struct kvm_vcpu *vcpu); +void vmx_cancel_injection(struct kvm_vcpu *vcpu); +int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection); +int vmx_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection); +bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu); +void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); +void vmx_enable_nmi_window(struct kvm_vcpu *vcpu); +void vmx_enable_irq_window(struct kvm_vcpu *vcpu); +void vmx_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr); +void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu); +void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu); +void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); +int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); +int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr); +u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); +void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason, + u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code); +u64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu); +u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu); +void vmx_write_tsc_offset(struct kvm_vcpu *vcpu); +void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu); +void vmx_request_immediate_exit(struct kvm_vcpu *vcpu); +void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu); +void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu); +#ifdef CONFIG_X86_64 +int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, + bool *expired); +void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu); +#endif +void vmx_setup_mce(struct kvm_vcpu *vcpu); + +#endif /* __KVM_X86_VMX_X86_OPS_H */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 91478b769af0..082ac6d95a3a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -92,9 +92,12 @@ #define MAX_IO_MSRS 256 #define KVM_MAX_MCE_BANKS 32 -struct kvm_caps kvm_caps __read_mostly = { - .supported_mce_cap = MCG_CTL_P | MCG_SER_P, -}; +/* + * Note, kvm_caps fields should *never* have default values, all fields must be + * recomputed from scratch during vendor module load, e.g. to account for a + * vendor module being reloaded with different module parameters. + */ +struct kvm_caps kvm_caps __read_mostly; EXPORT_SYMBOL_GPL(kvm_caps); #define ERR_PTR_USR(e) ((void __user *)ERR_PTR(e)) @@ -2230,16 +2233,13 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) /* * Disallow writes to immutable feature MSRs after KVM_RUN. KVM does * not support modifying the guest vCPU model on the fly, e.g. changing - * the nVMX capabilities while L2 is running is nonsensical. Ignore + * the nVMX capabilities while L2 is running is nonsensical. Allow * writes of the same value, e.g. to allow userspace to blindly stuff * all MSRs when emulating RESET. */ - if (kvm_vcpu_has_run(vcpu) && kvm_is_immutable_feature_msr(index)) { - if (do_get_msr(vcpu, index, &val) || *data != val) - return -EINVAL; - - return 0; - } + if (kvm_vcpu_has_run(vcpu) && kvm_is_immutable_feature_msr(index) && + (do_get_msr(vcpu, index, &val) || *data != val)) + return -EINVAL; return kvm_set_msr_ignored_check(vcpu, index, *data, true); } @@ -4629,9 +4629,7 @@ static int kvm_ioctl_get_supported_hv_cpuid(struct kvm_vcpu *vcpu, static bool kvm_is_vm_type_supported(unsigned long type) { - return type == KVM_X86_DEFAULT_VM || - (type == KVM_X86_SW_PROTECTED_VM && - IS_ENABLED(CONFIG_KVM_SW_PROTECTED_VM) && tdp_mmu_enabled); + return type < 32 && (kvm_caps.supported_vm_types & BIT(type)); } int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) @@ -4832,9 +4830,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = kvm_caps.has_notify_vmexit; break; case KVM_CAP_VM_TYPES: - r = BIT(KVM_X86_DEFAULT_VM); - if (kvm_is_vm_type_supported(KVM_X86_SW_PROTECTED_VM)) - r |= BIT(KVM_X86_SW_PROTECTED_VM); + r = kvm_caps.supported_vm_types; break; default: break; @@ -4842,46 +4838,44 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) return r; } -static inline void __user *kvm_get_attr_addr(struct kvm_device_attr *attr) +static int __kvm_x86_dev_get_attr(struct kvm_device_attr *attr, u64 *val) { - void __user *uaddr = (void __user*)(unsigned long)attr->addr; - - if ((u64)(unsigned long)uaddr != attr->addr) - return ERR_PTR_USR(-EFAULT); - return uaddr; -} - -static int kvm_x86_dev_get_attr(struct kvm_device_attr *attr) -{ - u64 __user *uaddr = kvm_get_attr_addr(attr); - - if (attr->group) + if (attr->group) { + if (kvm_x86_ops.dev_get_attr) + return static_call(kvm_x86_dev_get_attr)(attr->group, attr->attr, val); return -ENXIO; - - if (IS_ERR(uaddr)) - return PTR_ERR(uaddr); + } switch (attr->attr) { case KVM_X86_XCOMP_GUEST_SUPP: - if (put_user(kvm_caps.supported_xcr0, uaddr)) - return -EFAULT; + *val = kvm_caps.supported_xcr0; return 0; default: return -ENXIO; } } +static int kvm_x86_dev_get_attr(struct kvm_device_attr *attr) +{ + u64 __user *uaddr = u64_to_user_ptr(attr->addr); + int r; + u64 val; + + r = __kvm_x86_dev_get_attr(attr, &val); + if (r < 0) + return r; + + if (put_user(val, uaddr)) + return -EFAULT; + + return 0; +} + static int kvm_x86_dev_has_attr(struct kvm_device_attr *attr) { - if (attr->group) - return -ENXIO; + u64 val; - switch (attr->attr) { - case KVM_X86_XCOMP_GUEST_SUPP: - return 0; - default: - return -ENXIO; - } + return __kvm_x86_dev_get_attr(attr, &val); } long kvm_arch_dev_ioctl(struct file *filp, @@ -5557,11 +5551,15 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, return 0; } -static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, - struct kvm_debugregs *dbgregs) +static int kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, + struct kvm_debugregs *dbgregs) { unsigned int i; + if (vcpu->kvm->arch.has_protected_state && + vcpu->arch.guest_state_protected) + return -EINVAL; + memset(dbgregs, 0, sizeof(*dbgregs)); BUILD_BUG_ON(ARRAY_SIZE(vcpu->arch.db) != ARRAY_SIZE(dbgregs->db)); @@ -5570,6 +5568,7 @@ static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, dbgregs->dr6 = vcpu->arch.dr6; dbgregs->dr7 = vcpu->arch.dr7; + return 0; } static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, @@ -5577,6 +5576,10 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, { unsigned int i; + if (vcpu->kvm->arch.has_protected_state && + vcpu->arch.guest_state_protected) + return -EINVAL; + if (dbgregs->flags) return -EINVAL; @@ -5597,8 +5600,8 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, } -static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu, - u8 *state, unsigned int size) +static int kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu, + u8 *state, unsigned int size) { /* * Only copy state for features that are enabled for the guest. The @@ -5616,24 +5619,25 @@ static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu, XFEATURE_MASK_FPSSE; if (fpstate_is_confidential(&vcpu->arch.guest_fpu)) - return; + return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0; fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size, supported_xcr0, vcpu->arch.pkru); + return 0; } -static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, - struct kvm_xsave *guest_xsave) +static int kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, + struct kvm_xsave *guest_xsave) { - kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region, - sizeof(guest_xsave->region)); + return kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region, + sizeof(guest_xsave->region)); } static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, struct kvm_xsave *guest_xsave) { if (fpstate_is_confidential(&vcpu->arch.guest_fpu)) - return 0; + return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0; return fpu_copy_uabi_to_guest_fpstate(&vcpu->arch.guest_fpu, guest_xsave->region, @@ -5641,18 +5645,23 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, &vcpu->arch.pkru); } -static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu, - struct kvm_xcrs *guest_xcrs) +static int kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu, + struct kvm_xcrs *guest_xcrs) { + if (vcpu->kvm->arch.has_protected_state && + vcpu->arch.guest_state_protected) + return -EINVAL; + if (!boot_cpu_has(X86_FEATURE_XSAVE)) { guest_xcrs->nr_xcrs = 0; - return; + return 0; } guest_xcrs->nr_xcrs = 1; guest_xcrs->flags = 0; guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK; guest_xcrs->xcrs[0].value = vcpu->arch.xcr0; + return 0; } static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu, @@ -5660,6 +5669,10 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu, { int i, r = 0; + if (vcpu->kvm->arch.has_protected_state && + vcpu->arch.guest_state_protected) + return -EINVAL; + if (!boot_cpu_has(X86_FEATURE_XSAVE)) return -EINVAL; @@ -5712,12 +5725,9 @@ static int kvm_arch_tsc_has_attr(struct kvm_vcpu *vcpu, static int kvm_arch_tsc_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { - u64 __user *uaddr = kvm_get_attr_addr(attr); + u64 __user *uaddr = u64_to_user_ptr(attr->addr); int r; - if (IS_ERR(uaddr)) - return PTR_ERR(uaddr); - switch (attr->attr) { case KVM_VCPU_TSC_OFFSET: r = -EFAULT; @@ -5735,13 +5745,10 @@ static int kvm_arch_tsc_get_attr(struct kvm_vcpu *vcpu, static int kvm_arch_tsc_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { - u64 __user *uaddr = kvm_get_attr_addr(attr); + u64 __user *uaddr = u64_to_user_ptr(attr->addr); struct kvm *kvm = vcpu->kvm; int r; - if (IS_ERR(uaddr)) - return PTR_ERR(uaddr); - switch (attr->attr) { case KVM_VCPU_TSC_OFFSET: { u64 offset, tsc, ns; @@ -6048,7 +6055,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, case KVM_GET_DEBUGREGS: { struct kvm_debugregs dbgregs; - kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs); + r = kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs); + if (r < 0) + break; r = -EFAULT; if (copy_to_user(argp, &dbgregs, @@ -6078,7 +6087,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (!u.xsave) break; - kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave); + r = kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave); + if (r < 0) + break; r = -EFAULT; if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave))) @@ -6107,7 +6118,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (!u.xsave) break; - kvm_vcpu_ioctl_x86_get_xsave2(vcpu, u.buffer, size); + r = kvm_vcpu_ioctl_x86_get_xsave2(vcpu, u.buffer, size); + if (r < 0) + break; r = -EFAULT; if (copy_to_user(argp, u.xsave, size)) @@ -6123,7 +6136,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (!u.xcrs) break; - kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs); + r = kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs); + if (r < 0) + break; r = -EFAULT; if (copy_to_user(argp, u.xcrs, @@ -6267,6 +6282,11 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } #endif case KVM_GET_SREGS2: { + r = -EINVAL; + if (vcpu->kvm->arch.has_protected_state && + vcpu->arch.guest_state_protected) + goto out; + u.sregs2 = kzalloc(sizeof(struct kvm_sregs2), GFP_KERNEL); r = -ENOMEM; if (!u.sregs2) @@ -6279,6 +6299,11 @@ long kvm_arch_vcpu_ioctl(struct file *filp, break; } case KVM_SET_SREGS2: { + r = -EINVAL; + if (vcpu->kvm->arch.has_protected_state && + vcpu->arch.guest_state_protected) + goto out; + u.sregs2 = memdup_user(argp, sizeof(struct kvm_sregs2)); if (IS_ERR(u.sregs2)) { r = PTR_ERR(u.sregs2); @@ -9732,6 +9757,8 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops) return -EIO; } + memset(&kvm_caps, 0, sizeof(kvm_caps)); + x86_emulator_cache = kvm_alloc_emulator_cache(); if (!x86_emulator_cache) { pr_err("failed to allocate cache for x86 emulator\n"); @@ -9750,6 +9777,9 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops) if (r) goto out_free_percpu; + kvm_caps.supported_vm_types = BIT(KVM_X86_DEFAULT_VM); + kvm_caps.supported_mce_cap = MCG_CTL_P | MCG_SER_P; + if (boot_cpu_has(X86_FEATURE_XSAVE)) { host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); kvm_caps.supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0; @@ -9795,6 +9825,9 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops) kvm_register_perf_callbacks(ops->handle_intel_pt_intr); + if (IS_ENABLED(CONFIG_KVM_SW_PROTECTED_VM) && tdp_mmu_enabled) + kvm_caps.supported_vm_types |= BIT(KVM_X86_SW_PROTECTED_VM); + if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES)) kvm_caps.supported_xss = 0; @@ -9995,15 +10028,12 @@ static void set_or_clear_apicv_inhibit(unsigned long *inhibits, static void kvm_apicv_init(struct kvm *kvm) { - unsigned long *inhibits = &kvm->arch.apicv_inhibit_reasons; + enum kvm_apicv_inhibit reason = enable_apicv ? APICV_INHIBIT_REASON_ABSENT : + APICV_INHIBIT_REASON_DISABLE; - init_rwsem(&kvm->arch.apicv_update_lock); - - set_or_clear_apicv_inhibit(inhibits, APICV_INHIBIT_REASON_ABSENT, true); + set_or_clear_apicv_inhibit(&kvm->arch.apicv_inhibit_reasons, reason, true); - if (!enable_apicv) - set_or_clear_apicv_inhibit(inhibits, - APICV_INHIBIT_REASON_DISABLE, true); + init_rwsem(&kvm->arch.apicv_update_lock); } static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id) @@ -10051,26 +10081,15 @@ static int complete_hypercall_exit(struct kvm_vcpu *vcpu) return kvm_skip_emulated_instruction(vcpu); } -int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) +unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, + unsigned long a0, unsigned long a1, + unsigned long a2, unsigned long a3, + int op_64_bit, int cpl) { - unsigned long nr, a0, a1, a2, a3, ret; - int op_64_bit; - - if (kvm_xen_hypercall_enabled(vcpu->kvm)) - return kvm_xen_hypercall(vcpu); - - if (kvm_hv_hypercall_enabled(vcpu)) - return kvm_hv_hypercall(vcpu); - - nr = kvm_rax_read(vcpu); - a0 = kvm_rbx_read(vcpu); - a1 = kvm_rcx_read(vcpu); - a2 = kvm_rdx_read(vcpu); - a3 = kvm_rsi_read(vcpu); + unsigned long ret; trace_kvm_hypercall(nr, a0, a1, a2, a3); - op_64_bit = is_64_bit_hypercall(vcpu); if (!op_64_bit) { nr &= 0xFFFFFFFF; a0 &= 0xFFFFFFFF; @@ -10079,7 +10098,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) a3 &= 0xFFFFFFFF; } - if (static_call(kvm_x86_get_cpl)(vcpu) != 0) { + if (cpl) { ret = -KVM_EPERM; goto out; } @@ -10140,18 +10159,49 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) WARN_ON_ONCE(vcpu->run->hypercall.flags & KVM_EXIT_HYPERCALL_MBZ); vcpu->arch.complete_userspace_io = complete_hypercall_exit; + /* stat is incremented on completion. */ return 0; } default: ret = -KVM_ENOSYS; break; } + out: + ++vcpu->stat.hypercalls; + return ret; +} +EXPORT_SYMBOL_GPL(__kvm_emulate_hypercall); + +int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) +{ + unsigned long nr, a0, a1, a2, a3, ret; + int op_64_bit; + int cpl; + + if (kvm_xen_hypercall_enabled(vcpu->kvm)) + return kvm_xen_hypercall(vcpu); + + if (kvm_hv_hypercall_enabled(vcpu)) + return kvm_hv_hypercall(vcpu); + + nr = kvm_rax_read(vcpu); + a0 = kvm_rbx_read(vcpu); + a1 = kvm_rcx_read(vcpu); + a2 = kvm_rdx_read(vcpu); + a3 = kvm_rsi_read(vcpu); + op_64_bit = is_64_bit_hypercall(vcpu); + cpl = static_call(kvm_x86_get_cpl)(vcpu); + + ret = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl); + if (nr == KVM_HC_MAP_GPA_RANGE && !ret) + /* MAP_GPA tosses the request to the user space. */ + return 0; + if (!op_64_bit) ret = (u32)ret; kvm_rax_write(vcpu, ret); - ++vcpu->stat.hypercalls; return kvm_skip_emulated_instruction(vcpu); } EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); @@ -11486,6 +11536,10 @@ static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { + if (vcpu->kvm->arch.has_protected_state && + vcpu->arch.guest_state_protected) + return -EINVAL; + vcpu_load(vcpu); __get_regs(vcpu, regs); vcpu_put(vcpu); @@ -11527,6 +11581,10 @@ static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { + if (vcpu->kvm->arch.has_protected_state && + vcpu->arch.guest_state_protected) + return -EINVAL; + vcpu_load(vcpu); __set_regs(vcpu, regs); vcpu_put(vcpu); @@ -11599,6 +11657,10 @@ static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2) int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { + if (vcpu->kvm->arch.has_protected_state && + vcpu->arch.guest_state_protected) + return -EINVAL; + vcpu_load(vcpu); __get_sregs(vcpu, sregs); vcpu_put(vcpu); @@ -11866,6 +11928,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, { int ret; + if (vcpu->kvm->arch.has_protected_state && + vcpu->arch.guest_state_protected) + return -EINVAL; + vcpu_load(vcpu); ret = __set_sregs(vcpu, sregs); vcpu_put(vcpu); @@ -11983,7 +12049,7 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) struct fxregs_state *fxsave; if (fpstate_is_confidential(&vcpu->arch.guest_fpu)) - return 0; + return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0; vcpu_load(vcpu); @@ -12006,7 +12072,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) struct fxregs_state *fxsave; if (fpstate_is_confidential(&vcpu->arch.guest_fpu)) - return 0; + return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0; vcpu_load(vcpu); @@ -12532,6 +12598,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) return -EINVAL; kvm->arch.vm_type = type; + kvm->arch.has_private_mem = + (type == KVM_X86_SW_PROTECTED_VM); ret = kvm_page_track_init(kvm); if (ret) @@ -12731,7 +12799,7 @@ static void memslot_rmap_free(struct kvm_memory_slot *slot) int i; for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { - kvfree(slot->arch.rmap[i]); + vfree(slot->arch.rmap[i]); slot->arch.rmap[i] = NULL; } } @@ -12743,7 +12811,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) memslot_rmap_free(slot); for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) { - kvfree(slot->arch.lpage_info[i - 1]); + vfree(slot->arch.lpage_info[i - 1]); slot->arch.lpage_info[i - 1] = NULL; } @@ -12835,7 +12903,7 @@ out_free: memslot_rmap_free(slot); for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) { - kvfree(slot->arch.lpage_info[i - 1]); + vfree(slot->arch.lpage_info[i - 1]); slot->arch.lpage_info[i - 1] = NULL; } return -ENOMEM; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index a8b71803777b..d80a4c6b5a38 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -24,6 +24,8 @@ struct kvm_caps { bool has_bus_lock_exit; /* notify VM exit supported? */ bool has_notify_vmexit; + /* bit mask of VM types */ + u32 supported_vm_types; u64 supported_mce_cap; u64 supported_xcr0; diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 6b43b6480354..a51f22c90a7a 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -7,6 +7,7 @@ #include <linux/swapops.h> #include <linux/kmemleak.h> #include <linux/sched/task.h> +#include <linux/execmem.h> #include <asm/set_memory.h> #include <asm/cpu_device_id.h> @@ -1095,3 +1096,31 @@ unsigned long arch_max_swapfile_size(void) return pages; } #endif + +#ifdef CONFIG_EXECMEM +static struct execmem_info execmem_info __ro_after_init; + +struct execmem_info __init *execmem_arch_setup(void) +{ + unsigned long start, offset = 0; + + if (kaslr_enabled()) + offset = get_random_u32_inclusive(1, 1024) * PAGE_SIZE; + + start = MODULES_VADDR + offset; + + execmem_info = (struct execmem_info){ + .ranges = { + [EXECMEM_DEFAULT] = { + .flags = EXECMEM_KASAN_SHADOW, + .start = start, + .end = MODULES_END, + .pgprot = PAGE_KERNEL, + .alignment = MODULE_ALIGN, + }, + }, + }; + + return &execmem_info; +} +#endif /* CONFIG_EXECMEM */ |