Diffstat (limited to 'arch')
-rw-r--r--  arch/Kconfig | 10
-rw-r--r--  arch/arm/Kconfig | 23
-rw-r--r--  arch/arm/configs/collie_defconfig | 2
-rw-r--r--  arch/arm/configs/keystone_defconfig | 2
-rw-r--r--  arch/arm/configs/lpc18xx_defconfig | 2
-rw-r--r--  arch/arm/configs/moxart_defconfig | 2
-rw-r--r--  arch/arm/configs/mps2_defconfig | 2
-rw-r--r--  arch/arm/configs/omap1_defconfig | 2
-rw-r--r--  arch/arm/configs/stm32_defconfig | 2
-rw-r--r--  arch/arm/include/asm/assembler.h | 1
-rw-r--r--  arch/arm/include/asm/glue-cache.h | 28
-rw-r--r--  arch/arm/include/asm/hw_breakpoint.h | 1
-rw-r--r--  arch/arm/include/asm/pgtable-3level-hwdef.h | 26
-rw-r--r--  arch/arm/include/asm/proc-fns.h | 12
-rw-r--r--  arch/arm/include/asm/ptrace.h | 1
-rw-r--r--  arch/arm/include/asm/uaccess-asm.h | 58
-rw-r--r--  arch/arm/include/asm/uaccess.h | 45
-rw-r--r--  arch/arm/kernel/asm-offsets.c | 1
-rw-r--r--  arch/arm/kernel/entry-ftrace.S | 4
-rw-r--r--  arch/arm/kernel/hw_breakpoint.c | 35
-rw-r--r--  arch/arm/kernel/module.c | 34
-rw-r--r--  arch/arm/kernel/suspend.c | 8
-rw-r--r--  arch/arm/lib/csumpartialcopyuser.S | 20
-rw-r--r--  arch/arm/lib/delay-loop.S | 16
-rw-r--r--  arch/arm/mm/Makefile | 3
-rw-r--r--  arch/arm/mm/cache-b15-rac.c | 1
-rw-r--r--  arch/arm/mm/cache-fa.S | 47
-rw-r--r--  arch/arm/mm/cache-nop.S | 61
-rw-r--r--  arch/arm/mm/cache-v4.S | 55
-rw-r--r--  arch/arm/mm/cache-v4wb.S | 49
-rw-r--r--  arch/arm/mm/cache-v4wt.S | 55
-rw-r--r--  arch/arm/mm/cache-v6.S | 51
-rw-r--r--  arch/arm/mm/cache-v7.S | 76
-rw-r--r--  arch/arm/mm/cache-v7m.S | 55
-rw-r--r--  arch/arm/mm/cache.c | 663
-rw-r--r--  arch/arm/mm/fault.c | 29
-rw-r--r--  arch/arm/mm/init.c | 45
-rw-r--r--  arch/arm/mm/mmu.c | 7
-rw-r--r--  arch/arm/mm/proc-arm1020.S | 69
-rw-r--r--  arch/arm/mm/proc-arm1020e.S | 70
-rw-r--r--  arch/arm/mm/proc-arm1022.S | 69
-rw-r--r--  arch/arm/mm/proc-arm1026.S | 70
-rw-r--r--  arch/arm/mm/proc-arm720.S | 25
-rw-r--r--  arch/arm/mm/proc-arm740.S | 26
-rw-r--r--  arch/arm/mm/proc-arm7tdmi.S | 34
-rw-r--r--  arch/arm/mm/proc-arm920.S | 76
-rw-r--r--  arch/arm/mm/proc-arm922.S | 69
-rw-r--r--  arch/arm/mm/proc-arm925.S | 66
-rw-r--r--  arch/arm/mm/proc-arm926.S | 75
-rw-r--r--  arch/arm/mm/proc-arm940.S | 69
-rw-r--r--  arch/arm/mm/proc-arm946.S | 65
-rw-r--r--  arch/arm/mm/proc-arm9tdmi.S | 26
-rw-r--r--  arch/arm/mm/proc-fa526.S | 24
-rw-r--r--  arch/arm/mm/proc-feroceon.S | 105
-rw-r--r--  arch/arm/mm/proc-macros.S | 33
-rw-r--r--  arch/arm/mm/proc-mohawk.S | 74
-rw-r--r--  arch/arm/mm/proc-sa110.S | 23
-rw-r--r--  arch/arm/mm/proc-sa1100.S | 31
-rw-r--r--  arch/arm/mm/proc-v6.S | 31
-rw-r--r--  arch/arm/mm/proc-v7-2level.S | 8
-rw-r--r--  arch/arm/mm/proc-v7-3level.S | 8
-rw-r--r--  arch/arm/mm/proc-v7.S | 66
-rw-r--r--  arch/arm/mm/proc-v7m.S | 41
-rw-r--r--  arch/arm/mm/proc-xsc3.S | 75
-rw-r--r--  arch/arm/mm/proc-xscale.S | 125
-rw-r--r--  arch/arm/mm/proc.c | 500
-rw-r--r--  arch/arm/mm/tlb-fa.S | 12
-rw-r--r--  arch/arm/mm/tlb-v4.S | 15
-rw-r--r--  arch/arm/mm/tlb-v4wb.S | 12
-rw-r--r--  arch/arm/mm/tlb-v4wbi.S | 12
-rw-r--r--  arch/arm/mm/tlb-v6.S | 12
-rw-r--r--  arch/arm/mm/tlb-v7.S | 14
-rw-r--r--  arch/arm/mm/tlb.c | 84
-rw-r--r--  arch/arm64/Kconfig | 1
-rw-r--r--  arch/arm64/boot/dts/qcom/sc7180-acer-aspire1.dts | 40
-rw-r--r--  arch/arm64/include/asm/esr.h | 12
-rw-r--r--  arch/arm64/include/asm/kvm_asm.h | 8
-rw-r--r--  arch/arm64/include/asm/kvm_emulate.h | 16
-rw-r--r--  arch/arm64/include/asm/kvm_host.h | 156
-rw-r--r--  arch/arm64/include/asm/kvm_hyp.h | 4
-rw-r--r--  arch/arm64/include/asm/kvm_nested.h | 13
-rw-r--r--  arch/arm64/include/asm/kvm_ptrauth.h | 21
-rw-r--r--  arch/arm64/include/asm/pgtable-hwdef.h | 1
-rw-r--r--  arch/arm64/include/asm/virt.h | 12
-rw-r--r--  arch/arm64/kernel/module.c | 126
-rw-r--r--  arch/arm64/kernel/pi/idreg-override.c | 4
-rw-r--r--  arch/arm64/kernel/probes/kprobes.c | 7
-rw-r--r--  arch/arm64/kvm/Makefile | 1
-rw-r--r--  arch/arm64/kvm/arm.c | 209
-rw-r--r--  arch/arm64/kvm/emulate-nested.c | 66
-rw-r--r--  arch/arm64/kvm/fpsimd.c | 69
-rw-r--r--  arch/arm64/kvm/handle_exit.c | 36
-rw-r--r--  arch/arm64/kvm/hyp/include/hyp/debug-sr.h | 8
-rw-r--r--  arch/arm64/kvm/hyp/include/hyp/switch.h | 86
-rw-r--r--  arch/arm64/kvm/hyp/include/nvhe/pkvm.h | 6
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/debug-sr.c | 8
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/ffa.c | 1
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/hyp-main.c | 27
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/mem_protect.c | 8
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/pkvm.c | 14
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/psci-relay.c | 2
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/setup.c | 4
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/switch.c | 18
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/tlb.c | 115
-rw-r--r--  arch/arm64/kvm/hyp/pgtable.c | 21
-rw-r--r--  arch/arm64/kvm/hyp/vgic-v3-sr.c | 27
-rw-r--r--  arch/arm64/kvm/hyp/vhe/switch.c | 109
-rw-r--r--  arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 4
-rw-r--r--  arch/arm64/kvm/hyp/vhe/tlb.c | 26
-rw-r--r--  arch/arm64/kvm/mmio.c | 12
-rw-r--r--  arch/arm64/kvm/mmu.c | 42
-rw-r--r--  arch/arm64/kvm/nested.c | 8
-rw-r--r--  arch/arm64/kvm/pauth.c | 206
-rw-r--r--  arch/arm64/kvm/pkvm.c | 2
-rw-r--r--  arch/arm64/kvm/pmu.c | 2
-rw-r--r--  arch/arm64/kvm/reset.c | 1
-rw-r--r--  arch/arm64/kvm/sys_regs.c | 69
-rw-r--r--  arch/arm64/kvm/vgic/vgic-debug.c | 82
-rw-r--r--  arch/arm64/kvm/vgic/vgic-init.c | 90
-rw-r--r--  arch/arm64/kvm/vgic/vgic-its.c | 352
-rw-r--r--  arch/arm64/kvm/vgic/vgic-mmio-v3.c | 2
-rw-r--r--  arch/arm64/kvm/vgic/vgic-v2.c | 9
-rw-r--r--  arch/arm64/kvm/vgic/vgic-v3.c | 23
-rw-r--r--  arch/arm64/kvm/vgic/vgic.c | 17
-rw-r--r--  arch/arm64/kvm/vgic/vgic.h | 8
-rw-r--r--  arch/arm64/mm/init.c | 140
-rw-r--r--  arch/arm64/net/bpf_jit_comp.c | 11
-rw-r--r--  arch/csky/kernel/probes/ftrace.c | 3
-rw-r--r--  arch/loongarch/Kconfig | 9
-rw-r--r--  arch/loongarch/include/asm/Kbuild | 1
-rw-r--r--  arch/loongarch/include/asm/hardirq.h | 6
-rw-r--r--  arch/loongarch/include/asm/inst.h | 2
-rw-r--r--  arch/loongarch/include/asm/irq.h | 11
-rw-r--r--  arch/loongarch/include/asm/kvm_host.h | 34
-rw-r--r--  arch/loongarch/include/asm/kvm_para.h | 161
-rw-r--r--  arch/loongarch/include/asm/kvm_vcpu.h | 11
-rw-r--r--  arch/loongarch/include/asm/loongarch.h | 12
-rw-r--r--  arch/loongarch/include/asm/paravirt.h | 30
-rw-r--r--  arch/loongarch/include/asm/paravirt_api_clock.h | 1
-rw-r--r--  arch/loongarch/include/asm/smp.h | 22
-rw-r--r--  arch/loongarch/include/uapi/asm/kvm.h | 4
-rw-r--r--  arch/loongarch/kernel/Makefile | 1
-rw-r--r--  arch/loongarch/kernel/ftrace_dyn.c | 3
-rw-r--r--  arch/loongarch/kernel/irq.c | 24
-rw-r--r--  arch/loongarch/kernel/module.c | 6
-rw-r--r--  arch/loongarch/kernel/paravirt.c | 151
-rw-r--r--  arch/loongarch/kernel/perf_event.c | 14
-rw-r--r--  arch/loongarch/kernel/smp.c | 52
-rw-r--r--  arch/loongarch/kernel/time.c | 12
-rw-r--r--  arch/loongarch/kvm/exit.c | 151
-rw-r--r--  arch/loongarch/kvm/mmu.c | 32
-rw-r--r--  arch/loongarch/kvm/trace.h | 20
-rw-r--r--  arch/loongarch/kvm/vcpu.c | 105
-rw-r--r--  arch/loongarch/kvm/vm.c | 11
-rw-r--r--  arch/loongarch/mm/init.c | 21
-rw-r--r--  arch/m68k/include/asm/pgtable.h | 2
-rw-r--r--  arch/microblaze/configs/mmu_defconfig | 2
-rw-r--r--  arch/mips/configs/rs90_defconfig | 2
-rw-r--r--  arch/mips/include/asm/pgtable-64.h | 4
-rw-r--r--  arch/mips/kernel/module.c | 10
-rw-r--r--  arch/mips/kvm/mmu.c | 30
-rw-r--r--  arch/mips/mm/fault.c | 4
-rw-r--r--  arch/mips/mm/init.c | 23
-rw-r--r--  arch/nios2/include/asm/pgtable.h | 5
-rw-r--r--  arch/nios2/kernel/module.c | 20
-rw-r--r--  arch/nios2/mm/init.c | 21
-rw-r--r--  arch/parisc/configs/generic-32bit_defconfig | 2
-rw-r--r--  arch/parisc/include/asm/page.h | 1
-rw-r--r--  arch/parisc/include/asm/signal.h | 12
-rw-r--r--  arch/parisc/include/uapi/asm/signal.h | 10
-rw-r--r--  arch/parisc/kernel/ftrace.c | 3
-rw-r--r--  arch/parisc/kernel/module.c | 12
-rw-r--r--  arch/parisc/math-emu/driver.c | 6
-rw-r--r--  arch/parisc/mm/init.c | 23
-rw-r--r--  arch/powerpc/Kbuild | 3
-rw-r--r--  arch/powerpc/Kconfig | 6
-rw-r--r--  arch/powerpc/Makefile | 6
-rw-r--r--  arch/powerpc/boot/Makefile | 4
-rw-r--r--  arch/powerpc/boot/decompress.c | 2
-rw-r--r--  arch/powerpc/boot/dts/acadia.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/b4si-post.dtsi | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/bsc9131rdb.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/bsc9131si-post.dtsi | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/bsc9132qds.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/c293pcie.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/c293si-post.dtsi | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/mpc8536si-post.dtsi | 14
-rw-r--r--  arch/powerpc/boot/dts/fsl/mpc8544si-post.dtsi | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/mpc8548si-post.dtsi | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/mpc8572si-post.dtsi | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/p1010rdb-pb.dts | 16
-rw-r--r--  arch/powerpc/boot/dts/fsl/p1010rdb-pb_36b.dts | 16
-rw-r--r--  arch/powerpc/boot/dts/fsl/p1010rdb.dtsi | 16
-rw-r--r--  arch/powerpc/boot/dts/fsl/p1010rdb_32b.dtsi | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/p1010rdb_36b.dtsi | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/p1010si-post.dtsi | 16
-rw-r--r--  arch/powerpc/boot/dts/fsl/p1020si-post.dtsi | 5
-rw-r--r--  arch/powerpc/boot/dts/fsl/p1021si-post.dtsi | 5
-rw-r--r--  arch/powerpc/boot/dts/fsl/p1022si-post.dtsi | 7
-rw-r--r--  arch/powerpc/boot/dts/fsl/p2020si-post.dtsi | 17
-rw-r--r--  arch/powerpc/boot/dts/fsl/pq3-power.dtsi | 19
-rw-r--r--  arch/powerpc/boot/dts/fsl/t1023si-post.dtsi | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/t1024rdb.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/t1040rdb.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/t1040si-post.dtsi | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/t1042rdb.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/t1042rdb_pi.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/t2081si-post.dtsi | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/t4240si-post.dtsi | 2
-rw-r--r--  arch/powerpc/boot/main.c | 2
-rw-r--r--  arch/powerpc/boot/ps3.c | 2
-rw-r--r--  arch/powerpc/configs/adder875_defconfig | 2
-rw-r--r--  arch/powerpc/configs/ep88xc_defconfig | 2
-rw-r--r--  arch/powerpc/configs/mpc866_ads_defconfig | 2
-rw-r--r--  arch/powerpc/configs/mpc885_ads_defconfig | 2
-rw-r--r--  arch/powerpc/configs/tqm8xx_defconfig | 2
-rw-r--r--  arch/powerpc/include/asm/cpu_has_feature.h | 2
-rw-r--r--  arch/powerpc/include/asm/eeh.h | 2
-rw-r--r--  arch/powerpc/include/asm/fadump-internal.h | 36
-rw-r--r--  arch/powerpc/include/asm/fadump.h | 2
-rw-r--r--  arch/powerpc/include/asm/feature-fixups.h | 2
-rw-r--r--  arch/powerpc/include/asm/hvcall.h | 10
-rw-r--r--  arch/powerpc/include/asm/interrupt.h | 10
-rw-r--r--  arch/powerpc/include/asm/io.h | 28
-rw-r--r--  arch/powerpc/include/asm/kasan.h | 2
-rw-r--r--  arch/powerpc/include/asm/kexec.h | 15
-rw-r--r--  arch/powerpc/include/asm/kexec_ranges.h | 20
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h | 1
-rw-r--r--  arch/powerpc/include/asm/mmu.h | 2
-rw-r--r--  arch/powerpc/include/asm/module.h | 5
-rw-r--r--  arch/powerpc/include/asm/opal-api.h | 4
-rw-r--r--  arch/powerpc/include/asm/percpu.h | 10
-rw-r--r--  arch/powerpc/include/asm/pmac_feature.h | 2
-rw-r--r--  arch/powerpc/include/asm/ppc-opcode.h | 4
-rw-r--r--  arch/powerpc/include/asm/processor.h | 13
-rw-r--r--  arch/powerpc/include/asm/reg.h | 2
-rw-r--r--  arch/powerpc/include/asm/uninorth.h | 2
-rw-r--r--  arch/powerpc/include/uapi/asm/bootx.h | 2
-rw-r--r--  arch/powerpc/include/uapi/asm/papr_pdsm.h | 165
-rw-r--r--  arch/powerpc/kernel/Makefile | 7
-rw-r--r--  arch/powerpc/kernel/cpu_setup_6xx.S | 4
-rw-r--r--  arch/powerpc/kernel/dexcr.c | 124
-rw-r--r--  arch/powerpc/kernel/eeh.c | 11
-rw-r--r--  arch/powerpc/kernel/eeh_driver.c | 13
-rw-r--r--  arch/powerpc/kernel/eeh_pe.c | 8
-rw-r--r--  arch/powerpc/kernel/fadump.c | 542
-rw-r--r--  arch/powerpc/kernel/head_8xx.S | 4
-rw-r--r--  arch/powerpc/kernel/head_book3s_32.S | 6
-rw-r--r--  arch/powerpc/kernel/kprobes-ftrace.c | 3
-rw-r--r--  arch/powerpc/kernel/kprobes.c | 22
-rw-r--r--  arch/powerpc/kernel/misc_64.S | 4
-rw-r--r--  arch/powerpc/kernel/module.c | 40
-rw-r--r--  arch/powerpc/kernel/process.c | 29
-rw-r--r--  arch/powerpc/kernel/prom.c | 23
-rw-r--r--  arch/powerpc/kernel/prom_init.c | 4
-rw-r--r--  arch/powerpc/kernel/ptrace/ptrace-tm.c | 2
-rw-r--r--  arch/powerpc/kernel/ptrace/ptrace-view.c | 7
-rw-r--r--  arch/powerpc/kernel/setup-common.c | 2
-rw-r--r--  arch/powerpc/kernel/setup_64.c | 2
-rw-r--r--  arch/powerpc/kernel/smp.c | 2
-rw-r--r--  arch/powerpc/kernel/sysfs.c | 4
-rw-r--r--  arch/powerpc/kexec/Makefile | 4
-rw-r--r--  arch/powerpc/kexec/core_64.c | 91
-rw-r--r--  arch/powerpc/kexec/crash.c | 195
-rw-r--r--  arch/powerpc/kexec/elf_64.c | 3
-rw-r--r--  arch/powerpc/kexec/file_load_64.c | 314
-rw-r--r--  arch/powerpc/kexec/ranges.c | 312
-rw-r--r--  arch/powerpc/kvm/book3s.c | 9
-rw-r--r--  arch/powerpc/kvm/book3s.h | 1
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c | 12
-rw-r--r--  arch/powerpc/kvm/book3s_emulate.c | 4
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c | 3
-rw-r--r--  arch/powerpc/kvm/book3s_hv_nestedv2.c | 4
-rw-r--r--  arch/powerpc/kvm/book3s_pr.c | 7
-rw-r--r--  arch/powerpc/kvm/book3s_xive.c | 2
-rw-r--r--  arch/powerpc/kvm/e500_mmu_host.c | 6
-rw-r--r--  arch/powerpc/lib/Makefile | 2
-rw-r--r--  arch/powerpc/lib/code-patching.c | 33
-rw-r--r--  arch/powerpc/lib/feature-fixups.c | 8
-rw-r--r--  arch/powerpc/lib/test-code-patching.c | 92
-rw-r--r--  arch/powerpc/mm/Makefile | 2
-rw-r--r--  arch/powerpc/mm/book3s32/mmu.c | 2
-rw-r--r--  arch/powerpc/mm/book3s64/Makefile | 2
-rw-r--r--  arch/powerpc/mm/cacheflush.c | 2
-rw-r--r--  arch/powerpc/mm/kasan/init_book3e_64.c | 2
-rw-r--r--  arch/powerpc/mm/kasan/init_book3s_64.c | 2
-rw-r--r--  arch/powerpc/mm/mem.c | 66
-rw-r--r--  arch/powerpc/mm/nohash/Makefile | 2
-rw-r--r--  arch/powerpc/mm/nohash/kaslr_booke.c | 2
-rw-r--r--  arch/powerpc/mm/ptdump/hashpagetable.c | 2
-rw-r--r--  arch/powerpc/net/bpf_jit_comp.c | 10
-rw-r--r--  arch/powerpc/net/bpf_jit_comp32.c | 137
-rw-r--r--  arch/powerpc/net/bpf_jit_comp64.c | 77
-rw-r--r--  arch/powerpc/platforms/512x/mpc512x_shared.c | 2
-rw-r--r--  arch/powerpc/platforms/52xx/lite5200_sleep.S | 6
-rw-r--r--  arch/powerpc/platforms/52xx/mpc52xx_common.c | 2
-rw-r--r--  arch/powerpc/platforms/52xx/mpc52xx_gpt.c | 2
-rw-r--r--  arch/powerpc/platforms/83xx/suspend-asm.S | 6
-rw-r--r--  arch/powerpc/platforms/85xx/smp.c | 9
-rw-r--r--  arch/powerpc/platforms/cell/iommu.c | 17
-rw-r--r--  arch/powerpc/platforms/cell/smp.c | 1
-rw-r--r--  arch/powerpc/platforms/cell/spufs/file.c | 20
-rw-r--r--  arch/powerpc/platforms/cell/spufs/sched.c | 2
-rw-r--r--  arch/powerpc/platforms/maple/pci.c | 2
-rw-r--r--  arch/powerpc/platforms/powermac/pic.c | 2
-rw-r--r--  arch/powerpc/platforms/powermac/sleep.S | 2
-rw-r--r--  arch/powerpc/platforms/powernv/opal-fadump.c | 35
-rw-r--r--  arch/powerpc/platforms/powernv/pci-sriov.c | 4
-rw-r--r--  arch/powerpc/platforms/powernv/vas-window.c | 2
-rw-r--r--  arch/powerpc/platforms/ps3/device-init.c | 61
-rw-r--r--  arch/powerpc/platforms/pseries/Makefile | 1
-rw-r--r--  arch/powerpc/platforms/pseries/lpar.c | 6
-rw-r--r--  arch/powerpc/platforms/pseries/lparcfg.c | 45
-rw-r--r--  arch/powerpc/platforms/pseries/papr_scm.c | 43
-rw-r--r--  arch/powerpc/platforms/pseries/pci.c | 27
-rw-r--r--  arch/powerpc/platforms/pseries/rtas-fadump.c | 322
-rw-r--r--  arch/powerpc/platforms/pseries/rtas-fadump.h | 29
-rw-r--r--  arch/powerpc/platforms/pseries/vas.c | 2
-rw-r--r--  arch/powerpc/platforms/pseries/vio.c | 8
-rw-r--r--  arch/powerpc/sysdev/Makefile | 2
-rw-r--r--  arch/powerpc/sysdev/dart_iommu.c | 4
-rw-r--r--  arch/powerpc/sysdev/fsl_gtm.c | 6
-rw-r--r--  arch/powerpc/sysdev/fsl_msi.c | 2
-rw-r--r--  arch/powerpc/sysdev/xive/common.c | 4
-rw-r--r--  arch/powerpc/sysdev/xive/native.c | 2
-rw-r--r--  arch/powerpc/xmon/Makefile | 2
-rw-r--r--  arch/powerpc/xmon/xmon.c | 6
-rw-r--r--  arch/riscv/configs/nommu_k210_defconfig | 2
-rw-r--r--  arch/riscv/configs/nommu_k210_sdcard_defconfig | 2
-rw-r--r--  arch/riscv/configs/nommu_virt_defconfig | 2
-rw-r--r--  arch/riscv/include/asm/csr.h | 5
-rw-r--r--  arch/riscv/include/asm/kvm_host.h | 21
-rw-r--r--  arch/riscv/include/asm/kvm_vcpu_pmu.h | 16
-rw-r--r--  arch/riscv/include/asm/pgtable.h | 3
-rw-r--r--  arch/riscv/include/asm/sbi.h | 38
-rw-r--r--  arch/riscv/include/uapi/asm/kvm.h | 1
-rw-r--r--  arch/riscv/kernel/module.c | 12
-rw-r--r--  arch/riscv/kernel/paravirt.c | 6
-rw-r--r--  arch/riscv/kernel/probes/ftrace.c | 3
-rw-r--r--  arch/riscv/kernel/probes/kprobes.c | 10
-rw-r--r--  arch/riscv/kvm/aia.c | 5
-rw-r--r--  arch/riscv/kvm/main.c | 18
-rw-r--r--  arch/riscv/kvm/mmu.c | 20
-rw-r--r--  arch/riscv/kvm/vcpu.c | 85
-rw-r--r--  arch/riscv/kvm/vcpu_exit.c | 4
-rw-r--r--  arch/riscv/kvm/vcpu_onereg.c | 6
-rw-r--r--  arch/riscv/kvm/vcpu_pmu.c | 260
-rw-r--r--  arch/riscv/kvm/vcpu_sbi.c | 7
-rw-r--r--  arch/riscv/kvm/vcpu_sbi_hsm.c | 42
-rw-r--r--  arch/riscv/kvm/vcpu_sbi_pmu.c | 17
-rw-r--r--  arch/riscv/kvm/vcpu_sbi_sta.c | 4
-rw-r--r--  arch/riscv/kvm/vm.c | 1
-rw-r--r--  arch/riscv/mm/init.c | 35
-rw-r--r--  arch/riscv/net/bpf_jit_core.c | 13
-rw-r--r--  arch/s390/kernel/ftrace.c | 7
-rw-r--r--  arch/s390/kernel/kprobes.c | 4
-rw-r--r--  arch/s390/kernel/module.c | 42
-rw-r--r--  arch/s390/mm/init.c | 30
-rw-r--r--  arch/sh/configs/edosk7705_defconfig | 2
-rw-r--r--  arch/sh/configs/se7619_defconfig | 2
-rw-r--r--  arch/sh/configs/se7712_defconfig | 2
-rw-r--r--  arch/sh/configs/se7721_defconfig | 2
-rw-r--r--  arch/sh/configs/shmin_defconfig | 2
-rw-r--r--  arch/sparc/include/asm/pgtable_32.h | 2
-rw-r--r--  arch/sparc/kernel/module.c | 30
-rw-r--r--  arch/sparc/mm/Makefile | 2
-rw-r--r--  arch/sparc/mm/execmem.c | 21
-rw-r--r--  arch/sparc/net/bpf_jit_comp_32.c | 8
-rw-r--r--  arch/x86/Kconfig | 1
-rw-r--r--  arch/x86/include/asm/fpu/api.h | 3
-rw-r--r--  arch/x86/include/asm/kexec.h | 13
-rw-r--r--  arch/x86/include/asm/kvm-x86-ops.h | 1
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 63
-rw-r--r--  arch/x86/include/asm/mpspec.h | 6
-rw-r--r--  arch/x86/include/asm/sev-common.h | 8
-rw-r--r--  arch/x86/include/asm/vmx.h | 13
-rw-r--r--  arch/x86/include/uapi/asm/kvm.h | 22
-rw-r--r--  arch/x86/kernel/crash.c | 32
-rw-r--r--  arch/x86/kernel/fpu/xstate.c | 1
-rw-r--r--  arch/x86/kernel/fpu/xstate.h | 2
-rw-r--r--  arch/x86/kernel/ftrace.c | 16
-rw-r--r--  arch/x86/kernel/kprobes/core.c | 4
-rw-r--r--  arch/x86/kernel/kprobes/ftrace.c | 3
-rw-r--r--  arch/x86/kernel/module.c | 51
-rw-r--r--  arch/x86/kvm/Kconfig | 13
-rw-r--r--  arch/x86/kvm/Makefile | 9
-rw-r--r--  arch/x86/kvm/cpuid.c | 43
-rw-r--r--  arch/x86/kvm/kvm_emulate.h | 1
-rw-r--r--  arch/x86/kvm/mmu.h | 7
-rw-r--r--  arch/x86/kvm/mmu/mmu.c | 295
-rw-r--r--  arch/x86/kvm/mmu/mmu_internal.h | 28
-rw-r--r--  arch/x86/kvm/mmu/mmutrace.h | 2
-rw-r--r--  arch/x86/kvm/mmu/page_track.c | 2
-rw-r--r--  arch/x86/kvm/mmu/paging_tmpl.h | 28
-rw-r--r--  arch/x86/kvm/mmu/spte.c | 40
-rw-r--r--  arch/x86/kvm/mmu/spte.h | 26
-rw-r--r--  arch/x86/kvm/mmu/tdp_mmu.c | 139
-rw-r--r--  arch/x86/kvm/mmu/tdp_mmu.h | 1
-rw-r--r--  arch/x86/kvm/svm/sev.c | 343
-rw-r--r--  arch/x86/kvm/svm/svm.c | 36
-rw-r--r--  arch/x86/kvm/svm/svm.h | 56
-rw-r--r--  arch/x86/kvm/trace.h | 4
-rw-r--r--  arch/x86/kvm/vmx/main.c | 167
-rw-r--r--  arch/x86/kvm/vmx/nested.c | 30
-rw-r--r--  arch/x86/kvm/vmx/vmcs.h | 5
-rw-r--r--  arch/x86/kvm/vmx/vmx.c | 440
-rw-r--r--  arch/x86/kvm/vmx/vmx.h | 6
-rw-r--r--  arch/x86/kvm/vmx/x86_ops.h | 124
-rw-r--r--  arch/x86/kvm/x86.c | 262
-rw-r--r--  arch/x86/kvm/x86.h | 2
-rw-r--r--  arch/x86/mm/init.c | 29
412 files changed, 9143 insertions, 4648 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 93404c802d29..b34946d90e4b 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -60,9 +60,9 @@ config GENERIC_ENTRY
config KPROBES
bool "Kprobes"
- depends on MODULES
depends on HAVE_KPROBES
select KALLSYMS
+ select EXECMEM
select NEED_TASKS_RCU
help
Kprobes allows you to trap at almost any kernel address and
@@ -977,6 +977,14 @@ config ARCH_WANTS_MODULES_DATA_IN_VMALLOC
For architectures like powerpc/32 which have constraints on module
allocation and need to allocate module data outside of module area.
+config ARCH_WANTS_EXECMEM_LATE
+ bool
+ help
+ For architectures that do not allocate executable memory early on
+ boot, but rather require its initialization late when there is
+ enough entropy for module space randomization, for instance
+ arm64.
+
config HAVE_IRQ_EXIT_ON_IRQ_STACK
bool
help
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index b14aed3a17ab..749d6e3788b4 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -35,6 +35,7 @@ config ARM
select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7
select ARCH_SUPPORTS_ATOMIC_RMW
+ select ARCH_SUPPORTS_CFI_CLANG
select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE
select ARCH_SUPPORTS_PER_VMA_LOCK
select ARCH_USE_BUILTIN_BSWAP
@@ -1233,9 +1234,9 @@ config HIGHPTE
consumed by page tables. Setting this option will allow
user-space 2nd level page tables to reside in high memory.
-config CPU_SW_DOMAIN_PAN
- bool "Enable use of CPU domains to implement privileged no-access"
- depends on MMU && !ARM_LPAE
+config ARM_PAN
+ bool "Enable privileged no-access"
+ depends on MMU
default y
help
Increase kernel security by ensuring that normal kernel accesses
@@ -1244,10 +1245,26 @@ config CPU_SW_DOMAIN_PAN
by ensuring that magic values (such as LIST_POISON) will always
fault when dereferenced.
+ The implementation uses CPU domains when !CONFIG_ARM_LPAE and
+ disabling of TTBR0 page table walks with CONFIG_ARM_LPAE.
+
+config CPU_SW_DOMAIN_PAN
+ def_bool y
+ depends on ARM_PAN && !ARM_LPAE
+ help
+ Enable use of CPU domains to implement privileged no-access.
+
CPUs with low-vector mappings use a best-efforts implementation.
Their lower 1MB needs to remain accessible for the vectors, but
the remainder of userspace will become appropriately inaccessible.
+config CPU_TTBR0_PAN
+ def_bool y
+ depends on ARM_PAN && ARM_LPAE
+ help
+ Enable privileged no-access by disabling TTBR0 page table walks when
+ running in kernel mode.
+
config HW_PERF_EVENTS
def_bool y
depends on ARM_PMU
diff --git a/arch/arm/configs/collie_defconfig b/arch/arm/configs/collie_defconfig
index 01b5a5a73f03..42cb1c854118 100644
--- a/arch/arm/configs/collie_defconfig
+++ b/arch/arm/configs/collie_defconfig
@@ -3,7 +3,7 @@ CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
-# CONFIG_BASE_FULL is not set
+CONFIG_BASE_SMALL=y
# CONFIG_EPOLL is not set
CONFIG_ARCH_MULTI_V4=y
# CONFIG_ARCH_MULTI_V7 is not set
diff --git a/arch/arm/configs/keystone_defconfig b/arch/arm/configs/keystone_defconfig
index 59c4835ffc97..c1291ca290b2 100644
--- a/arch/arm/configs/keystone_defconfig
+++ b/arch/arm/configs/keystone_defconfig
@@ -12,7 +12,7 @@ CONFIG_CGROUP_DEVICE=y
CONFIG_CGROUP_CPUACCT=y
CONFIG_BLK_DEV_INITRD=y
# CONFIG_ELF_CORE is not set
-# CONFIG_BASE_FULL is not set
+CONFIG_BASE_SMALL=y
CONFIG_KALLSYMS_ALL=y
CONFIG_EXPERT=y
CONFIG_PROFILING=y
diff --git a/arch/arm/configs/lpc18xx_defconfig b/arch/arm/configs/lpc18xx_defconfig
index d169da9b2824..f55c231e0870 100644
--- a/arch/arm/configs/lpc18xx_defconfig
+++ b/arch/arm/configs/lpc18xx_defconfig
@@ -8,7 +8,7 @@ CONFIG_BLK_DEV_INITRD=y
# CONFIG_RD_LZ4 is not set
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
# CONFIG_UID16 is not set
-# CONFIG_BASE_FULL is not set
+CONFIG_BASE_SMALL=y
# CONFIG_FUTEX is not set
# CONFIG_EPOLL is not set
# CONFIG_SIGNALFD is not set
diff --git a/arch/arm/configs/moxart_defconfig b/arch/arm/configs/moxart_defconfig
index 1d41e73f4903..34d079e03b3c 100644
--- a/arch/arm/configs/moxart_defconfig
+++ b/arch/arm/configs/moxart_defconfig
@@ -6,7 +6,7 @@ CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_EXPERT=y
# CONFIG_ELF_CORE is not set
-# CONFIG_BASE_FULL is not set
+CONFIG_BASE_SMALL=y
# CONFIG_SIGNALFD is not set
# CONFIG_TIMERFD is not set
# CONFIG_EVENTFD is not set
diff --git a/arch/arm/configs/mps2_defconfig b/arch/arm/configs/mps2_defconfig
index 3ed73f184d83..e995e50537ef 100644
--- a/arch/arm/configs/mps2_defconfig
+++ b/arch/arm/configs/mps2_defconfig
@@ -5,7 +5,7 @@ CONFIG_LOG_BUF_SHIFT=16
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_EXPERT=y
# CONFIG_UID16 is not set
-# CONFIG_BASE_FULL is not set
+CONFIG_BASE_SMALL=y
# CONFIG_FUTEX is not set
# CONFIG_EPOLL is not set
# CONFIG_SIGNALFD is not set
diff --git a/arch/arm/configs/omap1_defconfig b/arch/arm/configs/omap1_defconfig
index 729ea8157e2a..025b595dd837 100644
--- a/arch/arm/configs/omap1_defconfig
+++ b/arch/arm/configs/omap1_defconfig
@@ -9,7 +9,7 @@ CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
# CONFIG_ELF_CORE is not set
-# CONFIG_BASE_FULL is not set
+CONFIG_BASE_SMALL=y
# CONFIG_SHMEM is not set
# CONFIG_KALLSYMS is not set
CONFIG_PROFILING=y
diff --git a/arch/arm/configs/stm32_defconfig b/arch/arm/configs/stm32_defconfig
index b9fe3fbed5ae..3baec075d1ef 100644
--- a/arch/arm/configs/stm32_defconfig
+++ b/arch/arm/configs/stm32_defconfig
@@ -6,7 +6,7 @@ CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_EXPERT=y
# CONFIG_UID16 is not set
-# CONFIG_BASE_FULL is not set
+CONFIG_BASE_SMALL=y
# CONFIG_FUTEX is not set
# CONFIG_EPOLL is not set
# CONFIG_SIGNALFD is not set
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index aebe2c8f6a68..d33c1e24e00b 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -21,6 +21,7 @@
#include <asm/opcodes-virt.h>
#include <asm/asm-offsets.h>
#include <asm/page.h>
+#include <asm/pgtable.h>
#include <asm/thread_info.h>
#include <asm/uaccess-asm.h>
diff --git a/arch/arm/include/asm/glue-cache.h b/arch/arm/include/asm/glue-cache.h
index 724f8dac1e5b..4186fbf7341f 100644
--- a/arch/arm/include/asm/glue-cache.h
+++ b/arch/arm/include/asm/glue-cache.h
@@ -118,6 +118,10 @@
# define MULTI_CACHE 1
#endif
+#ifdef CONFIG_CPU_CACHE_NOP
+# define MULTI_CACHE 1
+#endif
+
#if defined(CONFIG_CPU_V7M)
# define MULTI_CACHE 1
#endif
@@ -126,29 +130,15 @@
#error Unknown cache maintenance model
#endif
-#ifndef __ASSEMBLER__
-static inline void nop_flush_icache_all(void) { }
-static inline void nop_flush_kern_cache_all(void) { }
-static inline void nop_flush_kern_cache_louis(void) { }
-static inline void nop_flush_user_cache_all(void) { }
-static inline void nop_flush_user_cache_range(unsigned long a,
- unsigned long b, unsigned int c) { }
-
-static inline void nop_coherent_kern_range(unsigned long a, unsigned long b) { }
-static inline int nop_coherent_user_range(unsigned long a,
- unsigned long b) { return 0; }
-static inline void nop_flush_kern_dcache_area(void *a, size_t s) { }
-
-static inline void nop_dma_flush_range(const void *a, const void *b) { }
-
-static inline void nop_dma_map_area(const void *s, size_t l, int f) { }
-static inline void nop_dma_unmap_area(const void *s, size_t l, int f) { }
-#endif
-
#ifndef MULTI_CACHE
#define __cpuc_flush_icache_all __glue(_CACHE,_flush_icache_all)
#define __cpuc_flush_kern_all __glue(_CACHE,_flush_kern_cache_all)
+/* This function only has a dedicated assembly callback on the v7 cache */
+#ifdef CONFIG_CPU_CACHE_V7
#define __cpuc_flush_kern_louis __glue(_CACHE,_flush_kern_cache_louis)
+#else
+#define __cpuc_flush_kern_louis __glue(_CACHE,_flush_kern_cache_all)
+#endif
#define __cpuc_flush_user_all __glue(_CACHE,_flush_user_cache_all)
#define __cpuc_flush_user_range __glue(_CACHE,_flush_user_cache_range)
#define __cpuc_coherent_kern_range __glue(_CACHE,_coherent_kern_range)
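For context on the __glue() indirection above: when only one cache model is built in (!MULTI_CACHE), _CACHE expands to that model's prefix and __glue() simply pastes the two tokens together. A simplified sketch of the mechanism (the real macros live in asm/glue.h):

    /* Simplified sketch of the token-pasting helpers behind __cpuc_*. */
    #define ____glue(name, fn)  name##fn
    #define __glue(name, fn)    ____glue(name, fn)

    /*
     * With _CACHE defined as, say, fa and CONFIG_CPU_CACHE_V7 unset, the new
     * fallback above makes __cpuc_flush_kern_louis expand to
     * fa_flush_kern_cache_all, which is why the per-model
     * *_flush_kern_cache_louis aliases can be dropped from the .S files later
     * in this diff.
     */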
diff --git a/arch/arm/include/asm/hw_breakpoint.h b/arch/arm/include/asm/hw_breakpoint.h
index 62358d3ca0a8..e7f9961c53b2 100644
--- a/arch/arm/include/asm/hw_breakpoint.h
+++ b/arch/arm/include/asm/hw_breakpoint.h
@@ -84,6 +84,7 @@ static inline void decode_ctrl_reg(u32 reg,
#define ARM_DSCR_MOE(x) ((x >> 2) & 0xf)
#define ARM_ENTRY_BREAKPOINT 0x1
#define ARM_ENTRY_ASYNC_WATCHPOINT 0x2
+#define ARM_ENTRY_CFI_BREAKPOINT 0x3
#define ARM_ENTRY_SYNC_WATCHPOINT 0xa
/* DSCR monitor/halting bits. */
diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h
index 2f35b4eddaa8..323ad811732e 100644
--- a/arch/arm/include/asm/pgtable-3level-hwdef.h
+++ b/arch/arm/include/asm/pgtable-3level-hwdef.h
@@ -74,6 +74,7 @@
#define PHYS_MASK_SHIFT (40)
#define PHYS_MASK ((1ULL << PHYS_MASK_SHIFT) - 1)
+#ifndef CONFIG_CPU_TTBR0_PAN
/*
* TTBR0/TTBR1 split (PAGE_OFFSET):
* 0x40000000: T0SZ = 2, T1SZ = 0 (not used)
@@ -93,5 +94,30 @@
#endif
#define TTBR1_SIZE (((PAGE_OFFSET >> 30) - 1) << 16)
+#else
+/*
+ * With CONFIG_CPU_TTBR0_PAN enabled, TTBR1 is only used during uaccess
+ * disabled regions when TTBR0 is disabled.
+ */
+#define TTBR1_OFFSET 0 /* pointing to swapper_pg_dir */
+#define TTBR1_SIZE 0 /* TTBR1 size controlled via TTBCR.T0SZ */
+#endif
+
+/*
+ * TTBCR register bits.
+ */
+#define TTBCR_EAE (1 << 31)
+#define TTBCR_IMP (1 << 30)
+#define TTBCR_SH1_MASK (3 << 28)
+#define TTBCR_ORGN1_MASK (3 << 26)
+#define TTBCR_IRGN1_MASK (3 << 24)
+#define TTBCR_EPD1 (1 << 23)
+#define TTBCR_A1 (1 << 22)
+#define TTBCR_T1SZ_MASK (7 << 16)
+#define TTBCR_SH0_MASK (3 << 12)
+#define TTBCR_ORGN0_MASK (3 << 10)
+#define TTBCR_IRGN0_MASK (3 << 8)
+#define TTBCR_EPD0 (1 << 7)
+#define TTBCR_T0SZ_MASK (7 << 0)
#endif
diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h
index 280396483f5d..b4986a23d852 100644
--- a/arch/arm/include/asm/proc-fns.h
+++ b/arch/arm/include/asm/proc-fns.h
@@ -178,6 +178,18 @@ extern void cpu_resume(void);
})
#endif
+static inline unsigned int cpu_get_ttbcr(void)
+{
+ unsigned int ttbcr;
+ asm("mrc p15, 0, %0, c2, c0, 2" : "=r" (ttbcr));
+ return ttbcr;
+}
+
+static inline void cpu_set_ttbcr(unsigned int ttbcr)
+{
+ asm volatile("mcr p15, 0, %0, c2, c0, 2" : : "r" (ttbcr) : "memory");
+}
+
#else /*!CONFIG_MMU */
#define cpu_switch_mm(pgd,mm) { }
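The cpu_get_ttbcr()/cpu_set_ttbcr() accessors added here, together with the TTBCR_* bits from pgtable-3level-hwdef.h above, are what the C side of TTBR0 PAN is built on. A minimal sketch of how they combine (the real code is in the uaccess.h hunk below):

    /* Illustrative only: mirrors uaccess_save_and_enable()/uaccess_restore(). */
    unsigned int old_ttbcr = cpu_get_ttbcr();

    /* Re-enable TTBR0 walks and take the ASID from TTBR0 for a user access. */
    cpu_set_ttbcr(old_ttbcr & ~(TTBCR_A1 | TTBCR_EPD0 | TTBCR_T0SZ_MASK));
    isb();

    /* ... perform the user access ... */

    /* Restore the previous (no-access) configuration. */
    cpu_set_ttbcr(old_ttbcr);
    isb();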
diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h
index 14a38cc67e0b..6eb311fb2da0 100644
--- a/arch/arm/include/asm/ptrace.h
+++ b/arch/arm/include/asm/ptrace.h
@@ -20,6 +20,7 @@ struct pt_regs {
struct svc_pt_regs {
struct pt_regs regs;
u32 dacr;
+ u32 ttbcr;
};
#define to_svc_pt_regs(r) container_of(r, struct svc_pt_regs, regs)
diff --git a/arch/arm/include/asm/uaccess-asm.h b/arch/arm/include/asm/uaccess-asm.h
index 65da32e1f1c1..4bccd895d954 100644
--- a/arch/arm/include/asm/uaccess-asm.h
+++ b/arch/arm/include/asm/uaccess-asm.h
@@ -39,8 +39,9 @@
#endif
.endm
+#if defined(CONFIG_CPU_SW_DOMAIN_PAN)
+
.macro uaccess_disable, tmp, isb=1
-#ifdef CONFIG_CPU_SW_DOMAIN_PAN
/*
* Whenever we re-enter userspace, the domains should always be
* set appropriately.
@@ -50,11 +51,9 @@
.if \isb
instr_sync
.endif
-#endif
.endm
.macro uaccess_enable, tmp, isb=1
-#ifdef CONFIG_CPU_SW_DOMAIN_PAN
/*
* Whenever we re-enter userspace, the domains should always be
* set appropriately.
@@ -64,15 +63,61 @@
.if \isb
instr_sync
.endif
-#endif
.endm
+#elif defined(CONFIG_CPU_TTBR0_PAN)
+
+ .macro uaccess_disable, tmp, isb=1
+ /*
+ * Disable TTBR0 page table walks (EDP0 = 1), use the reserved ASID
+ * from TTBR1 (A1 = 1) and enable TTBR1 page table walks for kernel
+ * addresses by reducing TTBR0 range to 32MB (T0SZ = 7).
+ */
+ mrc p15, 0, \tmp, c2, c0, 2 @ read TTBCR
+ orr \tmp, \tmp, #TTBCR_EPD0 | TTBCR_T0SZ_MASK
+ orr \tmp, \tmp, #TTBCR_A1
+ mcr p15, 0, \tmp, c2, c0, 2 @ write TTBCR
+ .if \isb
+ instr_sync
+ .endif
+ .endm
+
+ .macro uaccess_enable, tmp, isb=1
+ /*
+ * Enable TTBR0 page table walks (T0SZ = 0, EDP0 = 0) and ASID from
+ * TTBR0 (A1 = 0).
+ */
+ mrc p15, 0, \tmp, c2, c0, 2 @ read TTBCR
+ bic \tmp, \tmp, #TTBCR_EPD0 | TTBCR_T0SZ_MASK
+ bic \tmp, \tmp, #TTBCR_A1
+ mcr p15, 0, \tmp, c2, c0, 2 @ write TTBCR
+ .if \isb
+ instr_sync
+ .endif
+ .endm
+
+#else
+
+ .macro uaccess_disable, tmp, isb=1
+ .endm
+
+ .macro uaccess_enable, tmp, isb=1
+ .endm
+
+#endif
+
#if defined(CONFIG_CPU_SW_DOMAIN_PAN) || defined(CONFIG_CPU_USE_DOMAINS)
#define DACR(x...) x
#else
#define DACR(x...)
#endif
+#ifdef CONFIG_CPU_TTBR0_PAN
+#define PAN(x...) x
+#else
+#define PAN(x...)
+#endif
+
/*
* Save the address limit on entry to a privileged exception.
*
@@ -86,6 +131,8 @@
.macro uaccess_entry, tsk, tmp0, tmp1, tmp2, disable
DACR( mrc p15, 0, \tmp0, c3, c0, 0)
DACR( str \tmp0, [sp, #SVC_DACR])
+ PAN( mrc p15, 0, \tmp0, c2, c0, 2)
+ PAN( str \tmp0, [sp, #SVC_TTBCR])
.if \disable && IS_ENABLED(CONFIG_CPU_SW_DOMAIN_PAN)
/* kernel=client, user=no access */
mov \tmp2, #DACR_UACCESS_DISABLE
@@ -104,8 +151,11 @@
.macro uaccess_exit, tsk, tmp0, tmp1
DACR( ldr \tmp0, [sp, #SVC_DACR])
DACR( mcr p15, 0, \tmp0, c3, c0, 0)
+ PAN( ldr \tmp0, [sp, #SVC_TTBCR])
+ PAN( mcr p15, 0, \tmp0, c2, c0, 2)
.endm
#undef DACR
+#undef PAN
#endif /* __ASM_UACCESS_ASM_H__ */
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 9556d04387f7..6c9c16d767cf 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -14,6 +14,8 @@
#include <asm/domain.h>
#include <asm/unaligned.h>
#include <asm/unified.h>
+#include <asm/pgtable.h>
+#include <asm/proc-fns.h>
#include <asm/compiler.h>
#include <asm/extable.h>
@@ -24,9 +26,10 @@
* perform such accesses (eg, via list poison values) which could then
* be exploited for priviledge escalation.
*/
+#if defined(CONFIG_CPU_SW_DOMAIN_PAN)
+
static __always_inline unsigned int uaccess_save_and_enable(void)
{
-#ifdef CONFIG_CPU_SW_DOMAIN_PAN
unsigned int old_domain = get_domain();
/* Set the current domain access to permit user accesses */
@@ -34,19 +37,49 @@ static __always_inline unsigned int uaccess_save_and_enable(void)
domain_val(DOMAIN_USER, DOMAIN_CLIENT));
return old_domain;
-#else
- return 0;
-#endif
}
static __always_inline void uaccess_restore(unsigned int flags)
{
-#ifdef CONFIG_CPU_SW_DOMAIN_PAN
/* Restore the user access mask */
set_domain(flags);
-#endif
}
+#elif defined(CONFIG_CPU_TTBR0_PAN)
+
+static __always_inline unsigned int uaccess_save_and_enable(void)
+{
+ unsigned int old_ttbcr = cpu_get_ttbcr();
+
+ /*
+ * Enable TTBR0 page table walks (T0SZ = 0, EDP0 = 0) and ASID from
+ * TTBR0 (A1 = 0).
+ */
+ cpu_set_ttbcr(old_ttbcr & ~(TTBCR_A1 | TTBCR_EPD0 | TTBCR_T0SZ_MASK));
+ isb();
+
+ return old_ttbcr;
+}
+
+static inline void uaccess_restore(unsigned int flags)
+{
+ cpu_set_ttbcr(flags);
+ isb();
+}
+
+#else
+
+static inline unsigned int uaccess_save_and_enable(void)
+{
+ return 0;
+}
+
+static inline void uaccess_restore(unsigned int flags)
+{
+}
+
+#endif
+
/*
* These two are intentionally not defined anywhere - if the kernel
* code generates any references to them, that's a bug.
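Whichever PAN flavour is configured, callers use the pair the same way: open a user-access window, perform the access, close it again. A sketch of a caller (the helper name and body are illustrative, not part of this patch; the real users are the __get_user/__copy_{from,to}_user paths in this header):

    /* Hypothetical caller, for illustration only. */
    static inline int read_user_word(unsigned int *dst,
                                     const unsigned int __user *src)
    {
        unsigned int flags = uaccess_save_and_enable();
        int err;

        /*
         * The actual load from *src (inline asm with an exception-table
         * fixup) is elided in this sketch.
         */
        err = 0;

        uaccess_restore(flags);
        return err;
    }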
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 4915662842ff..4853875740d0 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -85,6 +85,7 @@ int main(void)
DEFINE(S_OLD_R0, offsetof(struct pt_regs, ARM_ORIG_r0));
DEFINE(PT_REGS_SIZE, sizeof(struct pt_regs));
DEFINE(SVC_DACR, offsetof(struct svc_pt_regs, dacr));
+ DEFINE(SVC_TTBCR, offsetof(struct svc_pt_regs, ttbcr));
DEFINE(SVC_REGS_SIZE, sizeof(struct svc_pt_regs));
BLANK();
DEFINE(SIGFRAME_RC3_OFFSET, offsetof(struct sigframe, retcode[3]));
diff --git a/arch/arm/kernel/entry-ftrace.S b/arch/arm/kernel/entry-ftrace.S
index 3e7bcaca5e07..bc598e3d8dd2 100644
--- a/arch/arm/kernel/entry-ftrace.S
+++ b/arch/arm/kernel/entry-ftrace.S
@@ -271,6 +271,10 @@ ENTRY(ftrace_stub)
ret lr
ENDPROC(ftrace_stub)
+ENTRY(ftrace_stub_graph)
+ ret lr
+ENDPROC(ftrace_stub_graph)
+
#ifdef CONFIG_DYNAMIC_FTRACE
__INIT
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 054e9199f30d..a12efd0f43e8 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -17,6 +17,7 @@
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <linux/smp.h>
+#include <linux/cfi.h>
#include <linux/cpu_pm.h>
#include <linux/coresight.h>
@@ -903,6 +904,37 @@ unlock:
watchpoint_single_step_handler(addr);
}
+#ifdef CONFIG_CFI_CLANG
+static void hw_breakpoint_cfi_handler(struct pt_regs *regs)
+{
+ /*
+ * TODO: implementing target and type to pass to CFI using the more
+ * elaborate report_cfi_failure() requires compiler work. To be able
+ * to properly extract target information the compiler needs to
+ * emit a stable instructions sequence for the CFI checks so we can
+ * decode the instructions preceding the trap and figure out which
+ * registers were used.
+ */
+
+ switch (report_cfi_failure_noaddr(regs, instruction_pointer(regs))) {
+ case BUG_TRAP_TYPE_BUG:
+ die("Oops - CFI", regs, 0);
+ break;
+ case BUG_TRAP_TYPE_WARN:
+ /* Skip the breaking instruction */
+ instruction_pointer(regs) += 4;
+ break;
+ default:
+ die("Unknown CFI error", regs, 0);
+ break;
+ }
+}
+#else
+static void hw_breakpoint_cfi_handler(struct pt_regs *regs)
+{
+}
+#endif
+
/*
* Called from either the Data Abort Handler [watchpoint] or the
* Prefetch Abort Handler [breakpoint] with interrupts disabled.
@@ -932,6 +964,9 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr,
case ARM_ENTRY_SYNC_WATCHPOINT:
watchpoint_handler(addr, fsr, regs);
break;
+ case ARM_ENTRY_CFI_BREAKPOINT:
+ hw_breakpoint_cfi_handler(regs);
+ break;
default:
ret = 1; /* Unhandled fault. */
}
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index e74d84f58b77..677f218f7e84 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -12,48 +12,14 @@
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elf.h>
-#include <linux/vmalloc.h>
#include <linux/fs.h>
#include <linux/string.h>
-#include <linux/gfp.h>
#include <asm/sections.h>
#include <asm/smp_plat.h>
#include <asm/unwind.h>
#include <asm/opcodes.h>
-#ifdef CONFIG_XIP_KERNEL
-/*
- * The XIP kernel text is mapped in the module area for modules and
- * some other stuff to work without any indirect relocations.
- * MODULES_VADDR is redefined here and not in asm/memory.h to avoid
- * recompiling the whole kernel when CONFIG_XIP_KERNEL is turned on/off.
- */
-#undef MODULES_VADDR
-#define MODULES_VADDR (((unsigned long)_exiprom + ~PMD_MASK) & PMD_MASK)
-#endif
-
-#ifdef CONFIG_MMU
-void *module_alloc(unsigned long size)
-{
- gfp_t gfp_mask = GFP_KERNEL;
- void *p;
-
- /* Silence the initial allocation */
- if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS))
- gfp_mask |= __GFP_NOWARN;
-
- p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
- gfp_mask, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
- __builtin_return_address(0));
- if (!IS_ENABLED(CONFIG_ARM_MODULE_PLTS) || p)
- return p;
- return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
- GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
- __builtin_return_address(0));
-}
-#endif
-
bool module_init_section(const char *name)
{
return strstarts(name, ".init") ||
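module_alloc() disappears here because this merge moves ARM, like the other architectures touched in the diffstat, onto the generic execmem allocator; the allocation policy now lives in an execmem_arch_setup() hook (for ARM, in arch/arm/mm/init.c per the diffstat). A rough sketch of such a hook, under the assumption that the ARM version follows the common execmem pattern; the field and constant names come from the generic execmem API and may not match the actual ARM implementation exactly:

    /* Sketch only: approximates the execmem hook that replaces module_alloc(). */
    static struct execmem_info execmem_info __ro_after_init;

    struct execmem_info __init *execmem_arch_setup(void)
    {
        execmem_info = (struct execmem_info){
            .ranges = {
                [EXECMEM_DEFAULT] = {
                    .start     = MODULES_VADDR,
                    .end       = MODULES_END,
                    .pgprot    = PAGE_KERNEL_EXEC,
                    .alignment = 1,
                    /*
                     * The old vmalloc fallback used with ARM_MODULE_PLTS
                     * would be expressed via fallback_start/fallback_end.
                     */
                },
            },
        };

        return &execmem_info;
    }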
diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c
index c3ec3861dd07..58a6441b58c4 100644
--- a/arch/arm/kernel/suspend.c
+++ b/arch/arm/kernel/suspend.c
@@ -12,6 +12,7 @@
#include <asm/smp_plat.h>
#include <asm/suspend.h>
#include <asm/tlbflush.h>
+#include <asm/uaccess.h>
extern int __cpu_suspend(unsigned long, int (*)(unsigned long), u32 cpuid);
extern void cpu_resume_mmu(void);
@@ -27,6 +28,13 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
return -EINVAL;
/*
+ * Needed for the MMU disabling/enabling code to be able to run from
+ * TTBR0 addresses.
+ */
+ if (IS_ENABLED(CONFIG_CPU_TTBR0_PAN))
+ uaccess_save_and_enable();
+
+ /*
* Function graph tracer state gets incosistent when the kernel
* calls functions that never return (aka suspend finishers) hence
* disable graph tracing during their execution.
diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S
index 6928781e6bee..c289bde04743 100644
--- a/arch/arm/lib/csumpartialcopyuser.S
+++ b/arch/arm/lib/csumpartialcopyuser.S
@@ -13,7 +13,8 @@
.text
-#ifdef CONFIG_CPU_SW_DOMAIN_PAN
+#if defined(CONFIG_CPU_SW_DOMAIN_PAN)
+
.macro save_regs
mrc p15, 0, ip, c3, c0, 0
stmfd sp!, {r1, r2, r4 - r8, ip, lr}
@@ -25,7 +26,23 @@
mcr p15, 0, ip, c3, c0, 0
ret lr
.endm
+
+#elif defined(CONFIG_CPU_TTBR0_PAN)
+
+ .macro save_regs
+ mrc p15, 0, ip, c2, c0, 2 @ read TTBCR
+ stmfd sp!, {r1, r2, r4 - r8, ip, lr}
+ uaccess_enable ip
+ .endm
+
+ .macro load_regs
+ ldmfd sp!, {r1, r2, r4 - r8, ip, lr}
+ mcr p15, 0, ip, c2, c0, 2 @ restore TTBCR
+ ret lr
+ .endm
+
#else
+
.macro save_regs
stmfd sp!, {r1, r2, r4 - r8, lr}
.endm
@@ -33,6 +50,7 @@
.macro load_regs
ldmfd sp!, {r1, r2, r4 - r8, pc}
.endm
+
#endif
.macro load1b, reg1
diff --git a/arch/arm/lib/delay-loop.S b/arch/arm/lib/delay-loop.S
index 3ac05177d097..33b08ca1c242 100644
--- a/arch/arm/lib/delay-loop.S
+++ b/arch/arm/lib/delay-loop.S
@@ -5,6 +5,7 @@
* Copyright (C) 1995, 1996 Russell King
*/
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/assembler.h>
#include <asm/delay.h>
@@ -24,21 +25,26 @@
* HZ <= 1000
*/
-ENTRY(__loop_udelay)
+SYM_TYPED_FUNC_START(__loop_udelay)
ldr r2, .LC1
mul r0, r2, r0 @ r0 = delay_us * UDELAY_MULT
-ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0xfffffaf0
+ b __loop_const_udelay
+SYM_FUNC_END(__loop_udelay)
+
+SYM_TYPED_FUNC_START(__loop_const_udelay) @ 0 <= r0 <= 0xfffffaf0
ldr r2, .LC0
ldr r2, [r2]
umull r1, r0, r2, r0 @ r0-r1 = r0 * loops_per_jiffy
adds r1, r1, #0xffffffff @ rounding up ...
adcs r0, r0, r0 @ and right shift by 31
reteq lr
+ b __loop_delay
+SYM_FUNC_END(__loop_const_udelay)
.align 3
@ Delay routine
-ENTRY(__loop_delay)
+SYM_TYPED_FUNC_START(__loop_delay)
subs r0, r0, #1
#if 0
retls lr
@@ -58,6 +64,4 @@ ENTRY(__loop_delay)
#endif
bhi __loop_delay
ret lr
-ENDPROC(__loop_udelay)
-ENDPROC(__loop_const_udelay)
-ENDPROC(__loop_delay)
+SYM_FUNC_END(__loop_delay)
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 71b858c9b10c..f1f231f20ff9 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -45,6 +45,7 @@ obj-$(CONFIG_CPU_CACHE_V7) += cache-v7.o
obj-$(CONFIG_CPU_CACHE_FA) += cache-fa.o
obj-$(CONFIG_CPU_CACHE_NOP) += cache-nop.o
obj-$(CONFIG_CPU_CACHE_V7M) += cache-v7m.o
+obj-y += cache.o
obj-$(CONFIG_CPU_COPY_V4WT) += copypage-v4wt.o
obj-$(CONFIG_CPU_COPY_V4WB) += copypage-v4wb.o
@@ -62,6 +63,7 @@ obj-$(CONFIG_CPU_TLB_FEROCEON) += tlb-v4wbi.o # reuse v4wbi TLB functions
obj-$(CONFIG_CPU_TLB_V6) += tlb-v6.o
obj-$(CONFIG_CPU_TLB_V7) += tlb-v7.o
obj-$(CONFIG_CPU_TLB_FA) += tlb-fa.o
+obj-y += tlb.o
obj-$(CONFIG_CPU_ARM7TDMI) += proc-arm7tdmi.o
obj-$(CONFIG_CPU_ARM720T) += proc-arm720.o
@@ -88,6 +90,7 @@ obj-$(CONFIG_CPU_V6) += proc-v6.o
obj-$(CONFIG_CPU_V6K) += proc-v6.o
obj-$(CONFIG_CPU_V7) += proc-v7.o proc-v7-bugs.o
obj-$(CONFIG_CPU_V7M) += proc-v7m.o
+obj-$(CONFIG_CFI_CLANG) += proc.o
obj-$(CONFIG_OUTER_CACHE) += l2c-common.o
obj-$(CONFIG_CACHE_B15_RAC) += cache-b15-rac.o
diff --git a/arch/arm/mm/cache-b15-rac.c b/arch/arm/mm/cache-b15-rac.c
index 9c1172f26885..6f63b90f9e1a 100644
--- a/arch/arm/mm/cache-b15-rac.c
+++ b/arch/arm/mm/cache-b15-rac.c
@@ -5,6 +5,7 @@
* Copyright (C) 2015-2016 Broadcom
*/
+#include <linux/cfi_types.h>
#include <linux/err.h>
#include <linux/spinlock.h>
#include <linux/io.h>
diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S
index 71c64e92dead..4a3668b52a2d 100644
--- a/arch/arm/mm/cache-fa.S
+++ b/arch/arm/mm/cache-fa.S
@@ -12,6 +12,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <asm/assembler.h>
#include <asm/page.h>
@@ -39,11 +40,11 @@
*
* Unconditionally clean and invalidate the entire icache.
*/
-ENTRY(fa_flush_icache_all)
+SYM_TYPED_FUNC_START(fa_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
ret lr
-ENDPROC(fa_flush_icache_all)
+SYM_FUNC_END(fa_flush_icache_all)
/*
* flush_user_cache_all()
@@ -51,14 +52,14 @@ ENDPROC(fa_flush_icache_all)
* Clean and invalidate all cache entries in a particular address
* space.
*/
-ENTRY(fa_flush_user_cache_all)
- /* FALLTHROUGH */
+SYM_FUNC_ALIAS(fa_flush_user_cache_all, fa_flush_kern_cache_all)
+
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
-ENTRY(fa_flush_kern_cache_all)
+SYM_TYPED_FUNC_START(fa_flush_kern_cache_all)
mov ip, #0
mov r2, #VM_EXEC
__flush_whole_cache:
@@ -69,6 +70,7 @@ __flush_whole_cache:
mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer
mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush
ret lr
+SYM_FUNC_END(fa_flush_kern_cache_all)
/*
* flush_user_cache_range(start, end, flags)
@@ -80,7 +82,7 @@ __flush_whole_cache:
* - end - end address (exclusive, page aligned)
* - flags - vma_area_struct flags describing address space
*/
-ENTRY(fa_flush_user_cache_range)
+SYM_TYPED_FUNC_START(fa_flush_user_cache_range)
mov ip, #0
sub r3, r1, r0 @ calculate total size
cmp r3, #CACHE_DLIMIT @ total size >= limit?
@@ -97,6 +99,7 @@ ENTRY(fa_flush_user_cache_range)
mcrne p15, 0, ip, c7, c10, 4 @ data write barrier
mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush
ret lr
+SYM_FUNC_END(fa_flush_user_cache_range)
/*
* coherent_kern_range(start, end)
@@ -108,8 +111,11 @@ ENTRY(fa_flush_user_cache_range)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(fa_coherent_kern_range)
- /* fall through */
+SYM_TYPED_FUNC_START(fa_coherent_kern_range)
+#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */
+ b fa_coherent_user_range
+#endif
+SYM_FUNC_END(fa_coherent_kern_range)
/*
* coherent_user_range(start, end)
@@ -121,7 +127,7 @@ ENTRY(fa_coherent_kern_range)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(fa_coherent_user_range)
+SYM_TYPED_FUNC_START(fa_coherent_user_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry
mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry
@@ -133,6 +139,7 @@ ENTRY(fa_coherent_user_range)
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
mcr p15, 0, r0, c7, c5, 4 @ prefetch flush
ret lr
+SYM_FUNC_END(fa_coherent_user_range)
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -143,7 +150,7 @@ ENTRY(fa_coherent_user_range)
* - addr - kernel address
* - size - size of region
*/
-ENTRY(fa_flush_kern_dcache_area)
+SYM_TYPED_FUNC_START(fa_flush_kern_dcache_area)
add r1, r0, r1
1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line
add r0, r0, #CACHE_DLINESIZE
@@ -153,6 +160,7 @@ ENTRY(fa_flush_kern_dcache_area)
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
ret lr
+SYM_FUNC_END(fa_flush_kern_dcache_area)
/*
* dma_inv_range(start, end)
@@ -203,7 +211,7 @@ fa_dma_clean_range:
* - start - virtual start address of region
* - end - virtual end address of region
*/
-ENTRY(fa_dma_flush_range)
+SYM_TYPED_FUNC_START(fa_dma_flush_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D entry
add r0, r0, #CACHE_DLINESIZE
@@ -212,6 +220,7 @@ ENTRY(fa_dma_flush_range)
mov r0, #0
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
ret lr
+SYM_FUNC_END(fa_dma_flush_range)
/*
* dma_map_area(start, size, dir)
@@ -219,13 +228,13 @@ ENTRY(fa_dma_flush_range)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(fa_dma_map_area)
+SYM_TYPED_FUNC_START(fa_dma_map_area)
add r1, r1, r0
cmp r2, #DMA_TO_DEVICE
beq fa_dma_clean_range
bcs fa_dma_inv_range
b fa_dma_flush_range
-ENDPROC(fa_dma_map_area)
+SYM_FUNC_END(fa_dma_map_area)
/*
* dma_unmap_area(start, size, dir)
@@ -233,14 +242,6 @@ ENDPROC(fa_dma_map_area)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(fa_dma_unmap_area)
+SYM_TYPED_FUNC_START(fa_dma_unmap_area)
ret lr
-ENDPROC(fa_dma_unmap_area)
-
- .globl fa_flush_kern_cache_louis
- .equ fa_flush_kern_cache_louis, fa_flush_kern_cache_all
-
- __INITDATA
-
- @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
- define_cache_functions fa
+SYM_FUNC_END(fa_dma_unmap_area)
diff --git a/arch/arm/mm/cache-nop.S b/arch/arm/mm/cache-nop.S
index 72d939ef8798..f68dde2014ee 100644
--- a/arch/arm/mm/cache-nop.S
+++ b/arch/arm/mm/cache-nop.S
@@ -1,47 +1,52 @@
/* SPDX-License-Identifier: GPL-2.0-only */
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <asm/assembler.h>
#include "proc-macros.S"
-ENTRY(nop_flush_icache_all)
+/*
+ * These are all open-coded instead of aliased, to make clear
+ * what is going on here: all functions are stubbed out.
+ */
+SYM_TYPED_FUNC_START(nop_flush_icache_all)
ret lr
-ENDPROC(nop_flush_icache_all)
+SYM_FUNC_END(nop_flush_icache_all)
- .globl nop_flush_kern_cache_all
- .equ nop_flush_kern_cache_all, nop_flush_icache_all
-
- .globl nop_flush_kern_cache_louis
- .equ nop_flush_kern_cache_louis, nop_flush_icache_all
+SYM_TYPED_FUNC_START(nop_flush_kern_cache_all)
+ ret lr
+SYM_FUNC_END(nop_flush_kern_cache_all)
- .globl nop_flush_user_cache_all
- .equ nop_flush_user_cache_all, nop_flush_icache_all
+SYM_TYPED_FUNC_START(nop_flush_user_cache_all)
+ ret lr
+SYM_FUNC_END(nop_flush_user_cache_all)
- .globl nop_flush_user_cache_range
- .equ nop_flush_user_cache_range, nop_flush_icache_all
+SYM_TYPED_FUNC_START(nop_flush_user_cache_range)
+ ret lr
+SYM_FUNC_END(nop_flush_user_cache_range)
- .globl nop_coherent_kern_range
- .equ nop_coherent_kern_range, nop_flush_icache_all
+SYM_TYPED_FUNC_START(nop_coherent_kern_range)
+ ret lr
+SYM_FUNC_END(nop_coherent_kern_range)
-ENTRY(nop_coherent_user_range)
+SYM_TYPED_FUNC_START(nop_coherent_user_range)
mov r0, 0
ret lr
-ENDPROC(nop_coherent_user_range)
-
- .globl nop_flush_kern_dcache_area
- .equ nop_flush_kern_dcache_area, nop_flush_icache_all
+SYM_FUNC_END(nop_coherent_user_range)
- .globl nop_dma_flush_range
- .equ nop_dma_flush_range, nop_flush_icache_all
-
- .globl nop_dma_map_area
- .equ nop_dma_map_area, nop_flush_icache_all
+SYM_TYPED_FUNC_START(nop_flush_kern_dcache_area)
+ ret lr
+SYM_FUNC_END(nop_flush_kern_dcache_area)
- .globl nop_dma_unmap_area
- .equ nop_dma_unmap_area, nop_flush_icache_all
+SYM_TYPED_FUNC_START(nop_dma_flush_range)
+ ret lr
+SYM_FUNC_END(nop_dma_flush_range)
- __INITDATA
+SYM_TYPED_FUNC_START(nop_dma_map_area)
+ ret lr
+SYM_FUNC_END(nop_dma_map_area)
- @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
- define_cache_functions nop
+SYM_TYPED_FUNC_START(nop_dma_unmap_area)
+ ret lr
+SYM_FUNC_END(nop_dma_unmap_area)
diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S
index 7787057e4990..0e94e5193dbd 100644
--- a/arch/arm/mm/cache-v4.S
+++ b/arch/arm/mm/cache-v4.S
@@ -6,6 +6,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <asm/assembler.h>
#include <asm/page.h>
#include "proc-macros.S"
@@ -15,9 +16,9 @@
*
* Unconditionally clean and invalidate the entire icache.
*/
-ENTRY(v4_flush_icache_all)
+SYM_TYPED_FUNC_START(v4_flush_icache_all)
ret lr
-ENDPROC(v4_flush_icache_all)
+SYM_FUNC_END(v4_flush_icache_all)
/*
* flush_user_cache_all()
@@ -27,21 +28,22 @@ ENDPROC(v4_flush_icache_all)
*
* - mm - mm_struct describing address space
*/
-ENTRY(v4_flush_user_cache_all)
- /* FALLTHROUGH */
+SYM_FUNC_ALIAS(v4_flush_user_cache_all, v4_flush_kern_cache_all)
+
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
-ENTRY(v4_flush_kern_cache_all)
+SYM_TYPED_FUNC_START(v4_flush_kern_cache_all)
#ifdef CONFIG_CPU_CP15
mov r0, #0
mcr p15, 0, r0, c7, c7, 0 @ flush ID cache
ret lr
#else
- /* FALLTHROUGH */
+ ret lr
#endif
+SYM_FUNC_END(v4_flush_kern_cache_all)
/*
* flush_user_cache_range(start, end, flags)
@@ -53,14 +55,15 @@ ENTRY(v4_flush_kern_cache_all)
* - end - end address (exclusive, may not be aligned)
* - flags - vma_area_struct flags describing address space
*/
-ENTRY(v4_flush_user_cache_range)
+SYM_TYPED_FUNC_START(v4_flush_user_cache_range)
#ifdef CONFIG_CPU_CP15
mov ip, #0
mcr p15, 0, ip, c7, c7, 0 @ flush ID cache
ret lr
#else
- /* FALLTHROUGH */
+ ret lr
#endif
+SYM_FUNC_END(v4_flush_user_cache_range)
/*
* coherent_kern_range(start, end)
@@ -72,8 +75,9 @@ ENTRY(v4_flush_user_cache_range)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(v4_coherent_kern_range)
- /* FALLTHROUGH */
+SYM_TYPED_FUNC_START(v4_coherent_kern_range)
+ ret lr
+SYM_FUNC_END(v4_coherent_kern_range)
/*
* coherent_user_range(start, end)
@@ -85,9 +89,10 @@ ENTRY(v4_coherent_kern_range)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(v4_coherent_user_range)
+SYM_TYPED_FUNC_START(v4_coherent_user_range)
mov r0, #0
ret lr
+SYM_FUNC_END(v4_coherent_user_range)
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -98,8 +103,11 @@ ENTRY(v4_coherent_user_range)
* - addr - kernel address
* - size - region size
*/
-ENTRY(v4_flush_kern_dcache_area)
- /* FALLTHROUGH */
+SYM_TYPED_FUNC_START(v4_flush_kern_dcache_area)
+#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */
+ b v4_dma_flush_range
+#endif
+SYM_FUNC_END(v4_flush_kern_dcache_area)
/*
* dma_flush_range(start, end)
@@ -109,12 +117,13 @@ ENTRY(v4_flush_kern_dcache_area)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(v4_dma_flush_range)
+SYM_TYPED_FUNC_START(v4_dma_flush_range)
#ifdef CONFIG_CPU_CP15
mov r0, #0
mcr p15, 0, r0, c7, c7, 0 @ flush ID cache
#endif
ret lr
+SYM_FUNC_END(v4_dma_flush_range)
/*
* dma_unmap_area(start, size, dir)
@@ -122,10 +131,11 @@ ENTRY(v4_dma_flush_range)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(v4_dma_unmap_area)
+SYM_TYPED_FUNC_START(v4_dma_unmap_area)
teq r2, #DMA_TO_DEVICE
bne v4_dma_flush_range
- /* FALLTHROUGH */
+ ret lr
+SYM_FUNC_END(v4_dma_unmap_area)
/*
* dma_map_area(start, size, dir)
@@ -133,15 +143,6 @@ ENTRY(v4_dma_unmap_area)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(v4_dma_map_area)
+SYM_TYPED_FUNC_START(v4_dma_map_area)
ret lr
-ENDPROC(v4_dma_unmap_area)
-ENDPROC(v4_dma_map_area)
-
- .globl v4_flush_kern_cache_louis
- .equ v4_flush_kern_cache_louis, v4_flush_kern_cache_all
-
- __INITDATA
-
- @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
- define_cache_functions v4
+SYM_FUNC_END(v4_dma_map_area)
diff --git a/arch/arm/mm/cache-v4wb.S b/arch/arm/mm/cache-v4wb.S
index ad382cee0fdb..ce55a2eef5da 100644
--- a/arch/arm/mm/cache-v4wb.S
+++ b/arch/arm/mm/cache-v4wb.S
@@ -6,6 +6,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <asm/assembler.h>
#include <asm/page.h>
#include "proc-macros.S"
@@ -53,11 +54,11 @@ flush_base:
*
* Unconditionally clean and invalidate the entire icache.
*/
-ENTRY(v4wb_flush_icache_all)
+SYM_TYPED_FUNC_START(v4wb_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
ret lr
-ENDPROC(v4wb_flush_icache_all)
+SYM_FUNC_END(v4wb_flush_icache_all)
/*
* flush_user_cache_all()
@@ -65,14 +66,14 @@ ENDPROC(v4wb_flush_icache_all)
* Clean and invalidate all cache entries in a particular address
* space.
*/
-ENTRY(v4wb_flush_user_cache_all)
- /* FALLTHROUGH */
+SYM_FUNC_ALIAS(v4wb_flush_user_cache_all, v4wb_flush_kern_cache_all)
+
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
-ENTRY(v4wb_flush_kern_cache_all)
+SYM_TYPED_FUNC_START(v4wb_flush_kern_cache_all)
mov ip, #0
mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache
__flush_whole_cache:
@@ -93,6 +94,7 @@ __flush_whole_cache:
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain write buffer
ret lr
+SYM_FUNC_END(v4wb_flush_kern_cache_all)
/*
* flush_user_cache_range(start, end, flags)
@@ -104,7 +106,7 @@ __flush_whole_cache:
* - end - end address (exclusive, page aligned)
* - flags - vma_area_struct flags describing address space
*/
-ENTRY(v4wb_flush_user_cache_range)
+SYM_TYPED_FUNC_START(v4wb_flush_user_cache_range)
mov ip, #0
sub r3, r1, r0 @ calculate total size
tst r2, #VM_EXEC @ executable region?
@@ -121,6 +123,7 @@ ENTRY(v4wb_flush_user_cache_range)
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer
ret lr
+SYM_FUNC_END(v4wb_flush_user_cache_range)
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -131,9 +134,12 @@ ENTRY(v4wb_flush_user_cache_range)
* - addr - kernel address
* - size - region size
*/
-ENTRY(v4wb_flush_kern_dcache_area)
+SYM_TYPED_FUNC_START(v4wb_flush_kern_dcache_area)
add r1, r0, r1
- /* fall through */
+#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */
+ b v4wb_coherent_user_range
+#endif
+SYM_FUNC_END(v4wb_flush_kern_dcache_area)
/*
* coherent_kern_range(start, end)
@@ -145,8 +151,11 @@ ENTRY(v4wb_flush_kern_dcache_area)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(v4wb_coherent_kern_range)
- /* fall through */
+SYM_TYPED_FUNC_START(v4wb_coherent_kern_range)
+#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */
+ b v4wb_coherent_user_range
+#endif
+SYM_FUNC_END(v4wb_coherent_kern_range)
/*
* coherent_user_range(start, end)
@@ -158,7 +167,7 @@ ENTRY(v4wb_coherent_kern_range)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(v4wb_coherent_user_range)
+SYM_TYPED_FUNC_START(v4wb_coherent_user_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
@@ -169,7 +178,7 @@ ENTRY(v4wb_coherent_user_range)
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
ret lr
-
+SYM_FUNC_END(v4wb_coherent_user_range)
/*
* dma_inv_range(start, end)
@@ -231,13 +240,13 @@ v4wb_dma_clean_range:
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(v4wb_dma_map_area)
+SYM_TYPED_FUNC_START(v4wb_dma_map_area)
add r1, r1, r0
cmp r2, #DMA_TO_DEVICE
beq v4wb_dma_clean_range
bcs v4wb_dma_inv_range
b v4wb_dma_flush_range
-ENDPROC(v4wb_dma_map_area)
+SYM_FUNC_END(v4wb_dma_map_area)
/*
* dma_unmap_area(start, size, dir)
@@ -245,14 +254,6 @@ ENDPROC(v4wb_dma_map_area)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(v4wb_dma_unmap_area)
+SYM_TYPED_FUNC_START(v4wb_dma_unmap_area)
ret lr
-ENDPROC(v4wb_dma_unmap_area)
-
- .globl v4wb_flush_kern_cache_louis
- .equ v4wb_flush_kern_cache_louis, v4wb_flush_kern_cache_all
-
- __INITDATA
-
- @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
- define_cache_functions v4wb
+SYM_FUNC_END(v4wb_dma_unmap_area)
diff --git a/arch/arm/mm/cache-v4wt.S b/arch/arm/mm/cache-v4wt.S
index 0b290c25a99d..a97dc267b3b0 100644
--- a/arch/arm/mm/cache-v4wt.S
+++ b/arch/arm/mm/cache-v4wt.S
@@ -10,6 +10,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <asm/assembler.h>
#include <asm/page.h>
#include "proc-macros.S"
@@ -43,11 +44,11 @@
*
* Unconditionally clean and invalidate the entire icache.
*/
-ENTRY(v4wt_flush_icache_all)
+SYM_TYPED_FUNC_START(v4wt_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
ret lr
-ENDPROC(v4wt_flush_icache_all)
+SYM_FUNC_END(v4wt_flush_icache_all)
/*
* flush_user_cache_all()
@@ -55,14 +56,14 @@ ENDPROC(v4wt_flush_icache_all)
* Invalidate all cache entries in a particular address
* space.
*/
-ENTRY(v4wt_flush_user_cache_all)
- /* FALLTHROUGH */
+SYM_FUNC_ALIAS(v4wt_flush_user_cache_all, v4wt_flush_kern_cache_all)
+
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
-ENTRY(v4wt_flush_kern_cache_all)
+SYM_TYPED_FUNC_START(v4wt_flush_kern_cache_all)
mov r2, #VM_EXEC
mov ip, #0
__flush_whole_cache:
@@ -70,6 +71,7 @@ __flush_whole_cache:
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache
ret lr
+SYM_FUNC_END(v4wt_flush_kern_cache_all)
/*
* flush_user_cache_range(start, end, flags)
@@ -81,7 +83,7 @@ __flush_whole_cache:
* - end - end address (exclusive, page aligned)
* - flags - vma_area_struct flags describing address space
*/
-ENTRY(v4wt_flush_user_cache_range)
+SYM_TYPED_FUNC_START(v4wt_flush_user_cache_range)
sub r3, r1, r0 @ calculate total size
cmp r3, #CACHE_DLIMIT
bhs __flush_whole_cache
@@ -93,6 +95,7 @@ ENTRY(v4wt_flush_user_cache_range)
cmp r0, r1
blo 1b
ret lr
+SYM_FUNC_END(v4wt_flush_user_cache_range)
/*
* coherent_kern_range(start, end)
@@ -104,8 +107,11 @@ ENTRY(v4wt_flush_user_cache_range)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(v4wt_coherent_kern_range)
- /* FALLTRHOUGH */
+SYM_TYPED_FUNC_START(v4wt_coherent_kern_range)
+#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */
+ b v4wt_coherent_user_range
+#endif
+SYM_FUNC_END(v4wt_coherent_kern_range)
/*
* coherent_user_range(start, end)
@@ -117,7 +123,7 @@ ENTRY(v4wt_coherent_kern_range)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(v4wt_coherent_user_range)
+SYM_TYPED_FUNC_START(v4wt_coherent_user_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry
add r0, r0, #CACHE_DLINESIZE
@@ -125,6 +131,7 @@ ENTRY(v4wt_coherent_user_range)
blo 1b
mov r0, #0
ret lr
+SYM_FUNC_END(v4wt_coherent_user_range)
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -135,11 +142,12 @@ ENTRY(v4wt_coherent_user_range)
* - addr - kernel address
* - size - region size
*/
-ENTRY(v4wt_flush_kern_dcache_area)
+SYM_TYPED_FUNC_START(v4wt_flush_kern_dcache_area)
mov r2, #0
mcr p15, 0, r2, c7, c5, 0 @ invalidate I cache
add r1, r0, r1
- /* fallthrough */
+ b v4wt_dma_inv_range
+SYM_FUNC_END(v4wt_flush_kern_dcache_area)
/*
* dma_inv_range(start, end)
@@ -167,9 +175,10 @@ v4wt_dma_inv_range:
*
* - start - virtual start address
* - end - virtual end address
- */
- .globl v4wt_dma_flush_range
- .equ v4wt_dma_flush_range, v4wt_dma_inv_range
+*/
+SYM_TYPED_FUNC_START(v4wt_dma_flush_range)
+ b v4wt_dma_inv_range
+SYM_FUNC_END(v4wt_dma_flush_range)
/*
* dma_unmap_area(start, size, dir)
@@ -177,11 +186,12 @@ v4wt_dma_inv_range:
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(v4wt_dma_unmap_area)
+SYM_TYPED_FUNC_START(v4wt_dma_unmap_area)
add r1, r1, r0
teq r2, #DMA_TO_DEVICE
bne v4wt_dma_inv_range
- /* FALLTHROUGH */
+ ret lr
+SYM_FUNC_END(v4wt_dma_unmap_area)
/*
* dma_map_area(start, size, dir)
@@ -189,15 +199,6 @@ ENTRY(v4wt_dma_unmap_area)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(v4wt_dma_map_area)
+SYM_TYPED_FUNC_START(v4wt_dma_map_area)
ret lr
-ENDPROC(v4wt_dma_unmap_area)
-ENDPROC(v4wt_dma_map_area)
-
- .globl v4wt_flush_kern_cache_louis
- .equ v4wt_flush_kern_cache_louis, v4wt_flush_kern_cache_all
-
- __INITDATA
-
- @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
- define_cache_functions v4wt
+SYM_FUNC_END(v4wt_dma_map_area)
diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S
index 44211d8a296f..9f415476e218 100644
--- a/arch/arm/mm/cache-v6.S
+++ b/arch/arm/mm/cache-v6.S
@@ -8,6 +8,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <asm/assembler.h>
#include <asm/errno.h>
#include <asm/unwind.h>
@@ -34,7 +35,7 @@
* r0 - set to 0
* r1 - corrupted
*/
-ENTRY(v6_flush_icache_all)
+SYM_TYPED_FUNC_START(v6_flush_icache_all)
mov r0, #0
#ifdef CONFIG_ARM_ERRATA_411920
mrs r1, cpsr
@@ -51,7 +52,7 @@ ENTRY(v6_flush_icache_all)
mcr p15, 0, r0, c7, c5, 0 @ invalidate I-cache
#endif
ret lr
-ENDPROC(v6_flush_icache_all)
+SYM_FUNC_END(v6_flush_icache_all)
/*
* v6_flush_cache_all()
@@ -60,7 +61,7 @@ ENDPROC(v6_flush_icache_all)
*
* It is assumed that:
*/
-ENTRY(v6_flush_kern_cache_all)
+SYM_TYPED_FUNC_START(v6_flush_kern_cache_all)
mov r0, #0
#ifdef HARVARD_CACHE
mcr p15, 0, r0, c7, c14, 0 @ D cache clean+invalidate
@@ -73,6 +74,7 @@ ENTRY(v6_flush_kern_cache_all)
mcr p15, 0, r0, c7, c15, 0 @ Cache clean+invalidate
#endif
ret lr
+SYM_FUNC_END(v6_flush_kern_cache_all)
/*
* v6_flush_cache_all()
@@ -81,8 +83,9 @@ ENTRY(v6_flush_kern_cache_all)
*
* - mm - mm_struct describing address space
*/
-ENTRY(v6_flush_user_cache_all)
- /*FALLTHROUGH*/
+SYM_TYPED_FUNC_START(v6_flush_user_cache_all)
+ ret lr
+SYM_FUNC_END(v6_flush_user_cache_all)
/*
* v6_flush_cache_range(start, end, flags)
@@ -96,8 +99,9 @@ ENTRY(v6_flush_user_cache_all)
* It is assumed that:
* - we have a VIPT cache.
*/
-ENTRY(v6_flush_user_cache_range)
+SYM_TYPED_FUNC_START(v6_flush_user_cache_range)
ret lr
+SYM_FUNC_END(v6_flush_user_cache_range)
/*
* v6_coherent_kern_range(start,end)
@@ -112,8 +116,11 @@ ENTRY(v6_flush_user_cache_range)
* It is assumed that:
* - the Icache does not read data from the write buffer
*/
-ENTRY(v6_coherent_kern_range)
- /* FALLTHROUGH */
+SYM_TYPED_FUNC_START(v6_coherent_kern_range)
+#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */
+ b v6_coherent_user_range
+#endif
+SYM_FUNC_END(v6_coherent_kern_range)
/*
* v6_coherent_user_range(start,end)
@@ -128,7 +135,7 @@ ENTRY(v6_coherent_kern_range)
* It is assumed that:
* - the Icache does not read data from the write buffer
*/
-ENTRY(v6_coherent_user_range)
+SYM_TYPED_FUNC_START(v6_coherent_user_range)
UNWIND(.fnstart )
#ifdef HARVARD_CACHE
bic r0, r0, #CACHE_LINE_SIZE - 1
@@ -159,8 +166,7 @@ ENTRY(v6_coherent_user_range)
mov r0, #-EFAULT
ret lr
UNWIND(.fnend )
-ENDPROC(v6_coherent_user_range)
-ENDPROC(v6_coherent_kern_range)
+SYM_FUNC_END(v6_coherent_user_range)
/*
* v6_flush_kern_dcache_area(void *addr, size_t size)
@@ -171,7 +177,7 @@ ENDPROC(v6_coherent_kern_range)
* - addr - kernel address
* - size - region size
*/
-ENTRY(v6_flush_kern_dcache_area)
+SYM_TYPED_FUNC_START(v6_flush_kern_dcache_area)
add r1, r0, r1
bic r0, r0, #D_CACHE_LINE_SIZE - 1
1:
@@ -188,7 +194,7 @@ ENTRY(v6_flush_kern_dcache_area)
mcr p15, 0, r0, c7, c10, 4
#endif
ret lr
-
+SYM_FUNC_END(v6_flush_kern_dcache_area)
/*
* v6_dma_inv_range(start,end)
@@ -253,7 +259,7 @@ v6_dma_clean_range:
* - start - virtual start address of region
* - end - virtual end address of region
*/
-ENTRY(v6_dma_flush_range)
+SYM_TYPED_FUNC_START(v6_dma_flush_range)
bic r0, r0, #D_CACHE_LINE_SIZE - 1
1:
#ifdef HARVARD_CACHE
@@ -267,6 +273,7 @@ ENTRY(v6_dma_flush_range)
mov r0, #0
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
ret lr
+SYM_FUNC_END(v6_dma_flush_range)
/*
* dma_map_area(start, size, dir)
@@ -274,12 +281,12 @@ ENTRY(v6_dma_flush_range)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(v6_dma_map_area)
+SYM_TYPED_FUNC_START(v6_dma_map_area)
add r1, r1, r0
teq r2, #DMA_FROM_DEVICE
beq v6_dma_inv_range
b v6_dma_clean_range
-ENDPROC(v6_dma_map_area)
+SYM_FUNC_END(v6_dma_map_area)
/*
* dma_unmap_area(start, size, dir)
@@ -287,17 +294,9 @@ ENDPROC(v6_dma_map_area)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(v6_dma_unmap_area)
+SYM_TYPED_FUNC_START(v6_dma_unmap_area)
add r1, r1, r0
teq r2, #DMA_TO_DEVICE
bne v6_dma_inv_range
ret lr
-ENDPROC(v6_dma_unmap_area)
-
- .globl v6_flush_kern_cache_louis
- .equ v6_flush_kern_cache_louis, v6_flush_kern_cache_all
-
- __INITDATA
-
- @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
- define_cache_functions v6
+SYM_FUNC_END(v6_dma_unmap_area)
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 127afe2096ba..201ca05436fa 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -9,6 +9,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <asm/assembler.h>
#include <asm/errno.h>
#include <asm/unwind.h>
@@ -80,12 +81,12 @@ ENDPROC(v7_invalidate_l1)
* Registers:
* r0 - set to 0
*/
-ENTRY(v7_flush_icache_all)
+SYM_TYPED_FUNC_START(v7_flush_icache_all)
mov r0, #0
ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable
ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate
ret lr
-ENDPROC(v7_flush_icache_all)
+SYM_FUNC_END(v7_flush_icache_all)
/*
* v7_flush_dcache_louis()
@@ -193,7 +194,7 @@ ENDPROC(v7_flush_dcache_all)
* unification in a single instruction.
*
*/
-ENTRY(v7_flush_kern_cache_all)
+SYM_TYPED_FUNC_START(v7_flush_kern_cache_all)
stmfd sp!, {r4-r6, r9-r10, lr}
bl v7_flush_dcache_all
mov r0, #0
@@ -201,7 +202,7 @@ ENTRY(v7_flush_kern_cache_all)
ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate
ldmfd sp!, {r4-r6, r9-r10, lr}
ret lr
-ENDPROC(v7_flush_kern_cache_all)
+SYM_FUNC_END(v7_flush_kern_cache_all)
/*
* v7_flush_kern_cache_louis(void)
@@ -209,7 +210,7 @@ ENDPROC(v7_flush_kern_cache_all)
* Flush the data cache up to Level of Unification Inner Shareable.
* Invalidate the I-cache to the point of unification.
*/
-ENTRY(v7_flush_kern_cache_louis)
+SYM_TYPED_FUNC_START(v7_flush_kern_cache_louis)
stmfd sp!, {r4-r6, r9-r10, lr}
bl v7_flush_dcache_louis
mov r0, #0
@@ -217,7 +218,7 @@ ENTRY(v7_flush_kern_cache_louis)
ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate
ldmfd sp!, {r4-r6, r9-r10, lr}
ret lr
-ENDPROC(v7_flush_kern_cache_louis)
+SYM_FUNC_END(v7_flush_kern_cache_louis)
/*
* v7_flush_cache_all()
@@ -226,8 +227,9 @@ ENDPROC(v7_flush_kern_cache_louis)
*
* - mm - mm_struct describing address space
*/
-ENTRY(v7_flush_user_cache_all)
- /*FALLTHROUGH*/
+SYM_TYPED_FUNC_START(v7_flush_user_cache_all)
+ ret lr
+SYM_FUNC_END(v7_flush_user_cache_all)
/*
* v7_flush_cache_range(start, end, flags)
@@ -241,10 +243,9 @@ ENTRY(v7_flush_user_cache_all)
* It is assumed that:
* - we have a VIPT cache.
*/
-ENTRY(v7_flush_user_cache_range)
+SYM_TYPED_FUNC_START(v7_flush_user_cache_range)
ret lr
-ENDPROC(v7_flush_user_cache_all)
-ENDPROC(v7_flush_user_cache_range)
+SYM_FUNC_END(v7_flush_user_cache_range)
/*
* v7_coherent_kern_range(start,end)
@@ -259,8 +260,11 @@ ENDPROC(v7_flush_user_cache_range)
* It is assumed that:
* - the Icache does not read data from the write buffer
*/
-ENTRY(v7_coherent_kern_range)
- /* FALLTHROUGH */
+SYM_TYPED_FUNC_START(v7_coherent_kern_range)
+#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */
+ b v7_coherent_user_range
+#endif
+SYM_FUNC_END(v7_coherent_kern_range)
/*
* v7_coherent_user_range(start,end)
@@ -275,7 +279,7 @@ ENTRY(v7_coherent_kern_range)
* It is assumed that:
* - the Icache does not read data from the write buffer
*/
-ENTRY(v7_coherent_user_range)
+SYM_TYPED_FUNC_START(v7_coherent_user_range)
UNWIND(.fnstart )
dcache_line_size r2, r3
sub r3, r2, #1
@@ -321,8 +325,7 @@ ENTRY(v7_coherent_user_range)
mov r0, #-EFAULT
ret lr
UNWIND(.fnend )
-ENDPROC(v7_coherent_kern_range)
-ENDPROC(v7_coherent_user_range)
+SYM_FUNC_END(v7_coherent_user_range)
/*
* v7_flush_kern_dcache_area(void *addr, size_t size)
@@ -333,7 +336,7 @@ ENDPROC(v7_coherent_user_range)
* - addr - kernel address
* - size - region size
*/
-ENTRY(v7_flush_kern_dcache_area)
+SYM_TYPED_FUNC_START(v7_flush_kern_dcache_area)
dcache_line_size r2, r3
add r1, r0, r1
sub r3, r2, #1
@@ -349,7 +352,7 @@ ENTRY(v7_flush_kern_dcache_area)
blo 1b
dsb st
ret lr
-ENDPROC(v7_flush_kern_dcache_area)
+SYM_FUNC_END(v7_flush_kern_dcache_area)
/*
* v7_dma_inv_range(start,end)
@@ -413,7 +416,7 @@ ENDPROC(v7_dma_clean_range)
* - start - virtual start address of region
* - end - virtual end address of region
*/
-ENTRY(v7_dma_flush_range)
+SYM_TYPED_FUNC_START(v7_dma_flush_range)
dcache_line_size r2, r3
sub r3, r2, #1
bic r0, r0, r3
@@ -428,7 +431,7 @@ ENTRY(v7_dma_flush_range)
blo 1b
dsb st
ret lr
-ENDPROC(v7_dma_flush_range)
+SYM_FUNC_END(v7_dma_flush_range)
/*
* dma_map_area(start, size, dir)
@@ -436,12 +439,12 @@ ENDPROC(v7_dma_flush_range)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(v7_dma_map_area)
+SYM_TYPED_FUNC_START(v7_dma_map_area)
add r1, r1, r0
teq r2, #DMA_FROM_DEVICE
beq v7_dma_inv_range
b v7_dma_clean_range
-ENDPROC(v7_dma_map_area)
+SYM_FUNC_END(v7_dma_map_area)
/*
* dma_unmap_area(start, size, dir)
@@ -449,34 +452,9 @@ ENDPROC(v7_dma_map_area)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(v7_dma_unmap_area)
+SYM_TYPED_FUNC_START(v7_dma_unmap_area)
add r1, r1, r0
teq r2, #DMA_TO_DEVICE
bne v7_dma_inv_range
ret lr
-ENDPROC(v7_dma_unmap_area)
-
- __INITDATA
-
- @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
- define_cache_functions v7
-
- /* The Broadcom Brahma-B15 read-ahead cache requires some modifications
- * to the v7_cache_fns, we only override the ones we need
- */
-#ifndef CONFIG_CACHE_B15_RAC
- globl_equ b15_flush_kern_cache_all, v7_flush_kern_cache_all
-#endif
- globl_equ b15_flush_icache_all, v7_flush_icache_all
- globl_equ b15_flush_kern_cache_louis, v7_flush_kern_cache_louis
- globl_equ b15_flush_user_cache_all, v7_flush_user_cache_all
- globl_equ b15_flush_user_cache_range, v7_flush_user_cache_range
- globl_equ b15_coherent_kern_range, v7_coherent_kern_range
- globl_equ b15_coherent_user_range, v7_coherent_user_range
- globl_equ b15_flush_kern_dcache_area, v7_flush_kern_dcache_area
-
- globl_equ b15_dma_map_area, v7_dma_map_area
- globl_equ b15_dma_unmap_area, v7_dma_unmap_area
- globl_equ b15_dma_flush_range, v7_dma_flush_range
-
- define_cache_functions b15
+SYM_FUNC_END(v7_dma_unmap_area)
diff --git a/arch/arm/mm/cache-v7m.S b/arch/arm/mm/cache-v7m.S
index eb60b5e5e2ad..14d719eba729 100644
--- a/arch/arm/mm/cache-v7m.S
+++ b/arch/arm/mm/cache-v7m.S
@@ -11,6 +11,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <asm/assembler.h>
#include <asm/errno.h>
#include <asm/unwind.h>
@@ -159,10 +160,10 @@ ENDPROC(v7m_invalidate_l1)
* Registers:
* r0 - set to 0
*/
-ENTRY(v7m_flush_icache_all)
+SYM_TYPED_FUNC_START(v7m_flush_icache_all)
invalidate_icache r0
ret lr
-ENDPROC(v7m_flush_icache_all)
+SYM_FUNC_END(v7m_flush_icache_all)
/*
* v7m_flush_dcache_all()
@@ -236,13 +237,13 @@ ENDPROC(v7m_flush_dcache_all)
* unification in a single instruction.
*
*/
-ENTRY(v7m_flush_kern_cache_all)
+SYM_TYPED_FUNC_START(v7m_flush_kern_cache_all)
stmfd sp!, {r4-r7, r9-r11, lr}
bl v7m_flush_dcache_all
invalidate_icache r0
ldmfd sp!, {r4-r7, r9-r11, lr}
ret lr
-ENDPROC(v7m_flush_kern_cache_all)
+SYM_FUNC_END(v7m_flush_kern_cache_all)
/*
* v7m_flush_cache_all()
@@ -251,8 +252,9 @@ ENDPROC(v7m_flush_kern_cache_all)
*
* - mm - mm_struct describing address space
*/
-ENTRY(v7m_flush_user_cache_all)
- /*FALLTHROUGH*/
+SYM_TYPED_FUNC_START(v7m_flush_user_cache_all)
+ ret lr
+SYM_FUNC_END(v7m_flush_user_cache_all)
/*
* v7m_flush_cache_range(start, end, flags)
@@ -266,10 +268,9 @@ ENTRY(v7m_flush_user_cache_all)
* It is assumed that:
* - we have a VIPT cache.
*/
-ENTRY(v7m_flush_user_cache_range)
+SYM_TYPED_FUNC_START(v7m_flush_user_cache_range)
ret lr
-ENDPROC(v7m_flush_user_cache_all)
-ENDPROC(v7m_flush_user_cache_range)
+SYM_FUNC_END(v7m_flush_user_cache_range)
/*
* v7m_coherent_kern_range(start,end)
@@ -284,8 +285,11 @@ ENDPROC(v7m_flush_user_cache_range)
* It is assumed that:
* - the Icache does not read data from the write buffer
*/
-ENTRY(v7m_coherent_kern_range)
- /* FALLTHROUGH */
+SYM_TYPED_FUNC_START(v7m_coherent_kern_range)
+#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */
+ b v7m_coherent_user_range
+#endif
+SYM_FUNC_END(v7m_coherent_kern_range)
/*
* v7m_coherent_user_range(start,end)
@@ -300,7 +304,7 @@ ENTRY(v7m_coherent_kern_range)
* It is assumed that:
* - the Icache does not read data from the write buffer
*/
-ENTRY(v7m_coherent_user_range)
+SYM_TYPED_FUNC_START(v7m_coherent_user_range)
UNWIND(.fnstart )
dcache_line_size r2, r3
sub r3, r2, #1
@@ -328,8 +332,7 @@ ENTRY(v7m_coherent_user_range)
isb
ret lr
UNWIND(.fnend )
-ENDPROC(v7m_coherent_kern_range)
-ENDPROC(v7m_coherent_user_range)
+SYM_FUNC_END(v7m_coherent_user_range)
/*
* v7m_flush_kern_dcache_area(void *addr, size_t size)
@@ -340,7 +343,7 @@ ENDPROC(v7m_coherent_user_range)
* - addr - kernel address
* - size - region size
*/
-ENTRY(v7m_flush_kern_dcache_area)
+SYM_TYPED_FUNC_START(v7m_flush_kern_dcache_area)
dcache_line_size r2, r3
add r1, r0, r1
sub r3, r2, #1
@@ -352,7 +355,7 @@ ENTRY(v7m_flush_kern_dcache_area)
blo 1b
dsb st
ret lr
-ENDPROC(v7m_flush_kern_dcache_area)
+SYM_FUNC_END(v7m_flush_kern_dcache_area)
/*
* v7m_dma_inv_range(start,end)
@@ -408,7 +411,7 @@ ENDPROC(v7m_dma_clean_range)
* - start - virtual start address of region
* - end - virtual end address of region
*/
-ENTRY(v7m_dma_flush_range)
+SYM_TYPED_FUNC_START(v7m_dma_flush_range)
dcache_line_size r2, r3
sub r3, r2, #1
bic r0, r0, r3
@@ -419,7 +422,7 @@ ENTRY(v7m_dma_flush_range)
blo 1b
dsb st
ret lr
-ENDPROC(v7m_dma_flush_range)
+SYM_FUNC_END(v7m_dma_flush_range)
/*
* dma_map_area(start, size, dir)
@@ -427,12 +430,12 @@ ENDPROC(v7m_dma_flush_range)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(v7m_dma_map_area)
+SYM_TYPED_FUNC_START(v7m_dma_map_area)
add r1, r1, r0
teq r2, #DMA_FROM_DEVICE
beq v7m_dma_inv_range
b v7m_dma_clean_range
-ENDPROC(v7m_dma_map_area)
+SYM_FUNC_END(v7m_dma_map_area)
/*
* dma_unmap_area(start, size, dir)
@@ -440,17 +443,9 @@ ENDPROC(v7m_dma_map_area)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(v7m_dma_unmap_area)
+SYM_TYPED_FUNC_START(v7m_dma_unmap_area)
add r1, r1, r0
teq r2, #DMA_TO_DEVICE
bne v7m_dma_inv_range
ret lr
-ENDPROC(v7m_dma_unmap_area)
-
- .globl v7m_flush_kern_cache_louis
- .equ v7m_flush_kern_cache_louis, v7m_flush_kern_cache_all
-
- __INITDATA
-
- @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
- define_cache_functions v7m
+SYM_FUNC_END(v7m_dma_unmap_area)
diff --git a/arch/arm/mm/cache.c b/arch/arm/mm/cache.c
new file mode 100644
index 000000000000..e6fbc599c9ed
--- /dev/null
+++ b/arch/arm/mm/cache.c
@@ -0,0 +1,663 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This file defines C prototypes for the low-level cache assembly functions
+ * and populates a vtable for each selected ARM CPU cache type.
+ */
+
+#include <linux/types.h>
+#include <asm/cacheflush.h>
+
+#ifdef CONFIG_CPU_CACHE_V4
+void v4_flush_icache_all(void);
+void v4_flush_kern_cache_all(void);
+void v4_flush_user_cache_all(void);
+void v4_flush_user_cache_range(unsigned long, unsigned long, unsigned int);
+void v4_coherent_kern_range(unsigned long, unsigned long);
+int v4_coherent_user_range(unsigned long, unsigned long);
+void v4_flush_kern_dcache_area(void *, size_t);
+void v4_dma_map_area(const void *, size_t, int);
+void v4_dma_unmap_area(const void *, size_t, int);
+void v4_dma_flush_range(const void *, const void *);
+
+struct cpu_cache_fns v4_cache_fns __initconst = {
+ .flush_icache_all = v4_flush_icache_all,
+ .flush_kern_all = v4_flush_kern_cache_all,
+ .flush_kern_louis = v4_flush_kern_cache_all,
+ .flush_user_all = v4_flush_user_cache_all,
+ .flush_user_range = v4_flush_user_cache_range,
+ .coherent_kern_range = v4_coherent_kern_range,
+ .coherent_user_range = v4_coherent_user_range,
+ .flush_kern_dcache_area = v4_flush_kern_dcache_area,
+ .dma_map_area = v4_dma_map_area,
+ .dma_unmap_area = v4_dma_unmap_area,
+ .dma_flush_range = v4_dma_flush_range,
+};
+#endif
+
+/* V4 write-back cache "V4WB" */
+#ifdef CONFIG_CPU_CACHE_V4WB
+void v4wb_flush_icache_all(void);
+void v4wb_flush_kern_cache_all(void);
+void v4wb_flush_user_cache_all(void);
+void v4wb_flush_user_cache_range(unsigned long, unsigned long, unsigned int);
+void v4wb_coherent_kern_range(unsigned long, unsigned long);
+int v4wb_coherent_user_range(unsigned long, unsigned long);
+void v4wb_flush_kern_dcache_area(void *, size_t);
+void v4wb_dma_map_area(const void *, size_t, int);
+void v4wb_dma_unmap_area(const void *, size_t, int);
+void v4wb_dma_flush_range(const void *, const void *);
+
+struct cpu_cache_fns v4wb_cache_fns __initconst = {
+ .flush_icache_all = v4wb_flush_icache_all,
+ .flush_kern_all = v4wb_flush_kern_cache_all,
+ .flush_kern_louis = v4wb_flush_kern_cache_all,
+ .flush_user_all = v4wb_flush_user_cache_all,
+ .flush_user_range = v4wb_flush_user_cache_range,
+ .coherent_kern_range = v4wb_coherent_kern_range,
+ .coherent_user_range = v4wb_coherent_user_range,
+ .flush_kern_dcache_area = v4wb_flush_kern_dcache_area,
+ .dma_map_area = v4wb_dma_map_area,
+ .dma_unmap_area = v4wb_dma_unmap_area,
+ .dma_flush_range = v4wb_dma_flush_range,
+};
+#endif
+
+/* V4 write-through cache "V4WT" */
+#ifdef CONFIG_CPU_CACHE_V4WT
+void v4wt_flush_icache_all(void);
+void v4wt_flush_kern_cache_all(void);
+void v4wt_flush_user_cache_all(void);
+void v4wt_flush_user_cache_range(unsigned long, unsigned long, unsigned int);
+void v4wt_coherent_kern_range(unsigned long, unsigned long);
+int v4wt_coherent_user_range(unsigned long, unsigned long);
+void v4wt_flush_kern_dcache_area(void *, size_t);
+void v4wt_dma_map_area(const void *, size_t, int);
+void v4wt_dma_unmap_area(const void *, size_t, int);
+void v4wt_dma_flush_range(const void *, const void *);
+
+struct cpu_cache_fns v4wt_cache_fns __initconst = {
+ .flush_icache_all = v4wt_flush_icache_all,
+ .flush_kern_all = v4wt_flush_kern_cache_all,
+ .flush_kern_louis = v4wt_flush_kern_cache_all,
+ .flush_user_all = v4wt_flush_user_cache_all,
+ .flush_user_range = v4wt_flush_user_cache_range,
+ .coherent_kern_range = v4wt_coherent_kern_range,
+ .coherent_user_range = v4wt_coherent_user_range,
+ .flush_kern_dcache_area = v4wt_flush_kern_dcache_area,
+ .dma_map_area = v4wt_dma_map_area,
+ .dma_unmap_area = v4wt_dma_unmap_area,
+ .dma_flush_range = v4wt_dma_flush_range,
+};
+#endif
+
+/* Faraday FA526 cache */
+#ifdef CONFIG_CPU_CACHE_FA
+void fa_flush_icache_all(void);
+void fa_flush_kern_cache_all(void);
+void fa_flush_user_cache_all(void);
+void fa_flush_user_cache_range(unsigned long, unsigned long, unsigned int);
+void fa_coherent_kern_range(unsigned long, unsigned long);
+int fa_coherent_user_range(unsigned long, unsigned long);
+void fa_flush_kern_dcache_area(void *, size_t);
+void fa_dma_map_area(const void *, size_t, int);
+void fa_dma_unmap_area(const void *, size_t, int);
+void fa_dma_flush_range(const void *, const void *);
+
+struct cpu_cache_fns fa_cache_fns __initconst = {
+ .flush_icache_all = fa_flush_icache_all,
+ .flush_kern_all = fa_flush_kern_cache_all,
+ .flush_kern_louis = fa_flush_kern_cache_all,
+ .flush_user_all = fa_flush_user_cache_all,
+ .flush_user_range = fa_flush_user_cache_range,
+ .coherent_kern_range = fa_coherent_kern_range,
+ .coherent_user_range = fa_coherent_user_range,
+ .flush_kern_dcache_area = fa_flush_kern_dcache_area,
+ .dma_map_area = fa_dma_map_area,
+ .dma_unmap_area = fa_dma_unmap_area,
+ .dma_flush_range = fa_dma_flush_range,
+};
+#endif
+
+#ifdef CONFIG_CPU_CACHE_V6
+void v6_flush_icache_all(void);
+void v6_flush_kern_cache_all(void);
+void v6_flush_user_cache_all(void);
+void v6_flush_user_cache_range(unsigned long, unsigned long, unsigned int);
+void v6_coherent_kern_range(unsigned long, unsigned long);
+int v6_coherent_user_range(unsigned long, unsigned long);
+void v6_flush_kern_dcache_area(void *, size_t);
+void v6_dma_map_area(const void *, size_t, int);
+void v6_dma_unmap_area(const void *, size_t, int);
+void v6_dma_flush_range(const void *, const void *);
+
+struct cpu_cache_fns v6_cache_fns __initconst = {
+ .flush_icache_all = v6_flush_icache_all,
+ .flush_kern_all = v6_flush_kern_cache_all,
+ .flush_kern_louis = v6_flush_kern_cache_all,
+ .flush_user_all = v6_flush_user_cache_all,
+ .flush_user_range = v6_flush_user_cache_range,
+ .coherent_kern_range = v6_coherent_kern_range,
+ .coherent_user_range = v6_coherent_user_range,
+ .flush_kern_dcache_area = v6_flush_kern_dcache_area,
+ .dma_map_area = v6_dma_map_area,
+ .dma_unmap_area = v6_dma_unmap_area,
+ .dma_flush_range = v6_dma_flush_range,
+};
+#endif
+
+#ifdef CONFIG_CPU_CACHE_V7
+void v7_flush_icache_all(void);
+void v7_flush_kern_cache_all(void);
+void v7_flush_kern_cache_louis(void);
+void v7_flush_user_cache_all(void);
+void v7_flush_user_cache_range(unsigned long, unsigned long, unsigned int);
+void v7_coherent_kern_range(unsigned long, unsigned long);
+int v7_coherent_user_range(unsigned long, unsigned long);
+void v7_flush_kern_dcache_area(void *, size_t);
+void v7_dma_map_area(const void *, size_t, int);
+void v7_dma_unmap_area(const void *, size_t, int);
+void v7_dma_flush_range(const void *, const void *);
+
+struct cpu_cache_fns v7_cache_fns __initconst = {
+ .flush_icache_all = v7_flush_icache_all,
+ .flush_kern_all = v7_flush_kern_cache_all,
+ .flush_kern_louis = v7_flush_kern_cache_louis,
+ .flush_user_all = v7_flush_user_cache_all,
+ .flush_user_range = v7_flush_user_cache_range,
+ .coherent_kern_range = v7_coherent_kern_range,
+ .coherent_user_range = v7_coherent_user_range,
+ .flush_kern_dcache_area = v7_flush_kern_dcache_area,
+ .dma_map_area = v7_dma_map_area,
+ .dma_unmap_area = v7_dma_unmap_area,
+ .dma_flush_range = v7_dma_flush_range,
+};
+
+/* Special quirky cache flush function for Broadcom B15 v7 caches */
+void b15_flush_kern_cache_all(void);
+
+struct cpu_cache_fns b15_cache_fns __initconst = {
+ .flush_icache_all = v7_flush_icache_all,
+#ifdef CONFIG_CACHE_B15_RAC
+ .flush_kern_all = b15_flush_kern_cache_all,
+#else
+ .flush_kern_all = v7_flush_kern_cache_all,
+#endif
+ .flush_kern_louis = v7_flush_kern_cache_louis,
+ .flush_user_all = v7_flush_user_cache_all,
+ .flush_user_range = v7_flush_user_cache_range,
+ .coherent_kern_range = v7_coherent_kern_range,
+ .coherent_user_range = v7_coherent_user_range,
+ .flush_kern_dcache_area = v7_flush_kern_dcache_area,
+ .dma_map_area = v7_dma_map_area,
+ .dma_unmap_area = v7_dma_unmap_area,
+ .dma_flush_range = v7_dma_flush_range,
+};
+#endif
+
+/* The NOP cache is just a set of dummy stubs that by definition do nothing */
+#ifdef CONFIG_CPU_CACHE_NOP
+void nop_flush_icache_all(void);
+void nop_flush_kern_cache_all(void);
+void nop_flush_user_cache_all(void);
+void nop_flush_user_cache_range(unsigned long start, unsigned long end, unsigned int flags);
+void nop_coherent_kern_range(unsigned long start, unsigned long end);
+int nop_coherent_user_range(unsigned long, unsigned long);
+void nop_flush_kern_dcache_area(void *kaddr, size_t size);
+void nop_dma_map_area(const void *start, size_t size, int flags);
+void nop_dma_unmap_area(const void *start, size_t size, int flags);
+void nop_dma_flush_range(const void *start, const void *end);
+
+struct cpu_cache_fns nop_cache_fns __initconst = {
+ .flush_icache_all = nop_flush_icache_all,
+ .flush_kern_all = nop_flush_kern_cache_all,
+ .flush_kern_louis = nop_flush_kern_cache_all,
+ .flush_user_all = nop_flush_user_cache_all,
+ .flush_user_range = nop_flush_user_cache_range,
+ .coherent_kern_range = nop_coherent_kern_range,
+ .coherent_user_range = nop_coherent_user_range,
+ .flush_kern_dcache_area = nop_flush_kern_dcache_area,
+ .dma_map_area = nop_dma_map_area,
+ .dma_unmap_area = nop_dma_unmap_area,
+ .dma_flush_range = nop_dma_flush_range,
+};
+#endif
+
+#ifdef CONFIG_CPU_CACHE_V7M
+void v7m_flush_icache_all(void);
+void v7m_flush_kern_cache_all(void);
+void v7m_flush_user_cache_all(void);
+void v7m_flush_user_cache_range(unsigned long, unsigned long, unsigned int);
+void v7m_coherent_kern_range(unsigned long, unsigned long);
+int v7m_coherent_user_range(unsigned long, unsigned long);
+void v7m_flush_kern_dcache_area(void *, size_t);
+void v7m_dma_map_area(const void *, size_t, int);
+void v7m_dma_unmap_area(const void *, size_t, int);
+void v7m_dma_flush_range(const void *, const void *);
+
+struct cpu_cache_fns v7m_cache_fns __initconst = {
+ .flush_icache_all = v7m_flush_icache_all,
+ .flush_kern_all = v7m_flush_kern_cache_all,
+ .flush_kern_louis = v7m_flush_kern_cache_all,
+ .flush_user_all = v7m_flush_user_cache_all,
+ .flush_user_range = v7m_flush_user_cache_range,
+ .coherent_kern_range = v7m_coherent_kern_range,
+ .coherent_user_range = v7m_coherent_user_range,
+ .flush_kern_dcache_area = v7m_flush_kern_dcache_area,
+ .dma_map_area = v7m_dma_map_area,
+ .dma_unmap_area = v7m_dma_unmap_area,
+ .dma_flush_range = v7m_dma_flush_range,
+};
+#endif
+
+#ifdef CONFIG_CPU_ARM1020
+void arm1020_flush_icache_all(void);
+void arm1020_flush_kern_cache_all(void);
+void arm1020_flush_user_cache_all(void);
+void arm1020_flush_user_cache_range(unsigned long, unsigned long, unsigned int);
+void arm1020_coherent_kern_range(unsigned long, unsigned long);
+int arm1020_coherent_user_range(unsigned long, unsigned long);
+void arm1020_flush_kern_dcache_area(void *, size_t);
+void arm1020_dma_map_area(const void *, size_t, int);
+void arm1020_dma_unmap_area(const void *, size_t, int);
+void arm1020_dma_flush_range(const void *, const void *);
+
+struct cpu_cache_fns arm1020_cache_fns __initconst = {
+ .flush_icache_all = arm1020_flush_icache_all,
+ .flush_kern_all = arm1020_flush_kern_cache_all,
+ .flush_kern_louis = arm1020_flush_kern_cache_all,
+ .flush_user_all = arm1020_flush_user_cache_all,
+ .flush_user_range = arm1020_flush_user_cache_range,
+ .coherent_kern_range = arm1020_coherent_kern_range,
+ .coherent_user_range = arm1020_coherent_user_range,
+ .flush_kern_dcache_area = arm1020_flush_kern_dcache_area,
+ .dma_map_area = arm1020_dma_map_area,
+ .dma_unmap_area = arm1020_dma_unmap_area,
+ .dma_flush_range = arm1020_dma_flush_range,
+};
+#endif
+
+#ifdef CONFIG_CPU_ARM1020E
+void arm1020e_flush_icache_all(void);
+void arm1020e_flush_kern_cache_all(void);
+void arm1020e_flush_user_cache_all(void);
+void arm1020e_flush_user_cache_range(unsigned long, unsigned long, unsigned int);
+void arm1020e_coherent_kern_range(unsigned long, unsigned long);
+int arm1020e_coherent_user_range(unsigned long, unsigned long);
+void arm1020e_flush_kern_dcache_area(void *, size_t);
+void arm1020e_dma_map_area(const void *, size_t, int);
+void arm1020e_dma_unmap_area(const void *, size_t, int);
+void arm1020e_dma_flush_range(const void *, const void *);
+
+struct cpu_cache_fns arm1020e_cache_fns __initconst = {
+ .flush_icache_all = arm1020e_flush_icache_all,
+ .flush_kern_all = arm1020e_flush_kern_cache_all,
+ .flush_kern_louis = arm1020e_flush_kern_cache_all,
+ .flush_user_all = arm1020e_flush_user_cache_all,
+ .flush_user_range = arm1020e_flush_user_cache_range,
+ .coherent_kern_range = arm1020e_coherent_kern_range,
+ .coherent_user_range = arm1020e_coherent_user_range,
+ .flush_kern_dcache_area = arm1020e_flush_kern_dcache_area,
+ .dma_map_area = arm1020e_dma_map_area,
+ .dma_unmap_area = arm1020e_dma_unmap_area,
+ .dma_flush_range = arm1020e_dma_flush_range,
+};
+#endif
+
+#ifdef CONFIG_CPU_ARM1022
+void arm1022_flush_icache_all(void);
+void arm1022_flush_kern_cache_all(void);
+void arm1022_flush_user_cache_all(void);
+void arm1022_flush_user_cache_range(unsigned long, unsigned long, unsigned int);
+void arm1022_coherent_kern_range(unsigned long, unsigned long);
+int arm1022_coherent_user_range(unsigned long, unsigned long);
+void arm1022_flush_kern_dcache_area(void *, size_t);
+void arm1022_dma_map_area(const void *, size_t, int);
+void arm1022_dma_unmap_area(const void *, size_t, int);
+void arm1022_dma_flush_range(const void *, const void *);
+
+struct cpu_cache_fns arm1022_cache_fns __initconst = {
+ .flush_icache_all = arm1022_flush_icache_all,
+ .flush_kern_all = arm1022_flush_kern_cache_all,
+ .flush_kern_louis = arm1022_flush_kern_cache_all,
+ .flush_user_all = arm1022_flush_user_cache_all,
+ .flush_user_range = arm1022_flush_user_cache_range,
+ .coherent_kern_range = arm1022_coherent_kern_range,
+ .coherent_user_range = arm1022_coherent_user_range,
+ .flush_kern_dcache_area = arm1022_flush_kern_dcache_area,
+ .dma_map_area = arm1022_dma_map_area,
+ .dma_unmap_area = arm1022_dma_unmap_area,
+ .dma_flush_range = arm1022_dma_flush_range,
+};
+#endif
+
+#ifdef CONFIG_CPU_ARM1026
+void arm1026_flush_icache_all(void);
+void arm1026_flush_kern_cache_all(void);
+void arm1026_flush_user_cache_all(void);
+void arm1026_flush_user_cache_range(unsigned long, unsigned long, unsigned int);
+void arm1026_coherent_kern_range(unsigned long, unsigned long);
+int arm1026_coherent_user_range(unsigned long, unsigned long);
+void arm1026_flush_kern_dcache_area(void *, size_t);
+void arm1026_dma_map_area(const void *, size_t, int);
+void arm1026_dma_unmap_area(const void *, size_t, int);
+void arm1026_dma_flush_range(const void *, const void *);
+
+struct cpu_cache_fns arm1026_cache_fns __initconst = {
+ .flush_icache_all = arm1026_flush_icache_all,
+ .flush_kern_all = arm1026_flush_kern_cache_all,
+ .flush_kern_louis = arm1026_flush_kern_cache_all,
+ .flush_user_all = arm1026_flush_user_cache_all,
+ .flush_user_range = arm1026_flush_user_cache_range,
+ .coherent_kern_range = arm1026_coherent_kern_range,
+ .coherent_user_range = arm1026_coherent_user_range,
+ .flush_kern_dcache_area = arm1026_flush_kern_dcache_area,
+ .dma_map_area = arm1026_dma_map_area,
+ .dma_unmap_area = arm1026_dma_unmap_area,
+ .dma_flush_range = arm1026_dma_flush_range,
+};
+#endif
+
+#if defined(CONFIG_CPU_ARM920T) && !defined(CONFIG_CPU_DCACHE_WRITETHROUGH)
+void arm920_flush_icache_all(void);
+void arm920_flush_kern_cache_all(void);
+void arm920_flush_user_cache_all(void);
+void arm920_flush_user_cache_range(unsigned long, unsigned long, unsigned int);
+void arm920_coherent_kern_range(unsigned long, unsigned long);
+int arm920_coherent_user_range(unsigned long, unsigned long);
+void arm920_flush_kern_dcache_area(void *, size_t);
+void arm920_dma_map_area(const void *, size_t, int);
+void arm920_dma_unmap_area(const void *, size_t, int);
+void arm920_dma_flush_range(const void *, const void *);
+
+struct cpu_cache_fns arm920_cache_fns __initconst = {
+ .flush_icache_all = arm920_flush_icache_all,
+ .flush_kern_all = arm920_flush_kern_cache_all,
+ .flush_kern_louis = arm920_flush_kern_cache_all,
+ .flush_user_all = arm920_flush_user_cache_all,
+ .flush_user_range = arm920_flush_user_cache_range,
+ .coherent_kern_range = arm920_coherent_kern_range,
+ .coherent_user_range = arm920_coherent_user_range,
+ .flush_kern_dcache_area = arm920_flush_kern_dcache_area,
+ .dma_map_area = arm920_dma_map_area,
+ .dma_unmap_area = arm920_dma_unmap_area,
+ .dma_flush_range = arm920_dma_flush_range,
+};
+#endif
+
+#if defined(CONFIG_CPU_ARM922T) && !defined(CONFIG_CPU_DCACHE_WRITETHROUGH)
+void arm922_flush_icache_all(void);
+void arm922_flush_kern_cache_all(void);
+void arm922_flush_user_cache_all(void);
+void arm922_flush_user_cache_range(unsigned long, unsigned long, unsigned int);
+void arm922_coherent_kern_range(unsigned long, unsigned long);
+int arm922_coherent_user_range(unsigned long, unsigned long);
+void arm922_flush_kern_dcache_area(void *, size_t);
+void arm922_dma_map_area(const void *, size_t, int);
+void arm922_dma_unmap_area(const void *, size_t, int);
+void arm922_dma_flush_range(const void *, const void *);
+
+struct cpu_cache_fns arm922_cache_fns __initconst = {
+ .flush_icache_all = arm922_flush_icache_all,
+ .flush_kern_all = arm922_flush_kern_cache_all,
+ .flush_kern_louis = arm922_flush_kern_cache_all,
+ .flush_user_all = arm922_flush_user_cache_all,
+ .flush_user_range = arm922_flush_user_cache_range,
+ .coherent_kern_range = arm922_coherent_kern_range,
+ .coherent_user_range = arm922_coherent_user_range,
+ .flush_kern_dcache_area = arm922_flush_kern_dcache_area,
+ .dma_map_area = arm922_dma_map_area,
+ .dma_unmap_area = arm922_dma_unmap_area,
+ .dma_flush_range = arm922_dma_flush_range,
+};
+#endif
+
+#ifdef CONFIG_CPU_ARM925T
+void arm925_flush_icache_all(void);
+void arm925_flush_kern_cache_all(void);
+void arm925_flush_user_cache_all(void);
+void arm925_flush_user_cache_range(unsigned long, unsigned long, unsigned int);
+void arm925_coherent_kern_range(unsigned long, unsigned long);
+int arm925_coherent_user_range(unsigned long, unsigned long);
+void arm925_flush_kern_dcache_area(void *, size_t);
+void arm925_dma_map_area(const void *, size_t, int);
+void arm925_dma_unmap_area(const void *, size_t, int);
+void arm925_dma_flush_range(const void *, const void *);
+
+struct cpu_cache_fns arm925_cache_fns __initconst = {
+ .flush_icache_all = arm925_flush_icache_all,
+ .flush_kern_all = arm925_flush_kern_cache_all,
+ .flush_kern_louis = arm925_flush_kern_cache_all,
+ .flush_user_all = arm925_flush_user_cache_all,
+ .flush_user_range = arm925_flush_user_cache_range,
+ .coherent_kern_range = arm925_coherent_kern_range,
+ .coherent_user_range = arm925_coherent_user_range,
+ .flush_kern_dcache_area = arm925_flush_kern_dcache_area,
+ .dma_map_area = arm925_dma_map_area,
+ .dma_unmap_area = arm925_dma_unmap_area,
+ .dma_flush_range = arm925_dma_flush_range,
+};
+#endif
+
+#ifdef CONFIG_CPU_ARM926T
+void arm926_flush_icache_all(void);
+void arm926_flush_kern_cache_all(void);
+void arm926_flush_user_cache_all(void);
+void arm926_flush_user_cache_range(unsigned long, unsigned long, unsigned int);
+void arm926_coherent_kern_range(unsigned long, unsigned long);
+int arm926_coherent_user_range(unsigned long, unsigned long);
+void arm926_flush_kern_dcache_area(void *, size_t);
+void arm926_dma_map_area(const void *, size_t, int);
+void arm926_dma_unmap_area(const void *, size_t, int);
+void arm926_dma_flush_range(const void *, const void *);
+
+struct cpu_cache_fns arm926_cache_fns __initconst = {
+ .flush_icache_all = arm926_flush_icache_all,
+ .flush_kern_all = arm926_flush_kern_cache_all,
+ .flush_kern_louis = arm926_flush_kern_cache_all,
+ .flush_user_all = arm926_flush_user_cache_all,
+ .flush_user_range = arm926_flush_user_cache_range,
+ .coherent_kern_range = arm926_coherent_kern_range,
+ .coherent_user_range = arm926_coherent_user_range,
+ .flush_kern_dcache_area = arm926_flush_kern_dcache_area,
+ .dma_map_area = arm926_dma_map_area,
+ .dma_unmap_area = arm926_dma_unmap_area,
+ .dma_flush_range = arm926_dma_flush_range,
+};
+#endif
+
+#ifdef CONFIG_CPU_ARM940T
+void arm940_flush_icache_all(void);
+void arm940_flush_kern_cache_all(void);
+void arm940_flush_user_cache_all(void);
+void arm940_flush_user_cache_range(unsigned long, unsigned long, unsigned int);
+void arm940_coherent_kern_range(unsigned long, unsigned long);
+int arm940_coherent_user_range(unsigned long, unsigned long);
+void arm940_flush_kern_dcache_area(void *, size_t);
+void arm940_dma_map_area(const void *, size_t, int);
+void arm940_dma_unmap_area(const void *, size_t, int);
+void arm940_dma_flush_range(const void *, const void *);
+
+struct cpu_cache_fns arm940_cache_fns __initconst = {
+ .flush_icache_all = arm940_flush_icache_all,
+ .flush_kern_all = arm940_flush_kern_cache_all,
+ .flush_kern_louis = arm940_flush_kern_cache_all,
+ .flush_user_all = arm940_flush_user_cache_all,
+ .flush_user_range = arm940_flush_user_cache_range,
+ .coherent_kern_range = arm940_coherent_kern_range,
+ .coherent_user_range = arm940_coherent_user_range,
+ .flush_kern_dcache_area = arm940_flush_kern_dcache_area,
+ .dma_map_area = arm940_dma_map_area,
+ .dma_unmap_area = arm940_dma_unmap_area,
+ .dma_flush_range = arm940_dma_flush_range,
+};
+#endif
+
+#ifdef CONFIG_CPU_ARM946E
+void arm946_flush_icache_all(void);
+void arm946_flush_kern_cache_all(void);
+void arm946_flush_user_cache_all(void);
+void arm946_flush_user_cache_range(unsigned long, unsigned long, unsigned int);
+void arm946_coherent_kern_range(unsigned long, unsigned long);
+int arm946_coherent_user_range(unsigned long, unsigned long);
+void arm946_flush_kern_dcache_area(void *, size_t);
+void arm946_dma_map_area(const void *, size_t, int);
+void arm946_dma_unmap_area(const void *, size_t, int);
+void arm946_dma_flush_range(const void *, const void *);
+
+struct cpu_cache_fns arm946_cache_fns __initconst = {
+ .flush_icache_all = arm946_flush_icache_all,
+ .flush_kern_all = arm946_flush_kern_cache_all,
+ .flush_kern_louis = arm946_flush_kern_cache_all,
+ .flush_user_all = arm946_flush_user_cache_all,
+ .flush_user_range = arm946_flush_user_cache_range,
+ .coherent_kern_range = arm946_coherent_kern_range,
+ .coherent_user_range = arm946_coherent_user_range,
+ .flush_kern_dcache_area = arm946_flush_kern_dcache_area,
+ .dma_map_area = arm946_dma_map_area,
+ .dma_unmap_area = arm946_dma_unmap_area,
+ .dma_flush_range = arm946_dma_flush_range,
+};
+#endif
+
+#ifdef CONFIG_CPU_XSCALE
+void xscale_flush_icache_all(void);
+void xscale_flush_kern_cache_all(void);
+void xscale_flush_user_cache_all(void);
+void xscale_flush_user_cache_range(unsigned long, unsigned long, unsigned int);
+void xscale_coherent_kern_range(unsigned long, unsigned long);
+int xscale_coherent_user_range(unsigned long, unsigned long);
+void xscale_flush_kern_dcache_area(void *, size_t);
+void xscale_dma_map_area(const void *, size_t, int);
+void xscale_dma_unmap_area(const void *, size_t, int);
+void xscale_dma_flush_range(const void *, const void *);
+
+struct cpu_cache_fns xscale_cache_fns __initconst = {
+ .flush_icache_all = xscale_flush_icache_all,
+ .flush_kern_all = xscale_flush_kern_cache_all,
+ .flush_kern_louis = xscale_flush_kern_cache_all,
+ .flush_user_all = xscale_flush_user_cache_all,
+ .flush_user_range = xscale_flush_user_cache_range,
+ .coherent_kern_range = xscale_coherent_kern_range,
+ .coherent_user_range = xscale_coherent_user_range,
+ .flush_kern_dcache_area = xscale_flush_kern_dcache_area,
+ .dma_map_area = xscale_dma_map_area,
+ .dma_unmap_area = xscale_dma_unmap_area,
+ .dma_flush_range = xscale_dma_flush_range,
+};
+
+/* The 80200 A0 and A1 need a special quirk for dma_map_area() */
+void xscale_80200_A0_A1_dma_map_area(const void *, size_t, int);
+
+struct cpu_cache_fns xscale_80200_A0_A1_cache_fns __initconst = {
+ .flush_icache_all = xscale_flush_icache_all,
+ .flush_kern_all = xscale_flush_kern_cache_all,
+ .flush_kern_louis = xscale_flush_kern_cache_all,
+ .flush_user_all = xscale_flush_user_cache_all,
+ .flush_user_range = xscale_flush_user_cache_range,
+ .coherent_kern_range = xscale_coherent_kern_range,
+ .coherent_user_range = xscale_coherent_user_range,
+ .flush_kern_dcache_area = xscale_flush_kern_dcache_area,
+ .dma_map_area = xscale_80200_A0_A1_dma_map_area,
+ .dma_unmap_area = xscale_dma_unmap_area,
+ .dma_flush_range = xscale_dma_flush_range,
+};
+#endif
+
+#ifdef CONFIG_CPU_XSC3
+void xsc3_flush_icache_all(void);
+void xsc3_flush_kern_cache_all(void);
+void xsc3_flush_user_cache_all(void);
+void xsc3_flush_user_cache_range(unsigned long, unsigned long, unsigned int);
+void xsc3_coherent_kern_range(unsigned long, unsigned long);
+int xsc3_coherent_user_range(unsigned long, unsigned long);
+void xsc3_flush_kern_dcache_area(void *, size_t);
+void xsc3_dma_map_area(const void *, size_t, int);
+void xsc3_dma_unmap_area(const void *, size_t, int);
+void xsc3_dma_flush_range(const void *, const void *);
+
+struct cpu_cache_fns xsc3_cache_fns __initconst = {
+ .flush_icache_all = xsc3_flush_icache_all,
+ .flush_kern_all = xsc3_flush_kern_cache_all,
+ .flush_kern_louis = xsc3_flush_kern_cache_all,
+ .flush_user_all = xsc3_flush_user_cache_all,
+ .flush_user_range = xsc3_flush_user_cache_range,
+ .coherent_kern_range = xsc3_coherent_kern_range,
+ .coherent_user_range = xsc3_coherent_user_range,
+ .flush_kern_dcache_area = xsc3_flush_kern_dcache_area,
+ .dma_map_area = xsc3_dma_map_area,
+ .dma_unmap_area = xsc3_dma_unmap_area,
+ .dma_flush_range = xsc3_dma_flush_range,
+};
+#endif
+
+#ifdef CONFIG_CPU_MOHAWK
+void mohawk_flush_icache_all(void);
+void mohawk_flush_kern_cache_all(void);
+void mohawk_flush_user_cache_all(void);
+void mohawk_flush_user_cache_range(unsigned long, unsigned long, unsigned int);
+void mohawk_coherent_kern_range(unsigned long, unsigned long);
+int mohawk_coherent_user_range(unsigned long, unsigned long);
+void mohawk_flush_kern_dcache_area(void *, size_t);
+void mohawk_dma_map_area(const void *, size_t, int);
+void mohawk_dma_unmap_area(const void *, size_t, int);
+void mohawk_dma_flush_range(const void *, const void *);
+
+struct cpu_cache_fns mohawk_cache_fns __initconst = {
+ .flush_icache_all = mohawk_flush_icache_all,
+ .flush_kern_all = mohawk_flush_kern_cache_all,
+ .flush_kern_louis = mohawk_flush_kern_cache_all,
+ .flush_user_all = mohawk_flush_user_cache_all,
+ .flush_user_range = mohawk_flush_user_cache_range,
+ .coherent_kern_range = mohawk_coherent_kern_range,
+ .coherent_user_range = mohawk_coherent_user_range,
+ .flush_kern_dcache_area = mohawk_flush_kern_dcache_area,
+ .dma_map_area = mohawk_dma_map_area,
+ .dma_unmap_area = mohawk_dma_unmap_area,
+ .dma_flush_range = mohawk_dma_flush_range,
+};
+#endif
+
+#ifdef CONFIG_CPU_FEROCEON
+void feroceon_flush_icache_all(void);
+void feroceon_flush_kern_cache_all(void);
+void feroceon_flush_user_cache_all(void);
+void feroceon_flush_user_cache_range(unsigned long, unsigned long, unsigned int);
+void feroceon_coherent_kern_range(unsigned long, unsigned long);
+int feroceon_coherent_user_range(unsigned long, unsigned long);
+void feroceon_flush_kern_dcache_area(void *, size_t);
+void feroceon_dma_map_area(const void *, size_t, int);
+void feroceon_dma_unmap_area(const void *, size_t, int);
+void feroceon_dma_flush_range(const void *, const void *);
+
+struct cpu_cache_fns feroceon_cache_fns __initconst = {
+ .flush_icache_all = feroceon_flush_icache_all,
+ .flush_kern_all = feroceon_flush_kern_cache_all,
+ .flush_kern_louis = feroceon_flush_kern_cache_all,
+ .flush_user_all = feroceon_flush_user_cache_all,
+ .flush_user_range = feroceon_flush_user_cache_range,
+ .coherent_kern_range = feroceon_coherent_kern_range,
+ .coherent_user_range = feroceon_coherent_user_range,
+ .flush_kern_dcache_area = feroceon_flush_kern_dcache_area,
+ .dma_map_area = feroceon_dma_map_area,
+ .dma_unmap_area = feroceon_dma_unmap_area,
+ .dma_flush_range = feroceon_dma_flush_range,
+};
+
+void feroceon_range_flush_kern_dcache_area(void *, size_t);
+void feroceon_range_dma_map_area(const void *, size_t, int);
+void feroceon_range_dma_flush_range(const void *, const void *);
+
+struct cpu_cache_fns feroceon_range_cache_fns __initconst = {
+ .flush_icache_all = feroceon_flush_icache_all,
+ .flush_kern_all = feroceon_flush_kern_cache_all,
+ .flush_kern_louis = feroceon_flush_kern_cache_all,
+ .flush_user_all = feroceon_flush_user_cache_all,
+ .flush_user_range = feroceon_flush_user_cache_range,
+ .coherent_kern_range = feroceon_coherent_kern_range,
+ .coherent_user_range = feroceon_coherent_user_range,
+ .flush_kern_dcache_area = feroceon_range_flush_kern_dcache_area,
+ .dma_map_area = feroceon_range_dma_map_area,
+ .dma_unmap_area = feroceon_dma_unmap_area,
+ .dma_flush_range = feroceon_range_dma_flush_range,
+};
+#endif
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 439dc6a26bb9..dfa9554ef331 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -242,6 +242,27 @@ static inline bool is_permission_fault(unsigned int fsr)
return false;
}
+#ifdef CONFIG_CPU_TTBR0_PAN
+static inline bool ttbr0_usermode_access_allowed(struct pt_regs *regs)
+{
+ struct svc_pt_regs *svcregs;
+
+ /* If we are in user mode: permission granted */
+ if (user_mode(regs))
+ return true;
+
+ /* uaccess state saved above pt_regs on SVC exception entry */
+ svcregs = to_svc_pt_regs(regs);
+
+ return !(svcregs->ttbcr & TTBCR_EPD0);
+}
+#else
+static inline bool ttbr0_usermode_access_allowed(struct pt_regs *regs)
+{
+ return true;
+}
+#endif
+
static int __kprobes
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
@@ -285,6 +306,14 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
+ /*
+ * Privileged access aborts with CONFIG_CPU_TTBR0_PAN enabled are
+ * routed via the translation fault mechanism. Check whether uaccess
+ * is disabled while in kernel mode.
+ */
+ if (!ttbr0_usermode_access_allowed(regs))
+ goto no_context;
+
if (!(flags & FAULT_FLAG_USER))
goto lock_mmap;
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index e8c6f4be0ce1..5345d218899a 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -22,6 +22,7 @@
#include <linux/sizes.h>
#include <linux/stop_machine.h>
#include <linux/swiotlb.h>
+#include <linux/execmem.h>
#include <asm/cp15.h>
#include <asm/mach-types.h>
@@ -486,3 +487,47 @@ void free_initrd_mem(unsigned long start, unsigned long end)
free_reserved_area((void *)start, (void *)end, -1, "initrd");
}
#endif
+
+#ifdef CONFIG_EXECMEM
+
+#ifdef CONFIG_XIP_KERNEL
+/*
+ * The XIP kernel text is mapped in the module area so that modules and
+ * some other code can work without any indirect relocations.
+ * MODULES_VADDR is redefined here and not in asm/memory.h to avoid
+ * recompiling the whole kernel when CONFIG_XIP_KERNEL is turned on/off.
+ */
+#undef MODULES_VADDR
+#define MODULES_VADDR (((unsigned long)_exiprom + ~PMD_MASK) & PMD_MASK)
+#endif
+
+#ifdef CONFIG_MMU
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
+{
+ unsigned long fallback_start = 0, fallback_end = 0;
+
+ if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS)) {
+ fallback_start = VMALLOC_START;
+ fallback_end = VMALLOC_END;
+ }
+
+ execmem_info = (struct execmem_info){
+ .ranges = {
+ [EXECMEM_DEFAULT] = {
+ .start = MODULES_VADDR,
+ .end = MODULES_END,
+ .pgprot = PAGE_KERNEL_EXEC,
+ .alignment = 1,
+ .fallback_start = fallback_start,
+ .fallback_end = fallback_end,
+ },
+ },
+ };
+
+ return &execmem_info;
+}
+#endif /* CONFIG_MMU */
+
+#endif /* CONFIG_EXECMEM */
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index c24e29c0b9a4..3f774856ca67 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1687,9 +1687,8 @@ static void __init early_paging_init(const struct machine_desc *mdesc)
*/
cr = get_cr();
set_cr(cr & ~(CR_I | CR_C));
- asm("mrc p15, 0, %0, c2, c0, 2" : "=r" (ttbcr));
- asm volatile("mcr p15, 0, %0, c2, c0, 2"
- : : "r" (ttbcr & ~(3 << 8 | 3 << 10)));
+ ttbcr = cpu_get_ttbcr();
+ cpu_set_ttbcr(ttbcr & ~(3 << 8 | 3 << 10));
flush_cache_all();
/*
@@ -1701,7 +1700,7 @@ static void __init early_paging_init(const struct machine_desc *mdesc)
lpae_pgtables_remap(offset, pa_pgd);
/* Re-enable the caches and cacheable TLB walks */
- asm volatile("mcr p15, 0, %0, c2, c0, 2" : : "r" (ttbcr));
+ cpu_set_ttbcr(ttbcr);
set_cr(cr);
}
diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S
index 6837cf7a4812..d0ce3414a13e 100644
--- a/arch/arm/mm/proc-arm1020.S
+++ b/arch/arm/mm/proc-arm1020.S
@@ -11,6 +11,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <linux/pgtable.h>
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
@@ -56,18 +57,20 @@
/*
* cpu_arm1020_proc_init()
*/
-ENTRY(cpu_arm1020_proc_init)
+SYM_TYPED_FUNC_START(cpu_arm1020_proc_init)
ret lr
+SYM_FUNC_END(cpu_arm1020_proc_init)
/*
* cpu_arm1020_proc_fin()
*/
-ENTRY(cpu_arm1020_proc_fin)
+SYM_TYPED_FUNC_START(cpu_arm1020_proc_fin)
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
ret lr
+SYM_FUNC_END(cpu_arm1020_proc_fin)
/*
* cpu_arm1020_reset(loc)
@@ -80,7 +83,7 @@ ENTRY(cpu_arm1020_proc_fin)
*/
.align 5
.pushsection .idmap.text, "ax"
-ENTRY(cpu_arm1020_reset)
+SYM_TYPED_FUNC_START(cpu_arm1020_reset)
mov ip, #0
mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches
mcr p15, 0, ip, c7, c10, 4 @ drain WB
@@ -92,16 +95,17 @@ ENTRY(cpu_arm1020_reset)
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
ret r0
-ENDPROC(cpu_arm1020_reset)
+SYM_FUNC_END(cpu_arm1020_reset)
.popsection
/*
* cpu_arm1020_do_idle()
*/
.align 5
-ENTRY(cpu_arm1020_do_idle)
+SYM_TYPED_FUNC_START(cpu_arm1020_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
ret lr
+SYM_FUNC_END(cpu_arm1020_do_idle)
/* ================================= CACHE ================================ */
@@ -112,13 +116,13 @@ ENTRY(cpu_arm1020_do_idle)
*
* Unconditionally clean and invalidate the entire icache.
*/
-ENTRY(arm1020_flush_icache_all)
+SYM_TYPED_FUNC_START(arm1020_flush_icache_all)
#ifndef CONFIG_CPU_ICACHE_DISABLE
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
#endif
ret lr
-ENDPROC(arm1020_flush_icache_all)
+SYM_FUNC_END(arm1020_flush_icache_all)
/*
* flush_user_cache_all()
@@ -126,14 +130,14 @@ ENDPROC(arm1020_flush_icache_all)
* Invalidate all cache entries in a particular address
* space.
*/
-ENTRY(arm1020_flush_user_cache_all)
- /* FALLTHROUGH */
+SYM_FUNC_ALIAS(arm1020_flush_user_cache_all, arm1020_flush_kern_cache_all)
+
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
-ENTRY(arm1020_flush_kern_cache_all)
+SYM_TYPED_FUNC_START(arm1020_flush_kern_cache_all)
mov r2, #VM_EXEC
mov ip, #0
__flush_whole_cache:
@@ -154,6 +158,7 @@ __flush_whole_cache:
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm1020_flush_kern_cache_all)
/*
* flush_user_cache_range(start, end, flags)
@@ -165,7 +170,7 @@ __flush_whole_cache:
* - end - end address (exclusive)
* - flags - vm_flags for this space
*/
-ENTRY(arm1020_flush_user_cache_range)
+SYM_TYPED_FUNC_START(arm1020_flush_user_cache_range)
mov ip, #0
sub r3, r1, r0 @ calculate total size
cmp r3, #CACHE_DLIMIT
@@ -185,6 +190,7 @@ ENTRY(arm1020_flush_user_cache_range)
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm1020_flush_user_cache_range)
/*
* coherent_kern_range(start, end)
@@ -196,8 +202,11 @@ ENTRY(arm1020_flush_user_cache_range)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(arm1020_coherent_kern_range)
- /* FALLTRHOUGH */
+SYM_TYPED_FUNC_START(arm1020_coherent_kern_range)
+#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */
+ b arm1020_coherent_user_range
+#endif
+SYM_FUNC_END(arm1020_coherent_kern_range)
/*
* coherent_user_range(start, end)
@@ -209,7 +218,7 @@ ENTRY(arm1020_coherent_kern_range)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(arm1020_coherent_user_range)
+SYM_TYPED_FUNC_START(arm1020_coherent_user_range)
mov ip, #0
bic r0, r0, #CACHE_DLINESIZE - 1
mcr p15, 0, ip, c7, c10, 4
@@ -227,6 +236,7 @@ ENTRY(arm1020_coherent_user_range)
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov r0, #0
ret lr
+SYM_FUNC_END(arm1020_coherent_user_range)
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -237,7 +247,7 @@ ENTRY(arm1020_coherent_user_range)
* - addr - kernel address
* - size - region size
*/
-ENTRY(arm1020_flush_kern_dcache_area)
+SYM_TYPED_FUNC_START(arm1020_flush_kern_dcache_area)
mov ip, #0
#ifndef CONFIG_CPU_DCACHE_DISABLE
add r1, r0, r1
@@ -249,6 +259,7 @@ ENTRY(arm1020_flush_kern_dcache_area)
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm1020_flush_kern_dcache_area)
/*
* dma_inv_range(start, end)
@@ -314,7 +325,7 @@ arm1020_dma_clean_range:
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(arm1020_dma_flush_range)
+SYM_TYPED_FUNC_START(arm1020_dma_flush_range)
mov ip, #0
#ifndef CONFIG_CPU_DCACHE_DISABLE
bic r0, r0, #CACHE_DLINESIZE - 1
@@ -327,6 +338,7 @@ ENTRY(arm1020_dma_flush_range)
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm1020_dma_flush_range)
/*
* dma_map_area(start, size, dir)
@@ -334,13 +346,13 @@ ENTRY(arm1020_dma_flush_range)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(arm1020_dma_map_area)
+SYM_TYPED_FUNC_START(arm1020_dma_map_area)
add r1, r1, r0
cmp r2, #DMA_TO_DEVICE
beq arm1020_dma_clean_range
bcs arm1020_dma_inv_range
b arm1020_dma_flush_range
-ENDPROC(arm1020_dma_map_area)
+SYM_FUNC_END(arm1020_dma_map_area)
/*
* dma_unmap_area(start, size, dir)
@@ -348,18 +360,12 @@ ENDPROC(arm1020_dma_map_area)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(arm1020_dma_unmap_area)
+SYM_TYPED_FUNC_START(arm1020_dma_unmap_area)
ret lr
-ENDPROC(arm1020_dma_unmap_area)
-
- .globl arm1020_flush_kern_cache_louis
- .equ arm1020_flush_kern_cache_louis, arm1020_flush_kern_cache_all
-
- @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
- define_cache_functions arm1020
+SYM_FUNC_END(arm1020_dma_unmap_area)
.align 5
-ENTRY(cpu_arm1020_dcache_clean_area)
+SYM_TYPED_FUNC_START(cpu_arm1020_dcache_clean_area)
#ifndef CONFIG_CPU_DCACHE_DISABLE
mov ip, #0
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
@@ -369,6 +375,7 @@ ENTRY(cpu_arm1020_dcache_clean_area)
bhi 1b
#endif
ret lr
+SYM_FUNC_END(cpu_arm1020_dcache_clean_area)
/* =============================== PageTable ============================== */
@@ -380,7 +387,7 @@ ENTRY(cpu_arm1020_dcache_clean_area)
* pgd: new page tables
*/
.align 5
-ENTRY(cpu_arm1020_switch_mm)
+SYM_TYPED_FUNC_START(cpu_arm1020_switch_mm)
#ifdef CONFIG_MMU
#ifndef CONFIG_CPU_DCACHE_DISABLE
mcr p15, 0, r3, c7, c10, 4
@@ -408,14 +415,15 @@ ENTRY(cpu_arm1020_switch_mm)
mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs
#endif /* CONFIG_MMU */
ret lr
-
+SYM_FUNC_END(cpu_arm1020_switch_mm)
+
/*
* cpu_arm1020_set_pte(ptep, pte)
*
* Set a PTE and flush it out
*/
.align 5
-ENTRY(cpu_arm1020_set_pte_ext)
+SYM_TYPED_FUNC_START(cpu_arm1020_set_pte_ext)
#ifdef CONFIG_MMU
armv3_set_pte_ext
mov r0, r0
@@ -426,6 +434,7 @@ ENTRY(cpu_arm1020_set_pte_ext)
mcr p15, 0, r0, c7, c10, 4 @ drain WB
#endif /* CONFIG_MMU */
ret lr
+SYM_FUNC_END(cpu_arm1020_set_pte_ext)
.type __arm1020_setup, #function
__arm1020_setup:
diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S
index df49b10250b8..64f031bf6eff 100644
--- a/arch/arm/mm/proc-arm1020e.S
+++ b/arch/arm/mm/proc-arm1020e.S
@@ -11,6 +11,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <linux/pgtable.h>
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
@@ -56,18 +57,20 @@
/*
* cpu_arm1020e_proc_init()
*/
-ENTRY(cpu_arm1020e_proc_init)
+SYM_TYPED_FUNC_START(cpu_arm1020e_proc_init)
ret lr
+SYM_FUNC_END(cpu_arm1020e_proc_init)
/*
* cpu_arm1020e_proc_fin()
*/
-ENTRY(cpu_arm1020e_proc_fin)
+SYM_TYPED_FUNC_START(cpu_arm1020e_proc_fin)
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
ret lr
+SYM_FUNC_END(cpu_arm1020e_proc_fin)
/*
* cpu_arm1020e_reset(loc)
@@ -80,7 +83,7 @@ ENTRY(cpu_arm1020e_proc_fin)
*/
.align 5
.pushsection .idmap.text, "ax"
-ENTRY(cpu_arm1020e_reset)
+SYM_TYPED_FUNC_START(cpu_arm1020e_reset)
mov ip, #0
mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches
mcr p15, 0, ip, c7, c10, 4 @ drain WB
@@ -92,16 +95,17 @@ ENTRY(cpu_arm1020e_reset)
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
ret r0
-ENDPROC(cpu_arm1020e_reset)
+SYM_FUNC_END(cpu_arm1020e_reset)
.popsection
/*
* cpu_arm1020e_do_idle()
*/
.align 5
-ENTRY(cpu_arm1020e_do_idle)
+SYM_TYPED_FUNC_START(cpu_arm1020e_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
ret lr
+SYM_FUNC_END(cpu_arm1020e_do_idle)
/* ================================= CACHE ================================ */
@@ -112,13 +116,13 @@ ENTRY(cpu_arm1020e_do_idle)
*
* Unconditionally clean and invalidate the entire icache.
*/
-ENTRY(arm1020e_flush_icache_all)
+SYM_TYPED_FUNC_START(arm1020e_flush_icache_all)
#ifndef CONFIG_CPU_ICACHE_DISABLE
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
#endif
ret lr
-ENDPROC(arm1020e_flush_icache_all)
+SYM_FUNC_END(arm1020e_flush_icache_all)
/*
* flush_user_cache_all()
@@ -126,14 +130,14 @@ ENDPROC(arm1020e_flush_icache_all)
* Invalidate all cache entries in a particular address
* space.
*/
-ENTRY(arm1020e_flush_user_cache_all)
- /* FALLTHROUGH */
+SYM_FUNC_ALIAS(arm1020e_flush_user_cache_all, arm1020e_flush_kern_cache_all)
+
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
-ENTRY(arm1020e_flush_kern_cache_all)
+SYM_TYPED_FUNC_START(arm1020e_flush_kern_cache_all)
mov r2, #VM_EXEC
mov ip, #0
__flush_whole_cache:
@@ -153,6 +157,7 @@ __flush_whole_cache:
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm1020e_flush_kern_cache_all)
/*
* flush_user_cache_range(start, end, flags)
@@ -164,7 +169,7 @@ __flush_whole_cache:
* - end - end address (exclusive)
* - flags - vm_flags for this space
*/
-ENTRY(arm1020e_flush_user_cache_range)
+SYM_TYPED_FUNC_START(arm1020e_flush_user_cache_range)
mov ip, #0
sub r3, r1, r0 @ calculate total size
cmp r3, #CACHE_DLIMIT
@@ -182,6 +187,7 @@ ENTRY(arm1020e_flush_user_cache_range)
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm1020e_flush_user_cache_range)
/*
* coherent_kern_range(start, end)
@@ -193,8 +199,12 @@ ENTRY(arm1020e_flush_user_cache_range)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(arm1020e_coherent_kern_range)
- /* FALLTHROUGH */
+SYM_TYPED_FUNC_START(arm1020e_coherent_kern_range)
+#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */
+ b arm1020e_coherent_user_range
+#endif
+SYM_FUNC_END(arm1020e_coherent_kern_range)
+
/*
* coherent_user_range(start, end)
*
@@ -205,7 +215,7 @@ ENTRY(arm1020e_coherent_kern_range)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(arm1020e_coherent_user_range)
+SYM_TYPED_FUNC_START(arm1020e_coherent_user_range)
mov ip, #0
bic r0, r0, #CACHE_DLINESIZE - 1
1:
@@ -221,6 +231,7 @@ ENTRY(arm1020e_coherent_user_range)
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov r0, #0
ret lr
+SYM_FUNC_END(arm1020e_coherent_user_range)
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -231,7 +242,7 @@ ENTRY(arm1020e_coherent_user_range)
* - addr - kernel address
* - size - region size
*/
-ENTRY(arm1020e_flush_kern_dcache_area)
+SYM_TYPED_FUNC_START(arm1020e_flush_kern_dcache_area)
mov ip, #0
#ifndef CONFIG_CPU_DCACHE_DISABLE
add r1, r0, r1
@@ -242,6 +253,7 @@ ENTRY(arm1020e_flush_kern_dcache_area)
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm1020e_flush_kern_dcache_area)
/*
* dma_inv_range(start, end)
@@ -302,7 +314,7 @@ arm1020e_dma_clean_range:
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(arm1020e_dma_flush_range)
+SYM_TYPED_FUNC_START(arm1020e_dma_flush_range)
mov ip, #0
#ifndef CONFIG_CPU_DCACHE_DISABLE
bic r0, r0, #CACHE_DLINESIZE - 1
@@ -313,6 +325,7 @@ ENTRY(arm1020e_dma_flush_range)
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm1020e_dma_flush_range)
/*
* dma_map_area(start, size, dir)
@@ -320,13 +333,13 @@ ENTRY(arm1020e_dma_flush_range)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(arm1020e_dma_map_area)
+SYM_TYPED_FUNC_START(arm1020e_dma_map_area)
add r1, r1, r0
cmp r2, #DMA_TO_DEVICE
beq arm1020e_dma_clean_range
bcs arm1020e_dma_inv_range
b arm1020e_dma_flush_range
-ENDPROC(arm1020e_dma_map_area)
+SYM_FUNC_END(arm1020e_dma_map_area)
/*
* dma_unmap_area(start, size, dir)
@@ -334,18 +347,12 @@ ENDPROC(arm1020e_dma_map_area)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(arm1020e_dma_unmap_area)
+SYM_TYPED_FUNC_START(arm1020e_dma_unmap_area)
ret lr
-ENDPROC(arm1020e_dma_unmap_area)
-
- .globl arm1020e_flush_kern_cache_louis
- .equ arm1020e_flush_kern_cache_louis, arm1020e_flush_kern_cache_all
-
- @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
- define_cache_functions arm1020e
+SYM_FUNC_END(arm1020e_dma_unmap_area)
.align 5
-ENTRY(cpu_arm1020e_dcache_clean_area)
+SYM_TYPED_FUNC_START(cpu_arm1020e_dcache_clean_area)
#ifndef CONFIG_CPU_DCACHE_DISABLE
mov ip, #0
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
@@ -354,6 +361,7 @@ ENTRY(cpu_arm1020e_dcache_clean_area)
bhi 1b
#endif
ret lr
+SYM_FUNC_END(cpu_arm1020e_dcache_clean_area)
/* =============================== PageTable ============================== */
@@ -365,7 +373,7 @@ ENTRY(cpu_arm1020e_dcache_clean_area)
* pgd: new page tables
*/
.align 5
-ENTRY(cpu_arm1020e_switch_mm)
+SYM_TYPED_FUNC_START(cpu_arm1020e_switch_mm)
#ifdef CONFIG_MMU
#ifndef CONFIG_CPU_DCACHE_DISABLE
mcr p15, 0, r3, c7, c10, 4
@@ -392,14 +400,15 @@ ENTRY(cpu_arm1020e_switch_mm)
mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs
#endif
ret lr
-
+SYM_FUNC_END(cpu_arm1020e_switch_mm)
+
/*
* cpu_arm1020e_set_pte(ptep, pte)
*
* Set a PTE and flush it out
*/
.align 5
-ENTRY(cpu_arm1020e_set_pte_ext)
+SYM_TYPED_FUNC_START(cpu_arm1020e_set_pte_ext)
#ifdef CONFIG_MMU
armv3_set_pte_ext
mov r0, r0
@@ -408,6 +417,7 @@ ENTRY(cpu_arm1020e_set_pte_ext)
#endif
#endif /* CONFIG_MMU */
ret lr
+SYM_FUNC_END(cpu_arm1020e_set_pte_ext)
.type __arm1020e_setup, #function
__arm1020e_setup:
diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S
index e89ce467f672..42ed5ed07252 100644
--- a/arch/arm/mm/proc-arm1022.S
+++ b/arch/arm/mm/proc-arm1022.S
@@ -11,6 +11,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <linux/pgtable.h>
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
@@ -56,18 +57,20 @@
/*
* cpu_arm1022_proc_init()
*/
-ENTRY(cpu_arm1022_proc_init)
+SYM_TYPED_FUNC_START(cpu_arm1022_proc_init)
ret lr
+SYM_FUNC_END(cpu_arm1022_proc_init)
/*
* cpu_arm1022_proc_fin()
*/
-ENTRY(cpu_arm1022_proc_fin)
+SYM_TYPED_FUNC_START(cpu_arm1022_proc_fin)
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
ret lr
+SYM_FUNC_END(cpu_arm1022_proc_fin)
/*
* cpu_arm1022_reset(loc)
@@ -80,7 +83,7 @@ ENTRY(cpu_arm1022_proc_fin)
*/
.align 5
.pushsection .idmap.text, "ax"
-ENTRY(cpu_arm1022_reset)
+SYM_TYPED_FUNC_START(cpu_arm1022_reset)
mov ip, #0
mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches
mcr p15, 0, ip, c7, c10, 4 @ drain WB
@@ -92,16 +95,17 @@ ENTRY(cpu_arm1022_reset)
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
ret r0
-ENDPROC(cpu_arm1022_reset)
+SYM_FUNC_END(cpu_arm1022_reset)
.popsection
/*
* cpu_arm1022_do_idle()
*/
.align 5
-ENTRY(cpu_arm1022_do_idle)
+SYM_TYPED_FUNC_START(cpu_arm1022_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
ret lr
+SYM_FUNC_END(cpu_arm1022_do_idle)
/* ================================= CACHE ================================ */
@@ -112,13 +116,13 @@ ENTRY(cpu_arm1022_do_idle)
*
* Unconditionally clean and invalidate the entire icache.
*/
-ENTRY(arm1022_flush_icache_all)
+SYM_TYPED_FUNC_START(arm1022_flush_icache_all)
#ifndef CONFIG_CPU_ICACHE_DISABLE
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
#endif
ret lr
-ENDPROC(arm1022_flush_icache_all)
+SYM_FUNC_END(arm1022_flush_icache_all)
/*
* flush_user_cache_all()
@@ -126,14 +130,14 @@ ENDPROC(arm1022_flush_icache_all)
* Invalidate all cache entries in a particular address
* space.
*/
-ENTRY(arm1022_flush_user_cache_all)
- /* FALLTHROUGH */
+SYM_FUNC_ALIAS(arm1022_flush_user_cache_all, arm1022_flush_kern_cache_all)
+
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
-ENTRY(arm1022_flush_kern_cache_all)
+SYM_TYPED_FUNC_START(arm1022_flush_kern_cache_all)
mov r2, #VM_EXEC
mov ip, #0
__flush_whole_cache:
@@ -152,6 +156,7 @@ __flush_whole_cache:
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm1022_flush_kern_cache_all)
/*
* flush_user_cache_range(start, end, flags)
@@ -163,7 +168,7 @@ __flush_whole_cache:
* - end - end address (exclusive)
* - flags - vm_flags for this space
*/
-ENTRY(arm1022_flush_user_cache_range)
+SYM_TYPED_FUNC_START(arm1022_flush_user_cache_range)
mov ip, #0
sub r3, r1, r0 @ calculate total size
cmp r3, #CACHE_DLIMIT
@@ -181,6 +186,7 @@ ENTRY(arm1022_flush_user_cache_range)
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm1022_flush_user_cache_range)
/*
* coherent_kern_range(start, end)
@@ -192,8 +198,11 @@ ENTRY(arm1022_flush_user_cache_range)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(arm1022_coherent_kern_range)
- /* FALLTHROUGH */
+SYM_TYPED_FUNC_START(arm1022_coherent_kern_range)
+#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */
+ b arm1022_coherent_user_range
+#endif
+SYM_FUNC_END(arm1022_coherent_kern_range)
/*
* coherent_user_range(start, end)
@@ -205,7 +214,7 @@ ENTRY(arm1022_coherent_kern_range)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(arm1022_coherent_user_range)
+SYM_TYPED_FUNC_START(arm1022_coherent_user_range)
mov ip, #0
bic r0, r0, #CACHE_DLINESIZE - 1
1:
@@ -221,6 +230,7 @@ ENTRY(arm1022_coherent_user_range)
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov r0, #0
ret lr
+SYM_FUNC_END(arm1022_coherent_user_range)
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -231,7 +241,7 @@ ENTRY(arm1022_coherent_user_range)
* - addr - kernel address
* - size - region size
*/
-ENTRY(arm1022_flush_kern_dcache_area)
+SYM_TYPED_FUNC_START(arm1022_flush_kern_dcache_area)
mov ip, #0
#ifndef CONFIG_CPU_DCACHE_DISABLE
add r1, r0, r1
@@ -242,6 +252,7 @@ ENTRY(arm1022_flush_kern_dcache_area)
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm1022_flush_kern_dcache_area)
/*
* dma_inv_range(start, end)
@@ -302,7 +313,7 @@ arm1022_dma_clean_range:
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(arm1022_dma_flush_range)
+SYM_TYPED_FUNC_START(arm1022_dma_flush_range)
mov ip, #0
#ifndef CONFIG_CPU_DCACHE_DISABLE
bic r0, r0, #CACHE_DLINESIZE - 1
@@ -313,6 +324,7 @@ ENTRY(arm1022_dma_flush_range)
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm1022_dma_flush_range)
/*
* dma_map_area(start, size, dir)
@@ -320,13 +332,13 @@ ENTRY(arm1022_dma_flush_range)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(arm1022_dma_map_area)
+SYM_TYPED_FUNC_START(arm1022_dma_map_area)
add r1, r1, r0
cmp r2, #DMA_TO_DEVICE
beq arm1022_dma_clean_range
bcs arm1022_dma_inv_range
b arm1022_dma_flush_range
-ENDPROC(arm1022_dma_map_area)
+SYM_FUNC_END(arm1022_dma_map_area)
/*
* dma_unmap_area(start, size, dir)
@@ -334,18 +346,12 @@ ENDPROC(arm1022_dma_map_area)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(arm1022_dma_unmap_area)
+SYM_TYPED_FUNC_START(arm1022_dma_unmap_area)
ret lr
-ENDPROC(arm1022_dma_unmap_area)
-
- .globl arm1022_flush_kern_cache_louis
- .equ arm1022_flush_kern_cache_louis, arm1022_flush_kern_cache_all
-
- @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
- define_cache_functions arm1022
+SYM_FUNC_END(arm1022_dma_unmap_area)
.align 5
-ENTRY(cpu_arm1022_dcache_clean_area)
+SYM_TYPED_FUNC_START(cpu_arm1022_dcache_clean_area)
#ifndef CONFIG_CPU_DCACHE_DISABLE
mov ip, #0
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
@@ -354,6 +360,7 @@ ENTRY(cpu_arm1022_dcache_clean_area)
bhi 1b
#endif
ret lr
+SYM_FUNC_END(cpu_arm1022_dcache_clean_area)
/* =============================== PageTable ============================== */
@@ -365,7 +372,7 @@ ENTRY(cpu_arm1022_dcache_clean_area)
* pgd: new page tables
*/
.align 5
-ENTRY(cpu_arm1022_switch_mm)
+SYM_TYPED_FUNC_START(cpu_arm1022_switch_mm)
#ifdef CONFIG_MMU
#ifndef CONFIG_CPU_DCACHE_DISABLE
mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 16 segments
@@ -385,14 +392,15 @@ ENTRY(cpu_arm1022_switch_mm)
mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs
#endif
ret lr
-
+SYM_FUNC_END(cpu_arm1022_switch_mm)
+
/*
* cpu_arm1022_set_pte_ext(ptep, pte, ext)
*
* Set a PTE and flush it out
*/
.align 5
-ENTRY(cpu_arm1022_set_pte_ext)
+SYM_TYPED_FUNC_START(cpu_arm1022_set_pte_ext)
#ifdef CONFIG_MMU
armv3_set_pte_ext
mov r0, r0
@@ -401,6 +409,7 @@ ENTRY(cpu_arm1022_set_pte_ext)
#endif
#endif /* CONFIG_MMU */
ret lr
+SYM_FUNC_END(cpu_arm1022_set_pte_ext)
.type __arm1022_setup, #function
__arm1022_setup:
diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S
index 7fdd1a205e8e..b3ae62cd553a 100644
--- a/arch/arm/mm/proc-arm1026.S
+++ b/arch/arm/mm/proc-arm1026.S
@@ -11,6 +11,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <linux/pgtable.h>
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
@@ -56,18 +57,20 @@
/*
* cpu_arm1026_proc_init()
*/
-ENTRY(cpu_arm1026_proc_init)
+SYM_TYPED_FUNC_START(cpu_arm1026_proc_init)
ret lr
+SYM_FUNC_END(cpu_arm1026_proc_init)
/*
* cpu_arm1026_proc_fin()
*/
-ENTRY(cpu_arm1026_proc_fin)
+SYM_TYPED_FUNC_START(cpu_arm1026_proc_fin)
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
ret lr
+SYM_FUNC_END(cpu_arm1026_proc_fin)
/*
* cpu_arm1026_reset(loc)
@@ -80,7 +83,7 @@ ENTRY(cpu_arm1026_proc_fin)
*/
.align 5
.pushsection .idmap.text, "ax"
-ENTRY(cpu_arm1026_reset)
+SYM_TYPED_FUNC_START(cpu_arm1026_reset)
mov ip, #0
mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches
mcr p15, 0, ip, c7, c10, 4 @ drain WB
@@ -92,16 +95,17 @@ ENTRY(cpu_arm1026_reset)
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
ret r0
-ENDPROC(cpu_arm1026_reset)
+SYM_FUNC_END(cpu_arm1026_reset)
.popsection
/*
* cpu_arm1026_do_idle()
*/
.align 5
-ENTRY(cpu_arm1026_do_idle)
+SYM_TYPED_FUNC_START(cpu_arm1026_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
ret lr
+SYM_FUNC_END(cpu_arm1026_do_idle)
/* ================================= CACHE ================================ */
@@ -112,13 +116,13 @@ ENTRY(cpu_arm1026_do_idle)
*
* Unconditionally clean and invalidate the entire icache.
*/
-ENTRY(arm1026_flush_icache_all)
+SYM_TYPED_FUNC_START(arm1026_flush_icache_all)
#ifndef CONFIG_CPU_ICACHE_DISABLE
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
#endif
ret lr
-ENDPROC(arm1026_flush_icache_all)
+SYM_FUNC_END(arm1026_flush_icache_all)
/*
* flush_user_cache_all()
@@ -126,14 +130,14 @@ ENDPROC(arm1026_flush_icache_all)
* Invalidate all cache entries in a particular address
* space.
*/
-ENTRY(arm1026_flush_user_cache_all)
- /* FALLTHROUGH */
+SYM_FUNC_ALIAS(arm1026_flush_user_cache_all, arm1026_flush_kern_cache_all)
+
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
-ENTRY(arm1026_flush_kern_cache_all)
+SYM_TYPED_FUNC_START(arm1026_flush_kern_cache_all)
mov r2, #VM_EXEC
mov ip, #0
__flush_whole_cache:
@@ -147,6 +151,7 @@ __flush_whole_cache:
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm1026_flush_kern_cache_all)
/*
* flush_user_cache_range(start, end, flags)
@@ -158,7 +163,7 @@ __flush_whole_cache:
* - end - end address (exclusive)
* - flags - vm_flags for this space
*/
-ENTRY(arm1026_flush_user_cache_range)
+SYM_TYPED_FUNC_START(arm1026_flush_user_cache_range)
mov ip, #0
sub r3, r1, r0 @ calculate total size
cmp r3, #CACHE_DLIMIT
@@ -176,6 +181,7 @@ ENTRY(arm1026_flush_user_cache_range)
#endif
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm1026_flush_user_cache_range)
/*
* coherent_kern_range(start, end)
@@ -187,8 +193,12 @@ ENTRY(arm1026_flush_user_cache_range)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(arm1026_coherent_kern_range)
- /* FALLTHROUGH */
+SYM_TYPED_FUNC_START(arm1026_coherent_kern_range)
+#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */
+ b arm1026_coherent_user_range
+#endif
+SYM_FUNC_END(arm1026_coherent_kern_range)
+
/*
* coherent_user_range(start, end)
*
@@ -199,7 +209,7 @@ ENTRY(arm1026_coherent_kern_range)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(arm1026_coherent_user_range)
+SYM_TYPED_FUNC_START(arm1026_coherent_user_range)
mov ip, #0
bic r0, r0, #CACHE_DLINESIZE - 1
1:
@@ -215,6 +225,7 @@ ENTRY(arm1026_coherent_user_range)
mcr p15, 0, ip, c7, c10, 4 @ drain WB
mov r0, #0
ret lr
+SYM_FUNC_END(arm1026_coherent_user_range)
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -225,7 +236,7 @@ ENTRY(arm1026_coherent_user_range)
* - addr - kernel address
* - size - region size
*/
-ENTRY(arm1026_flush_kern_dcache_area)
+SYM_TYPED_FUNC_START(arm1026_flush_kern_dcache_area)
mov ip, #0
#ifndef CONFIG_CPU_DCACHE_DISABLE
add r1, r0, r1
@@ -236,6 +247,7 @@ ENTRY(arm1026_flush_kern_dcache_area)
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm1026_flush_kern_dcache_area)
/*
* dma_inv_range(start, end)
@@ -296,7 +308,7 @@ arm1026_dma_clean_range:
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(arm1026_dma_flush_range)
+SYM_TYPED_FUNC_START(arm1026_dma_flush_range)
mov ip, #0
#ifndef CONFIG_CPU_DCACHE_DISABLE
bic r0, r0, #CACHE_DLINESIZE - 1
@@ -307,6 +319,7 @@ ENTRY(arm1026_dma_flush_range)
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm1026_dma_flush_range)
/*
* dma_map_area(start, size, dir)
@@ -314,13 +327,13 @@ ENTRY(arm1026_dma_flush_range)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(arm1026_dma_map_area)
+SYM_TYPED_FUNC_START(arm1026_dma_map_area)
add r1, r1, r0
cmp r2, #DMA_TO_DEVICE
beq arm1026_dma_clean_range
bcs arm1026_dma_inv_range
b arm1026_dma_flush_range
-ENDPROC(arm1026_dma_map_area)
+SYM_FUNC_END(arm1026_dma_map_area)
/*
* dma_unmap_area(start, size, dir)
@@ -328,18 +341,12 @@ ENDPROC(arm1026_dma_map_area)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(arm1026_dma_unmap_area)
+SYM_TYPED_FUNC_START(arm1026_dma_unmap_area)
ret lr
-ENDPROC(arm1026_dma_unmap_area)
-
- .globl arm1026_flush_kern_cache_louis
- .equ arm1026_flush_kern_cache_louis, arm1026_flush_kern_cache_all
-
- @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
- define_cache_functions arm1026
+SYM_FUNC_END(arm1026_dma_unmap_area)
.align 5
-ENTRY(cpu_arm1026_dcache_clean_area)
+SYM_TYPED_FUNC_START(cpu_arm1026_dcache_clean_area)
#ifndef CONFIG_CPU_DCACHE_DISABLE
mov ip, #0
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
@@ -348,6 +355,7 @@ ENTRY(cpu_arm1026_dcache_clean_area)
bhi 1b
#endif
ret lr
+SYM_FUNC_END(cpu_arm1026_dcache_clean_area)
/* =============================== PageTable ============================== */
@@ -359,7 +367,7 @@ ENTRY(cpu_arm1026_dcache_clean_area)
* pgd: new page tables
*/
.align 5
-ENTRY(cpu_arm1026_switch_mm)
+SYM_TYPED_FUNC_START(cpu_arm1026_switch_mm)
#ifdef CONFIG_MMU
mov r1, #0
#ifndef CONFIG_CPU_DCACHE_DISABLE
@@ -374,14 +382,15 @@ ENTRY(cpu_arm1026_switch_mm)
mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs
#endif
ret lr
-
+SYM_FUNC_END(cpu_arm1026_switch_mm)
+
/*
* cpu_arm1026_set_pte_ext(ptep, pte, ext)
*
* Set a PTE and flush it out
*/
.align 5
-ENTRY(cpu_arm1026_set_pte_ext)
+SYM_TYPED_FUNC_START(cpu_arm1026_set_pte_ext)
#ifdef CONFIG_MMU
armv3_set_pte_ext
mov r0, r0
@@ -390,6 +399,7 @@ ENTRY(cpu_arm1026_set_pte_ext)
#endif
#endif /* CONFIG_MMU */
ret lr
+SYM_FUNC_END(cpu_arm1026_set_pte_ext)
.type __arm1026_setup, #function
__arm1026_setup:
diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S
index 3b687e6dd9fd..59732c334e1d 100644
--- a/arch/arm/mm/proc-arm720.S
+++ b/arch/arm/mm/proc-arm720.S
@@ -20,6 +20,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <linux/pgtable.h>
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
@@ -35,24 +36,30 @@
*
* Notes : This processor does not require these
*/
-ENTRY(cpu_arm720_dcache_clean_area)
-ENTRY(cpu_arm720_proc_init)
+SYM_TYPED_FUNC_START(cpu_arm720_dcache_clean_area)
ret lr
+SYM_FUNC_END(cpu_arm720_dcache_clean_area)
-ENTRY(cpu_arm720_proc_fin)
+SYM_TYPED_FUNC_START(cpu_arm720_proc_init)
+ ret lr
+SYM_FUNC_END(cpu_arm720_proc_init)
+
+SYM_TYPED_FUNC_START(cpu_arm720_proc_fin)
mrc p15, 0, r0, c1, c0, 0
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
ret lr
+SYM_FUNC_END(cpu_arm720_proc_fin)
/*
* Function: arm720_proc_do_idle(void)
* Params : r0 = unused
* Purpose : put the processor in proper idle mode
*/
-ENTRY(cpu_arm720_do_idle)
+SYM_TYPED_FUNC_START(cpu_arm720_do_idle)
ret lr
+SYM_FUNC_END(cpu_arm720_do_idle)
/*
* Function: arm720_switch_mm(unsigned long pgd_phys)
@@ -60,7 +67,7 @@ ENTRY(cpu_arm720_do_idle)
* Purpose : Perform a task switch, saving the old process' state and restoring
* the new.
*/
-ENTRY(cpu_arm720_switch_mm)
+SYM_TYPED_FUNC_START(cpu_arm720_switch_mm)
#ifdef CONFIG_MMU
mov r1, #0
mcr p15, 0, r1, c7, c7, 0 @ invalidate cache
@@ -68,6 +75,7 @@ ENTRY(cpu_arm720_switch_mm)
mcr p15, 0, r1, c8, c7, 0 @ flush TLB (v4)
#endif
ret lr
+SYM_FUNC_END(cpu_arm720_switch_mm)
/*
* Function: arm720_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext)
@@ -76,11 +84,12 @@ ENTRY(cpu_arm720_switch_mm)
* Purpose : Set a PTE and flush it out of any WB cache
*/
.align 5
-ENTRY(cpu_arm720_set_pte_ext)
+SYM_TYPED_FUNC_START(cpu_arm720_set_pte_ext)
#ifdef CONFIG_MMU
armv3_set_pte_ext wc_disable=0
#endif
ret lr
+SYM_FUNC_END(cpu_arm720_set_pte_ext)
/*
* Function: arm720_reset
@@ -88,7 +97,7 @@ ENTRY(cpu_arm720_set_pte_ext)
* Notes : This sets up everything for a reset
*/
.pushsection .idmap.text, "ax"
-ENTRY(cpu_arm720_reset)
+SYM_TYPED_FUNC_START(cpu_arm720_reset)
mov ip, #0
mcr p15, 0, ip, c7, c7, 0 @ invalidate cache
#ifdef CONFIG_MMU
@@ -99,7 +108,7 @@ ENTRY(cpu_arm720_reset)
bic ip, ip, #0x2100 @ ..v....s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
ret r0
-ENDPROC(cpu_arm720_reset)
+SYM_FUNC_END(cpu_arm720_reset)
.popsection
.type __arm710_setup, #function
diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S
index f2ec3bc60874..78854df63964 100644
--- a/arch/arm/mm/proc-arm740.S
+++ b/arch/arm/mm/proc-arm740.S
@@ -6,6 +6,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <linux/pgtable.h>
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
@@ -24,21 +25,32 @@
*
* These are not required.
*/
-ENTRY(cpu_arm740_proc_init)
-ENTRY(cpu_arm740_do_idle)
-ENTRY(cpu_arm740_dcache_clean_area)
-ENTRY(cpu_arm740_switch_mm)
+SYM_TYPED_FUNC_START(cpu_arm740_proc_init)
ret lr
+SYM_FUNC_END(cpu_arm740_proc_init)
+
+SYM_TYPED_FUNC_START(cpu_arm740_do_idle)
+ ret lr
+SYM_FUNC_END(cpu_arm740_do_idle)
+
+SYM_TYPED_FUNC_START(cpu_arm740_dcache_clean_area)
+ ret lr
+SYM_FUNC_END(cpu_arm740_dcache_clean_area)
+
+SYM_TYPED_FUNC_START(cpu_arm740_switch_mm)
+ ret lr
+SYM_FUNC_END(cpu_arm740_switch_mm)
/*
* cpu_arm740_proc_fin()
*/
-ENTRY(cpu_arm740_proc_fin)
+SYM_TYPED_FUNC_START(cpu_arm740_proc_fin)
mrc p15, 0, r0, c1, c0, 0
bic r0, r0, #0x3f000000 @ bank/f/lock/s
bic r0, r0, #0x0000000c @ w-buffer/cache
mcr p15, 0, r0, c1, c0, 0 @ disable caches
ret lr
+SYM_FUNC_END(cpu_arm740_proc_fin)
/*
* cpu_arm740_reset(loc)
@@ -46,14 +58,14 @@ ENTRY(cpu_arm740_proc_fin)
* Notes : This sets up everything for a reset
*/
.pushsection .idmap.text, "ax"
-ENTRY(cpu_arm740_reset)
+SYM_TYPED_FUNC_START(cpu_arm740_reset)
mov ip, #0
mcr p15, 0, ip, c7, c0, 0 @ invalidate cache
mrc p15, 0, ip, c1, c0, 0 @ get ctrl register
bic ip, ip, #0x0000000c @ ............wc..
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
ret r0
-ENDPROC(cpu_arm740_reset)
+SYM_FUNC_END(cpu_arm740_reset)
.popsection
.type __arm740_setup, #function
diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S
index 01bbe7576c1c..baa3d4472147 100644
--- a/arch/arm/mm/proc-arm7tdmi.S
+++ b/arch/arm/mm/proc-arm7tdmi.S
@@ -6,6 +6,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <linux/pgtable.h>
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
@@ -23,18 +24,29 @@
* cpu_arm7tdmi_switch_mm()
*
* These are not required.
- */
-ENTRY(cpu_arm7tdmi_proc_init)
-ENTRY(cpu_arm7tdmi_do_idle)
-ENTRY(cpu_arm7tdmi_dcache_clean_area)
-ENTRY(cpu_arm7tdmi_switch_mm)
- ret lr
+*/
+SYM_TYPED_FUNC_START(cpu_arm7tdmi_proc_init)
+ ret lr
+SYM_FUNC_END(cpu_arm7tdmi_proc_init)
+
+SYM_TYPED_FUNC_START(cpu_arm7tdmi_do_idle)
+ ret lr
+SYM_FUNC_END(cpu_arm7tdmi_do_idle)
+
+SYM_TYPED_FUNC_START(cpu_arm7tdmi_dcache_clean_area)
+ ret lr
+SYM_FUNC_END(cpu_arm7tdmi_dcache_clean_area)
+
+SYM_TYPED_FUNC_START(cpu_arm7tdmi_switch_mm)
+ ret lr
+SYM_FUNC_END(cpu_arm7tdmi_switch_mm)
/*
* cpu_arm7tdmi_proc_fin()
- */
-ENTRY(cpu_arm7tdmi_proc_fin)
- ret lr
+*/
+SYM_TYPED_FUNC_START(cpu_arm7tdmi_proc_fin)
+ ret lr
+SYM_FUNC_END(cpu_arm7tdmi_proc_fin)
/*
* Function: cpu_arm7tdmi_reset(loc)
@@ -42,9 +54,9 @@ ENTRY(cpu_arm7tdmi_proc_fin)
* Purpose : Sets up everything for a reset and jump to the location for soft reset.
*/
.pushsection .idmap.text, "ax"
-ENTRY(cpu_arm7tdmi_reset)
+SYM_TYPED_FUNC_START(cpu_arm7tdmi_reset)
ret r0
-ENDPROC(cpu_arm7tdmi_reset)
+SYM_FUNC_END(cpu_arm7tdmi_reset)
.popsection
.type __arm7tdmi_setup, #function
diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S
index a234cd8ba5e6..a30df54ad5fa 100644
--- a/arch/arm/mm/proc-arm920.S
+++ b/arch/arm/mm/proc-arm920.S
@@ -13,6 +13,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <linux/pgtable.h>
#include <asm/assembler.h>
#include <asm/hwcap.h>
@@ -48,18 +49,20 @@
/*
* cpu_arm920_proc_init()
*/
-ENTRY(cpu_arm920_proc_init)
+SYM_TYPED_FUNC_START(cpu_arm920_proc_init)
ret lr
+SYM_FUNC_END(cpu_arm920_proc_init)
/*
* cpu_arm920_proc_fin()
*/
-ENTRY(cpu_arm920_proc_fin)
+SYM_TYPED_FUNC_START(cpu_arm920_proc_fin)
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
ret lr
+SYM_FUNC_END(cpu_arm920_proc_fin)
/*
* cpu_arm920_reset(loc)
@@ -72,7 +75,7 @@ ENTRY(cpu_arm920_proc_fin)
*/
.align 5
.pushsection .idmap.text, "ax"
-ENTRY(cpu_arm920_reset)
+SYM_TYPED_FUNC_START(cpu_arm920_reset)
mov ip, #0
mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches
mcr p15, 0, ip, c7, c10, 4 @ drain WB
@@ -84,17 +87,17 @@ ENTRY(cpu_arm920_reset)
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
ret r0
-ENDPROC(cpu_arm920_reset)
+SYM_FUNC_END(cpu_arm920_reset)
.popsection
/*
* cpu_arm920_do_idle()
*/
.align 5
-ENTRY(cpu_arm920_do_idle)
+SYM_TYPED_FUNC_START(cpu_arm920_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
ret lr
-
+SYM_FUNC_END(cpu_arm920_do_idle)
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
@@ -103,11 +106,11 @@ ENTRY(cpu_arm920_do_idle)
*
* Unconditionally clean and invalidate the entire icache.
*/
-ENTRY(arm920_flush_icache_all)
+SYM_TYPED_FUNC_START(arm920_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
ret lr
-ENDPROC(arm920_flush_icache_all)
+SYM_FUNC_END(arm920_flush_icache_all)
/*
* flush_user_cache_all()
@@ -115,15 +118,14 @@ ENDPROC(arm920_flush_icache_all)
* Invalidate all cache entries in a particular address
* space.
*/
-ENTRY(arm920_flush_user_cache_all)
- /* FALLTHROUGH */
+SYM_FUNC_ALIAS(arm920_flush_user_cache_all, arm920_flush_kern_cache_all)
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
-ENTRY(arm920_flush_kern_cache_all)
+SYM_TYPED_FUNC_START(arm920_flush_kern_cache_all)
mov r2, #VM_EXEC
mov ip, #0
__flush_whole_cache:
@@ -138,6 +140,7 @@ __flush_whole_cache:
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm920_flush_kern_cache_all)
/*
* flush_user_cache_range(start, end, flags)
@@ -149,7 +152,7 @@ __flush_whole_cache:
* - end - end address (exclusive)
* - flags - vm_flags for address space
*/
-ENTRY(arm920_flush_user_cache_range)
+SYM_TYPED_FUNC_START(arm920_flush_user_cache_range)
mov ip, #0
sub r3, r1, r0 @ calculate total size
cmp r3, #CACHE_DLIMIT
@@ -164,6 +167,7 @@ ENTRY(arm920_flush_user_cache_range)
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm920_flush_user_cache_range)
/*
* coherent_kern_range(start, end)
@@ -175,8 +179,11 @@ ENTRY(arm920_flush_user_cache_range)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(arm920_coherent_kern_range)
- /* FALLTHROUGH */
+SYM_TYPED_FUNC_START(arm920_coherent_kern_range)
+#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */
+ b arm920_coherent_user_range
+#endif
+SYM_FUNC_END(arm920_coherent_kern_range)
/*
* coherent_user_range(start, end)
@@ -188,7 +195,7 @@ ENTRY(arm920_coherent_kern_range)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(arm920_coherent_user_range)
+SYM_TYPED_FUNC_START(arm920_coherent_user_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry
@@ -198,6 +205,7 @@ ENTRY(arm920_coherent_user_range)
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov r0, #0
ret lr
+SYM_FUNC_END(arm920_coherent_user_range)
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -208,7 +216,7 @@ ENTRY(arm920_coherent_user_range)
* - addr - kernel address
* - size - region size
*/
-ENTRY(arm920_flush_kern_dcache_area)
+SYM_TYPED_FUNC_START(arm920_flush_kern_dcache_area)
add r1, r0, r1
1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
add r0, r0, #CACHE_DLINESIZE
@@ -218,6 +226,7 @@ ENTRY(arm920_flush_kern_dcache_area)
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm920_flush_kern_dcache_area)
/*
* dma_inv_range(start, end)
@@ -272,7 +281,7 @@ arm920_dma_clean_range:
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(arm920_dma_flush_range)
+SYM_TYPED_FUNC_START(arm920_dma_flush_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
add r0, r0, #CACHE_DLINESIZE
@@ -280,6 +289,7 @@ ENTRY(arm920_dma_flush_range)
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm920_dma_flush_range)
/*
* dma_map_area(start, size, dir)
@@ -287,13 +297,13 @@ ENTRY(arm920_dma_flush_range)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(arm920_dma_map_area)
+SYM_TYPED_FUNC_START(arm920_dma_map_area)
add r1, r1, r0
cmp r2, #DMA_TO_DEVICE
beq arm920_dma_clean_range
bcs arm920_dma_inv_range
b arm920_dma_flush_range
-ENDPROC(arm920_dma_map_area)
+SYM_FUNC_END(arm920_dma_map_area)
/*
* dma_unmap_area(start, size, dir)
@@ -301,24 +311,20 @@ ENDPROC(arm920_dma_map_area)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(arm920_dma_unmap_area)
+SYM_TYPED_FUNC_START(arm920_dma_unmap_area)
ret lr
-ENDPROC(arm920_dma_unmap_area)
+SYM_FUNC_END(arm920_dma_unmap_area)
- .globl arm920_flush_kern_cache_louis
- .equ arm920_flush_kern_cache_louis, arm920_flush_kern_cache_all
-
- @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
- define_cache_functions arm920
-#endif
+#endif /* !CONFIG_CPU_DCACHE_WRITETHROUGH */
-ENTRY(cpu_arm920_dcache_clean_area)
+SYM_TYPED_FUNC_START(cpu_arm920_dcache_clean_area)
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #CACHE_DLINESIZE
subs r1, r1, #CACHE_DLINESIZE
bhi 1b
ret lr
+SYM_FUNC_END(cpu_arm920_dcache_clean_area)
/* =============================== PageTable ============================== */
@@ -330,7 +336,7 @@ ENTRY(cpu_arm920_dcache_clean_area)
* pgd: new page tables
*/
.align 5
-ENTRY(cpu_arm920_switch_mm)
+SYM_TYPED_FUNC_START(cpu_arm920_switch_mm)
#ifdef CONFIG_MMU
mov ip, #0
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
@@ -354,6 +360,7 @@ ENTRY(cpu_arm920_switch_mm)
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
#endif
ret lr
+SYM_FUNC_END(cpu_arm920_switch_mm)
/*
* cpu_arm920_set_pte(ptep, pte, ext)
@@ -361,7 +368,7 @@ ENTRY(cpu_arm920_switch_mm)
* Set a PTE and flush it out
*/
.align 5
-ENTRY(cpu_arm920_set_pte_ext)
+SYM_TYPED_FUNC_START(cpu_arm920_set_pte_ext)
#ifdef CONFIG_MMU
armv3_set_pte_ext
mov r0, r0
@@ -369,21 +376,22 @@ ENTRY(cpu_arm920_set_pte_ext)
mcr p15, 0, r0, c7, c10, 4 @ drain WB
#endif
ret lr
+SYM_FUNC_END(cpu_arm920_set_pte_ext)
/* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */
.globl cpu_arm920_suspend_size
.equ cpu_arm920_suspend_size, 4 * 3
#ifdef CONFIG_ARM_CPU_SUSPEND
-ENTRY(cpu_arm920_do_suspend)
+SYM_TYPED_FUNC_START(cpu_arm920_do_suspend)
stmfd sp!, {r4 - r6, lr}
mrc p15, 0, r4, c13, c0, 0 @ PID
mrc p15, 0, r5, c3, c0, 0 @ Domain ID
mrc p15, 0, r6, c1, c0, 0 @ Control register
stmia r0, {r4 - r6}
ldmfd sp!, {r4 - r6, pc}
-ENDPROC(cpu_arm920_do_suspend)
+SYM_FUNC_END(cpu_arm920_do_suspend)
-ENTRY(cpu_arm920_do_resume)
+SYM_TYPED_FUNC_START(cpu_arm920_do_resume)
mov ip, #0
mcr p15, 0, ip, c8, c7, 0 @ invalidate I+D TLBs
mcr p15, 0, ip, c7, c7, 0 @ invalidate I+D caches
@@ -393,7 +401,7 @@ ENTRY(cpu_arm920_do_resume)
mcr p15, 0, r1, c2, c0, 0 @ TTB address
mov r0, r6 @ control register
b cpu_resume_mmu
-ENDPROC(cpu_arm920_do_resume)
+SYM_FUNC_END(cpu_arm920_do_resume)
#endif
.type __arm920_setup, #function
diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S
index 53c029dcfd83..aac4e048100d 100644
--- a/arch/arm/mm/proc-arm922.S
+++ b/arch/arm/mm/proc-arm922.S
@@ -14,6 +14,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <linux/pgtable.h>
#include <asm/assembler.h>
#include <asm/hwcap.h>
@@ -50,18 +51,20 @@
/*
* cpu_arm922_proc_init()
*/
-ENTRY(cpu_arm922_proc_init)
+SYM_TYPED_FUNC_START(cpu_arm922_proc_init)
ret lr
+SYM_FUNC_END(cpu_arm922_proc_init)
/*
* cpu_arm922_proc_fin()
*/
-ENTRY(cpu_arm922_proc_fin)
+SYM_TYPED_FUNC_START(cpu_arm922_proc_fin)
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
ret lr
+SYM_FUNC_END(cpu_arm922_proc_fin)
/*
* cpu_arm922_reset(loc)
@@ -74,7 +77,7 @@ ENTRY(cpu_arm922_proc_fin)
*/
.align 5
.pushsection .idmap.text, "ax"
-ENTRY(cpu_arm922_reset)
+SYM_TYPED_FUNC_START(cpu_arm922_reset)
mov ip, #0
mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches
mcr p15, 0, ip, c7, c10, 4 @ drain WB
@@ -86,17 +89,17 @@ ENTRY(cpu_arm922_reset)
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
ret r0
-ENDPROC(cpu_arm922_reset)
+SYM_FUNC_END(cpu_arm922_reset)
.popsection
/*
* cpu_arm922_do_idle()
*/
.align 5
-ENTRY(cpu_arm922_do_idle)
+SYM_TYPED_FUNC_START(cpu_arm922_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
ret lr
-
+SYM_FUNC_END(cpu_arm922_do_idle)
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
@@ -105,11 +108,11 @@ ENTRY(cpu_arm922_do_idle)
*
* Unconditionally clean and invalidate the entire icache.
*/
-ENTRY(arm922_flush_icache_all)
+SYM_TYPED_FUNC_START(arm922_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
ret lr
-ENDPROC(arm922_flush_icache_all)
+SYM_FUNC_END(arm922_flush_icache_all)
/*
* flush_user_cache_all()
@@ -117,15 +120,14 @@ ENDPROC(arm922_flush_icache_all)
* Clean and invalidate all cache entries in a particular
* address space.
*/
-ENTRY(arm922_flush_user_cache_all)
- /* FALLTHROUGH */
+SYM_FUNC_ALIAS(arm922_flush_user_cache_all, arm922_flush_kern_cache_all)
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
-ENTRY(arm922_flush_kern_cache_all)
+SYM_TYPED_FUNC_START(arm922_flush_kern_cache_all)
mov r2, #VM_EXEC
mov ip, #0
__flush_whole_cache:
@@ -140,6 +142,7 @@ __flush_whole_cache:
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm922_flush_kern_cache_all)
/*
* flush_user_cache_range(start, end, flags)
@@ -151,7 +154,7 @@ __flush_whole_cache:
* - end - end address (exclusive)
* - flags - vm_flags describing address space
*/
-ENTRY(arm922_flush_user_cache_range)
+SYM_TYPED_FUNC_START(arm922_flush_user_cache_range)
mov ip, #0
sub r3, r1, r0 @ calculate total size
cmp r3, #CACHE_DLIMIT
@@ -166,6 +169,7 @@ ENTRY(arm922_flush_user_cache_range)
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm922_flush_user_cache_range)
/*
* coherent_kern_range(start, end)
@@ -177,8 +181,11 @@ ENTRY(arm922_flush_user_cache_range)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(arm922_coherent_kern_range)
- /* FALLTHROUGH */
+SYM_TYPED_FUNC_START(arm922_coherent_kern_range)
+#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */
+ b arm922_coherent_user_range
+#endif
+SYM_FUNC_END(arm922_coherent_kern_range)
/*
* coherent_user_range(start, end)
@@ -190,7 +197,7 @@ ENTRY(arm922_coherent_kern_range)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(arm922_coherent_user_range)
+SYM_TYPED_FUNC_START(arm922_coherent_user_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry
@@ -200,6 +207,7 @@ ENTRY(arm922_coherent_user_range)
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov r0, #0
ret lr
+SYM_FUNC_END(arm922_coherent_user_range)
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -210,7 +218,7 @@ ENTRY(arm922_coherent_user_range)
* - addr - kernel address
* - size - region size
*/
-ENTRY(arm922_flush_kern_dcache_area)
+SYM_TYPED_FUNC_START(arm922_flush_kern_dcache_area)
add r1, r0, r1
1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
add r0, r0, #CACHE_DLINESIZE
@@ -220,6 +228,7 @@ ENTRY(arm922_flush_kern_dcache_area)
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm922_flush_kern_dcache_area)
/*
* dma_inv_range(start, end)
@@ -274,7 +283,7 @@ arm922_dma_clean_range:
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(arm922_dma_flush_range)
+SYM_TYPED_FUNC_START(arm922_dma_flush_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
add r0, r0, #CACHE_DLINESIZE
@@ -282,6 +291,7 @@ ENTRY(arm922_dma_flush_range)
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm922_dma_flush_range)
/*
* dma_map_area(start, size, dir)
@@ -289,13 +299,13 @@ ENTRY(arm922_dma_flush_range)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(arm922_dma_map_area)
+SYM_TYPED_FUNC_START(arm922_dma_map_area)
add r1, r1, r0
cmp r2, #DMA_TO_DEVICE
beq arm922_dma_clean_range
bcs arm922_dma_inv_range
b arm922_dma_flush_range
-ENDPROC(arm922_dma_map_area)
+SYM_FUNC_END(arm922_dma_map_area)
/*
* dma_unmap_area(start, size, dir)
@@ -303,19 +313,13 @@ ENDPROC(arm922_dma_map_area)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(arm922_dma_unmap_area)
+SYM_TYPED_FUNC_START(arm922_dma_unmap_area)
ret lr
-ENDPROC(arm922_dma_unmap_area)
-
- .globl arm922_flush_kern_cache_louis
- .equ arm922_flush_kern_cache_louis, arm922_flush_kern_cache_all
-
- @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
- define_cache_functions arm922
-#endif
+SYM_FUNC_END(arm922_dma_unmap_area)
+#endif /* !CONFIG_CPU_DCACHE_WRITETHROUGH */
-ENTRY(cpu_arm922_dcache_clean_area)
+SYM_TYPED_FUNC_START(cpu_arm922_dcache_clean_area)
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #CACHE_DLINESIZE
@@ -323,6 +327,7 @@ ENTRY(cpu_arm922_dcache_clean_area)
bhi 1b
#endif
ret lr
+SYM_FUNC_END(cpu_arm922_dcache_clean_area)
/* =============================== PageTable ============================== */
@@ -334,7 +339,7 @@ ENTRY(cpu_arm922_dcache_clean_area)
* pgd: new page tables
*/
.align 5
-ENTRY(cpu_arm922_switch_mm)
+SYM_TYPED_FUNC_START(cpu_arm922_switch_mm)
#ifdef CONFIG_MMU
mov ip, #0
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
@@ -358,6 +363,7 @@ ENTRY(cpu_arm922_switch_mm)
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
#endif
ret lr
+SYM_FUNC_END(cpu_arm922_switch_mm)
/*
* cpu_arm922_set_pte_ext(ptep, pte, ext)
@@ -365,7 +371,7 @@ ENTRY(cpu_arm922_switch_mm)
* Set a PTE and flush it out
*/
.align 5
-ENTRY(cpu_arm922_set_pte_ext)
+SYM_TYPED_FUNC_START(cpu_arm922_set_pte_ext)
#ifdef CONFIG_MMU
armv3_set_pte_ext
mov r0, r0
@@ -373,6 +379,7 @@ ENTRY(cpu_arm922_set_pte_ext)
mcr p15, 0, r0, c7, c10, 4 @ drain WB
#endif /* CONFIG_MMU */
ret lr
+SYM_FUNC_END(cpu_arm922_set_pte_ext)
.type __arm922_setup, #function
__arm922_setup:
diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S
index 0bfad62ea858..035941faeb2e 100644
--- a/arch/arm/mm/proc-arm925.S
+++ b/arch/arm/mm/proc-arm925.S
@@ -37,6 +37,7 @@
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <linux/pgtable.h>
#include <asm/assembler.h>
#include <asm/hwcap.h>
@@ -71,18 +72,20 @@
/*
* cpu_arm925_proc_init()
*/
-ENTRY(cpu_arm925_proc_init)
+SYM_TYPED_FUNC_START(cpu_arm925_proc_init)
ret lr
+SYM_FUNC_END(cpu_arm925_proc_init)
/*
* cpu_arm925_proc_fin()
*/
-ENTRY(cpu_arm925_proc_fin)
+SYM_TYPED_FUNC_START(cpu_arm925_proc_fin)
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
ret lr
+SYM_FUNC_END(cpu_arm925_proc_fin)
/*
* cpu_arm925_reset(loc)
@@ -95,14 +98,14 @@ ENTRY(cpu_arm925_proc_fin)
*/
.align 5
.pushsection .idmap.text, "ax"
-ENTRY(cpu_arm925_reset)
+SYM_TYPED_FUNC_START(cpu_arm925_reset)
/* Send software reset to MPU and DSP */
mov ip, #0xff000000
orr ip, ip, #0x00fe0000
orr ip, ip, #0x0000ce00
mov r4, #1
strh r4, [ip, #0x10]
-ENDPROC(cpu_arm925_reset)
+SYM_FUNC_END(cpu_arm925_reset)
.popsection
mov ip, #0
@@ -123,7 +126,7 @@ ENDPROC(cpu_arm925_reset)
* Called with IRQs disabled
*/
.align 10
-ENTRY(cpu_arm925_do_idle)
+SYM_TYPED_FUNC_START(cpu_arm925_do_idle)
mov r0, #0
mrc p15, 0, r1, c1, c0, 0 @ Read control register
mcr p15, 0, r0, c7, c10, 4 @ Drain write buffer
@@ -132,17 +135,18 @@ ENTRY(cpu_arm925_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
mcr p15, 0, r1, c1, c0, 0 @ Restore ICache enable
ret lr
+SYM_FUNC_END(cpu_arm925_do_idle)
/*
* flush_icache_all()
*
* Unconditionally clean and invalidate the entire icache.
*/
-ENTRY(arm925_flush_icache_all)
+SYM_TYPED_FUNC_START(arm925_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
ret lr
-ENDPROC(arm925_flush_icache_all)
+SYM_FUNC_END(arm925_flush_icache_all)
/*
* flush_user_cache_all()
@@ -150,15 +154,14 @@ ENDPROC(arm925_flush_icache_all)
* Clean and invalidate all cache entries in a particular
* address space.
*/
-ENTRY(arm925_flush_user_cache_all)
- /* FALLTHROUGH */
+SYM_FUNC_ALIAS(arm925_flush_user_cache_all, arm925_flush_kern_cache_all)
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
-ENTRY(arm925_flush_kern_cache_all)
+SYM_TYPED_FUNC_START(arm925_flush_kern_cache_all)
mov r2, #VM_EXEC
mov ip, #0
__flush_whole_cache:
@@ -175,6 +178,7 @@ __flush_whole_cache:
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm925_flush_kern_cache_all)
/*
* flush_user_cache_range(start, end, flags)
@@ -186,7 +190,7 @@ __flush_whole_cache:
* - end - end address (exclusive)
* - flags - vm_flags describing address space
*/
-ENTRY(arm925_flush_user_cache_range)
+SYM_TYPED_FUNC_START(arm925_flush_user_cache_range)
mov ip, #0
sub r3, r1, r0 @ calculate total size
cmp r3, #CACHE_DLIMIT
@@ -212,6 +216,7 @@ ENTRY(arm925_flush_user_cache_range)
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm925_flush_user_cache_range)
/*
* coherent_kern_range(start, end)
@@ -223,8 +228,11 @@ ENTRY(arm925_flush_user_cache_range)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(arm925_coherent_kern_range)
- /* FALLTHROUGH */
+SYM_TYPED_FUNC_START(arm925_coherent_kern_range)
+#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */
+ b arm925_coherent_user_range
+#endif
+SYM_FUNC_END(arm925_coherent_kern_range)
/*
* coherent_user_range(start, end)
@@ -236,7 +244,7 @@ ENTRY(arm925_coherent_kern_range)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(arm925_coherent_user_range)
+SYM_TYPED_FUNC_START(arm925_coherent_user_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry
@@ -246,6 +254,7 @@ ENTRY(arm925_coherent_user_range)
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov r0, #0
ret lr
+SYM_FUNC_END(arm925_coherent_user_range)
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -256,7 +265,7 @@ ENTRY(arm925_coherent_user_range)
* - addr - kernel address
* - size - region size
*/
-ENTRY(arm925_flush_kern_dcache_area)
+SYM_TYPED_FUNC_START(arm925_flush_kern_dcache_area)
add r1, r0, r1
1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
add r0, r0, #CACHE_DLINESIZE
@@ -266,6 +275,7 @@ ENTRY(arm925_flush_kern_dcache_area)
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm925_flush_kern_dcache_area)
/*
* dma_inv_range(start, end)
@@ -324,7 +334,7 @@ arm925_dma_clean_range:
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(arm925_dma_flush_range)
+SYM_TYPED_FUNC_START(arm925_dma_flush_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1:
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
@@ -337,6 +347,7 @@ ENTRY(arm925_dma_flush_range)
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm925_dma_flush_range)
/*
* dma_map_area(start, size, dir)
@@ -344,13 +355,13 @@ ENTRY(arm925_dma_flush_range)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(arm925_dma_map_area)
+SYM_TYPED_FUNC_START(arm925_dma_map_area)
add r1, r1, r0
cmp r2, #DMA_TO_DEVICE
beq arm925_dma_clean_range
bcs arm925_dma_inv_range
b arm925_dma_flush_range
-ENDPROC(arm925_dma_map_area)
+SYM_FUNC_END(arm925_dma_map_area)
/*
* dma_unmap_area(start, size, dir)
@@ -358,17 +369,11 @@ ENDPROC(arm925_dma_map_area)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(arm925_dma_unmap_area)
+SYM_TYPED_FUNC_START(arm925_dma_unmap_area)
ret lr
-ENDPROC(arm925_dma_unmap_area)
-
- .globl arm925_flush_kern_cache_louis
- .equ arm925_flush_kern_cache_louis, arm925_flush_kern_cache_all
-
- @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
- define_cache_functions arm925
+SYM_FUNC_END(arm925_dma_unmap_area)
-ENTRY(cpu_arm925_dcache_clean_area)
+SYM_TYPED_FUNC_START(cpu_arm925_dcache_clean_area)
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #CACHE_DLINESIZE
@@ -377,6 +382,7 @@ ENTRY(cpu_arm925_dcache_clean_area)
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(cpu_arm925_dcache_clean_area)
/* =============================== PageTable ============================== */
@@ -388,7 +394,7 @@ ENTRY(cpu_arm925_dcache_clean_area)
* pgd: new page tables
*/
.align 5
-ENTRY(cpu_arm925_switch_mm)
+SYM_TYPED_FUNC_START(cpu_arm925_switch_mm)
#ifdef CONFIG_MMU
mov ip, #0
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
@@ -406,6 +412,7 @@ ENTRY(cpu_arm925_switch_mm)
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
#endif
ret lr
+SYM_FUNC_END(cpu_arm925_switch_mm)
/*
* cpu_arm925_set_pte_ext(ptep, pte, ext)
@@ -413,7 +420,7 @@ ENTRY(cpu_arm925_switch_mm)
* Set a PTE and flush it out
*/
.align 5
-ENTRY(cpu_arm925_set_pte_ext)
+SYM_TYPED_FUNC_START(cpu_arm925_set_pte_ext)
#ifdef CONFIG_MMU
armv3_set_pte_ext
mov r0, r0
@@ -423,6 +430,7 @@ ENTRY(cpu_arm925_set_pte_ext)
mcr p15, 0, r0, c7, c10, 4 @ drain WB
#endif /* CONFIG_MMU */
ret lr
+SYM_FUNC_END(cpu_arm925_set_pte_ext)
.type __arm925_setup, #function
__arm925_setup:
diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S
index 0487a2c3439b..6f43d6af2d9a 100644
--- a/arch/arm/mm/proc-arm926.S
+++ b/arch/arm/mm/proc-arm926.S
@@ -13,6 +13,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <linux/pgtable.h>
#include <asm/assembler.h>
#include <asm/hwcap.h>
@@ -40,18 +41,20 @@
/*
* cpu_arm926_proc_init()
*/
-ENTRY(cpu_arm926_proc_init)
+SYM_TYPED_FUNC_START(cpu_arm926_proc_init)
ret lr
+SYM_FUNC_END(cpu_arm926_proc_init)
/*
* cpu_arm926_proc_fin()
*/
-ENTRY(cpu_arm926_proc_fin)
+SYM_TYPED_FUNC_START(cpu_arm926_proc_fin)
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
ret lr
+SYM_FUNC_END(cpu_arm926_proc_fin)
/*
* cpu_arm926_reset(loc)
@@ -64,7 +67,7 @@ ENTRY(cpu_arm926_proc_fin)
*/
.align 5
.pushsection .idmap.text, "ax"
-ENTRY(cpu_arm926_reset)
+SYM_TYPED_FUNC_START(cpu_arm926_reset)
mov ip, #0
mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches
mcr p15, 0, ip, c7, c10, 4 @ drain WB
@@ -76,7 +79,7 @@ ENTRY(cpu_arm926_reset)
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
ret r0
-ENDPROC(cpu_arm926_reset)
+SYM_FUNC_END(cpu_arm926_reset)
.popsection
/*
@@ -85,7 +88,7 @@ ENDPROC(cpu_arm926_reset)
* Called with IRQs disabled
*/
.align 10
-ENTRY(cpu_arm926_do_idle)
+SYM_TYPED_FUNC_START(cpu_arm926_do_idle)
mov r0, #0
mrc p15, 0, r1, c1, c0, 0 @ Read control register
mcr p15, 0, r0, c7, c10, 4 @ Drain write buffer
@@ -98,17 +101,18 @@ ENTRY(cpu_arm926_do_idle)
mcr p15, 0, r1, c1, c0, 0 @ Restore ICache enable
msr cpsr_c, r3 @ Restore FIQ state
ret lr
+SYM_FUNC_END(cpu_arm926_do_idle)
/*
* flush_icache_all()
*
* Unconditionally clean and invalidate the entire icache.
*/
-ENTRY(arm926_flush_icache_all)
+SYM_TYPED_FUNC_START(arm926_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
ret lr
-ENDPROC(arm926_flush_icache_all)
+SYM_FUNC_END(arm926_flush_icache_all)
/*
* flush_user_cache_all()
@@ -116,15 +120,14 @@ ENDPROC(arm926_flush_icache_all)
* Clean and invalidate all cache entries in a particular
* address space.
*/
-ENTRY(arm926_flush_user_cache_all)
- /* FALLTHROUGH */
+SYM_FUNC_ALIAS(arm926_flush_user_cache_all, arm926_flush_kern_cache_all)
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
-ENTRY(arm926_flush_kern_cache_all)
+SYM_TYPED_FUNC_START(arm926_flush_kern_cache_all)
mov r2, #VM_EXEC
mov ip, #0
__flush_whole_cache:
@@ -138,6 +141,7 @@ __flush_whole_cache:
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm926_flush_kern_cache_all)
/*
* flush_user_cache_range(start, end, flags)
@@ -149,7 +153,7 @@ __flush_whole_cache:
* - end - end address (exclusive)
* - flags - vm_flags describing address space
*/
-ENTRY(arm926_flush_user_cache_range)
+SYM_TYPED_FUNC_START(arm926_flush_user_cache_range)
mov ip, #0
sub r3, r1, r0 @ calculate total size
cmp r3, #CACHE_DLIMIT
@@ -175,6 +179,7 @@ ENTRY(arm926_flush_user_cache_range)
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm926_flush_user_cache_range)
/*
* coherent_kern_range(start, end)
@@ -186,8 +191,11 @@ ENTRY(arm926_flush_user_cache_range)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(arm926_coherent_kern_range)
- /* FALLTHROUGH */
+SYM_TYPED_FUNC_START(arm926_coherent_kern_range)
+#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */
+ b arm926_coherent_user_range
+#endif
+SYM_FUNC_END(arm926_coherent_kern_range)
/*
* coherent_user_range(start, end)
@@ -199,7 +207,7 @@ ENTRY(arm926_coherent_kern_range)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(arm926_coherent_user_range)
+SYM_TYPED_FUNC_START(arm926_coherent_user_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry
@@ -209,6 +217,7 @@ ENTRY(arm926_coherent_user_range)
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov r0, #0
ret lr
+SYM_FUNC_END(arm926_coherent_user_range)
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -219,7 +228,7 @@ ENTRY(arm926_coherent_user_range)
* - addr - kernel address
* - size - region size
*/
-ENTRY(arm926_flush_kern_dcache_area)
+SYM_TYPED_FUNC_START(arm926_flush_kern_dcache_area)
add r1, r0, r1
1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
add r0, r0, #CACHE_DLINESIZE
@@ -229,6 +238,7 @@ ENTRY(arm926_flush_kern_dcache_area)
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm926_flush_kern_dcache_area)
/*
* dma_inv_range(start, end)
@@ -287,7 +297,7 @@ arm926_dma_clean_range:
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(arm926_dma_flush_range)
+SYM_TYPED_FUNC_START(arm926_dma_flush_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1:
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
@@ -300,6 +310,7 @@ ENTRY(arm926_dma_flush_range)
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm926_dma_flush_range)
/*
* dma_map_area(start, size, dir)
@@ -307,13 +318,13 @@ ENTRY(arm926_dma_flush_range)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(arm926_dma_map_area)
+SYM_TYPED_FUNC_START(arm926_dma_map_area)
add r1, r1, r0
cmp r2, #DMA_TO_DEVICE
beq arm926_dma_clean_range
bcs arm926_dma_inv_range
b arm926_dma_flush_range
-ENDPROC(arm926_dma_map_area)
+SYM_FUNC_END(arm926_dma_map_area)
/*
* dma_unmap_area(start, size, dir)
@@ -321,17 +332,11 @@ ENDPROC(arm926_dma_map_area)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(arm926_dma_unmap_area)
+SYM_TYPED_FUNC_START(arm926_dma_unmap_area)
ret lr
-ENDPROC(arm926_dma_unmap_area)
-
- .globl arm926_flush_kern_cache_louis
- .equ arm926_flush_kern_cache_louis, arm926_flush_kern_cache_all
+SYM_FUNC_END(arm926_dma_unmap_area)
- @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
- define_cache_functions arm926
-
-ENTRY(cpu_arm926_dcache_clean_area)
+SYM_TYPED_FUNC_START(cpu_arm926_dcache_clean_area)
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #CACHE_DLINESIZE
@@ -340,6 +345,7 @@ ENTRY(cpu_arm926_dcache_clean_area)
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(cpu_arm926_dcache_clean_area)
/* =============================== PageTable ============================== */
@@ -351,7 +357,8 @@ ENTRY(cpu_arm926_dcache_clean_area)
* pgd: new page tables
*/
.align 5
-ENTRY(cpu_arm926_switch_mm)
+
+SYM_TYPED_FUNC_START(cpu_arm926_switch_mm)
#ifdef CONFIG_MMU
mov ip, #0
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
@@ -367,6 +374,7 @@ ENTRY(cpu_arm926_switch_mm)
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
#endif
ret lr
+SYM_FUNC_END(cpu_arm926_switch_mm)
/*
* cpu_arm926_set_pte_ext(ptep, pte, ext)
@@ -374,7 +382,7 @@ ENTRY(cpu_arm926_switch_mm)
* Set a PTE and flush it out
*/
.align 5
-ENTRY(cpu_arm926_set_pte_ext)
+SYM_TYPED_FUNC_START(cpu_arm926_set_pte_ext)
#ifdef CONFIG_MMU
armv3_set_pte_ext
mov r0, r0
@@ -384,21 +392,22 @@ ENTRY(cpu_arm926_set_pte_ext)
mcr p15, 0, r0, c7, c10, 4 @ drain WB
#endif
ret lr
+SYM_FUNC_END(cpu_arm926_set_pte_ext)
/* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */
.globl cpu_arm926_suspend_size
.equ cpu_arm926_suspend_size, 4 * 3
#ifdef CONFIG_ARM_CPU_SUSPEND
-ENTRY(cpu_arm926_do_suspend)
+SYM_TYPED_FUNC_START(cpu_arm926_do_suspend)
stmfd sp!, {r4 - r6, lr}
mrc p15, 0, r4, c13, c0, 0 @ PID
mrc p15, 0, r5, c3, c0, 0 @ Domain ID
mrc p15, 0, r6, c1, c0, 0 @ Control register
stmia r0, {r4 - r6}
ldmfd sp!, {r4 - r6, pc}
-ENDPROC(cpu_arm926_do_suspend)
+SYM_FUNC_END(cpu_arm926_do_suspend)
-ENTRY(cpu_arm926_do_resume)
+SYM_TYPED_FUNC_START(cpu_arm926_do_resume)
mov ip, #0
mcr p15, 0, ip, c8, c7, 0 @ invalidate I+D TLBs
mcr p15, 0, ip, c7, c7, 0 @ invalidate I+D caches
@@ -408,7 +417,7 @@ ENTRY(cpu_arm926_do_resume)
mcr p15, 0, r1, c2, c0, 0 @ TTB address
mov r0, r6 @ control register
b cpu_resume_mmu
-ENDPROC(cpu_arm926_do_resume)
+SYM_FUNC_END(cpu_arm926_do_resume)
#endif
.type __arm926_setup, #function
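The same pattern repeats across every proc-*.S file in this series: each ENTRY()/ENDPROC() pair becomes SYM_TYPED_FUNC_START()/SYM_FUNC_END() from <linux/cfi_types.h>, because these cache and processor helpers are reached through function-pointer tables and therefore need CFI type information attached to their entry points. The sketch below is a conceptual model in plain C, assuming the kCFI scheme where a 32-bit type hash is stored immediately in front of each indirectly-callable function; the names and the hash value are made up for illustration and are not the kernel's actual implementation.

#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>

typedef void (*cache_range_fn)(unsigned long start, unsigned long end);

/* Hypothetical hash for this pointer type; kCFI derives the real value
 * from the function-pointer prototype at build time. */
#define CACHE_RANGE_TYPE_HASH 0x5f0cd943u

static void checked_indirect_call(cache_range_fn fn,
                                  unsigned long start, unsigned long end)
{
        uint32_t found;

        /* The typed-symbol macros arrange for the hash to sit immediately
         * before the function's first instruction, so the caller can read
         * it back through the function pointer. */
        memcpy(&found, (const uint8_t *)(uintptr_t)fn - sizeof(found),
               sizeof(found));

        if (found != CACHE_RANGE_TYPE_HASH) {
                fprintf(stderr, "CFI type mismatch on indirect call\n");
                abort();
        }

        fn(start, end);         /* hash matched: the call proceeds */
}

Because that hash sits directly before the entry point, one function can no longer silently fall through into the next; that is why the hunks above and below replace the old fallthrough idiom with explicit branches or aliases.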
diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S
index cf9bfcc825ca..0d30bb25c42b 100644
--- a/arch/arm/mm/proc-arm940.S
+++ b/arch/arm/mm/proc-arm940.S
@@ -6,6 +6,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <linux/pgtable.h>
#include <asm/assembler.h>
#include <asm/hwcap.h>
@@ -25,19 +26,24 @@
*
* These are not required.
*/
-ENTRY(cpu_arm940_proc_init)
-ENTRY(cpu_arm940_switch_mm)
+SYM_TYPED_FUNC_START(cpu_arm940_proc_init)
ret lr
+SYM_FUNC_END(cpu_arm940_proc_init)
+
+SYM_TYPED_FUNC_START(cpu_arm940_switch_mm)
+ ret lr
+SYM_FUNC_END(cpu_arm940_switch_mm)
/*
* cpu_arm940_proc_fin()
*/
-ENTRY(cpu_arm940_proc_fin)
+SYM_TYPED_FUNC_START(cpu_arm940_proc_fin)
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x00001000 @ i-cache
bic r0, r0, #0x00000004 @ d-cache
mcr p15, 0, r0, c1, c0, 0 @ disable caches
ret lr
+SYM_FUNC_END(cpu_arm940_proc_fin)
/*
* cpu_arm940_reset(loc)
@@ -45,7 +51,7 @@ ENTRY(cpu_arm940_proc_fin)
* Notes : This sets up everything for a reset
*/
.pushsection .idmap.text, "ax"
-ENTRY(cpu_arm940_reset)
+SYM_TYPED_FUNC_START(cpu_arm940_reset)
mov ip, #0
mcr p15, 0, ip, c7, c5, 0 @ flush I cache
mcr p15, 0, ip, c7, c6, 0 @ flush D cache
@@ -55,42 +61,43 @@ ENTRY(cpu_arm940_reset)
bic ip, ip, #0x00001000 @ i-cache
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
ret r0
-ENDPROC(cpu_arm940_reset)
+SYM_FUNC_END(cpu_arm940_reset)
.popsection
/*
* cpu_arm940_do_idle()
*/
.align 5
-ENTRY(cpu_arm940_do_idle)
+SYM_TYPED_FUNC_START(cpu_arm940_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
ret lr
+SYM_FUNC_END(cpu_arm940_do_idle)
/*
* flush_icache_all()
*
* Unconditionally clean and invalidate the entire icache.
*/
-ENTRY(arm940_flush_icache_all)
+SYM_TYPED_FUNC_START(arm940_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
ret lr
-ENDPROC(arm940_flush_icache_all)
+SYM_FUNC_END(arm940_flush_icache_all)
/*
* flush_user_cache_all()
*/
-ENTRY(arm940_flush_user_cache_all)
- /* FALLTHROUGH */
+SYM_FUNC_ALIAS(arm940_flush_user_cache_all, arm940_flush_kern_cache_all)
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
-ENTRY(arm940_flush_kern_cache_all)
+SYM_TYPED_FUNC_START(arm940_flush_kern_cache_all)
mov r2, #VM_EXEC
- /* FALLTHROUGH */
+ b arm940_flush_user_cache_range
+SYM_FUNC_END(arm940_flush_kern_cache_all)
/*
* flush_user_cache_range(start, end, flags)
@@ -102,7 +109,7 @@ ENTRY(arm940_flush_kern_cache_all)
* - end - end address (exclusive)
* - flags - vm_flags describing address space
*/
-ENTRY(arm940_flush_user_cache_range)
+SYM_TYPED_FUNC_START(arm940_flush_user_cache_range)
mov ip, #0
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
mcr p15, 0, ip, c7, c6, 0 @ flush D cache
@@ -119,6 +126,7 @@ ENTRY(arm940_flush_user_cache_range)
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm940_flush_user_cache_range)
/*
* coherent_kern_range(start, end)
@@ -130,8 +138,9 @@ ENTRY(arm940_flush_user_cache_range)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(arm940_coherent_kern_range)
- /* FALLTHROUGH */
+SYM_TYPED_FUNC_START(arm940_coherent_kern_range)
+ b arm940_flush_kern_dcache_area
+SYM_FUNC_END(arm940_coherent_kern_range)
/*
* coherent_user_range(start, end)
@@ -143,8 +152,11 @@ ENTRY(arm940_coherent_kern_range)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(arm940_coherent_user_range)
- /* FALLTHROUGH */
+SYM_TYPED_FUNC_START(arm940_coherent_user_range)
+#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */
+ b arm940_flush_kern_dcache_area
+#endif
+SYM_FUNC_END(arm940_coherent_user_range)
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -155,7 +167,7 @@ ENTRY(arm940_coherent_user_range)
* - addr - kernel address
* - size - region size
*/
-ENTRY(arm940_flush_kern_dcache_area)
+SYM_TYPED_FUNC_START(arm940_flush_kern_dcache_area)
mov r0, #0
mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments
1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries
@@ -167,6 +179,7 @@ ENTRY(arm940_flush_kern_dcache_area)
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm940_flush_kern_dcache_area)
/*
* dma_inv_range(start, end)
@@ -199,7 +212,7 @@ arm940_dma_inv_range:
* - end - virtual end address
*/
arm940_dma_clean_range:
-ENTRY(cpu_arm940_dcache_clean_area)
+SYM_TYPED_FUNC_START(cpu_arm940_dcache_clean_area)
mov ip, #0
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments
@@ -212,6 +225,7 @@ ENTRY(cpu_arm940_dcache_clean_area)
#endif
mcr p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(cpu_arm940_dcache_clean_area)
/*
* dma_flush_range(start, end)
@@ -222,7 +236,7 @@ ENTRY(cpu_arm940_dcache_clean_area)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(arm940_dma_flush_range)
+SYM_TYPED_FUNC_START(arm940_dma_flush_range)
mov ip, #0
mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments
1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries
@@ -238,6 +252,7 @@ ENTRY(arm940_dma_flush_range)
bcs 1b @ segments 7 to 0
mcr p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm940_dma_flush_range)
/*
* dma_map_area(start, size, dir)
@@ -245,13 +260,13 @@ ENTRY(arm940_dma_flush_range)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(arm940_dma_map_area)
+SYM_TYPED_FUNC_START(arm940_dma_map_area)
add r1, r1, r0
cmp r2, #DMA_TO_DEVICE
beq arm940_dma_clean_range
bcs arm940_dma_inv_range
b arm940_dma_flush_range
-ENDPROC(arm940_dma_map_area)
+SYM_FUNC_END(arm940_dma_map_area)
/*
* dma_unmap_area(start, size, dir)
@@ -259,15 +274,9 @@ ENDPROC(arm940_dma_map_area)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(arm940_dma_unmap_area)
+SYM_TYPED_FUNC_START(arm940_dma_unmap_area)
ret lr
-ENDPROC(arm940_dma_unmap_area)
-
- .globl arm940_flush_kern_cache_louis
- .equ arm940_flush_kern_cache_louis, arm940_flush_kern_cache_all
-
- @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
- define_cache_functions arm940
+SYM_FUNC_END(arm940_dma_unmap_area)
.type __arm940_setup, #function
__arm940_setup:
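The arm940 hunks show both ways the old fallthrough idiom is handled once every entry point carries a CFI preamble. Where one name was simply another way into the next routine (flush_user_cache_all into flush_kern_cache_all), the patch uses SYM_FUNC_ALIAS so both symbols resolve to the same body. Where the routines must remain distinct typed entries, an explicit branch to the old fallthrough target is inserted; in some places that branch is wrapped in #ifdef CONFIG_CFI_CLANG (the in-line "Fallthrough if !CFI" comments) because without CFI there is no type-hash preamble between the two functions and the original fallthrough still reaches the same code. A C-level analogy of the alias case, assuming the GCC/Clang alias attribute and with the body reduced to a stub:

/* Analogy only: SYM_FUNC_ALIAS gives two names to one address. */
void arm940_flush_kern_cache_all(void)
{
        /* ... clean and invalidate the whole cache ... */
}

void arm940_flush_user_cache_all(void)
        __attribute__((alias("arm940_flush_kern_cache_all")));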
diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S
index 6fb3898ad1cd..27750ace2ced 100644
--- a/arch/arm/mm/proc-arm946.S
+++ b/arch/arm/mm/proc-arm946.S
@@ -8,6 +8,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <linux/pgtable.h>
#include <asm/assembler.h>
#include <asm/hwcap.h>
@@ -32,19 +33,24 @@
*
* These are not required.
*/
-ENTRY(cpu_arm946_proc_init)
-ENTRY(cpu_arm946_switch_mm)
+SYM_TYPED_FUNC_START(cpu_arm946_proc_init)
ret lr
+SYM_FUNC_END(cpu_arm946_proc_init)
+
+SYM_TYPED_FUNC_START(cpu_arm946_switch_mm)
+ ret lr
+SYM_FUNC_END(cpu_arm946_switch_mm)
/*
* cpu_arm946_proc_fin()
*/
-ENTRY(cpu_arm946_proc_fin)
+SYM_TYPED_FUNC_START(cpu_arm946_proc_fin)
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x00001000 @ i-cache
bic r0, r0, #0x00000004 @ d-cache
mcr p15, 0, r0, c1, c0, 0 @ disable caches
ret lr
+SYM_FUNC_END(cpu_arm946_proc_fin)
/*
* cpu_arm946_reset(loc)
@@ -52,7 +58,7 @@ ENTRY(cpu_arm946_proc_fin)
* Notes : This sets up everything for a reset
*/
.pushsection .idmap.text, "ax"
-ENTRY(cpu_arm946_reset)
+SYM_TYPED_FUNC_START(cpu_arm946_reset)
mov ip, #0
mcr p15, 0, ip, c7, c5, 0 @ flush I cache
mcr p15, 0, ip, c7, c6, 0 @ flush D cache
@@ -62,40 +68,40 @@ ENTRY(cpu_arm946_reset)
bic ip, ip, #0x00001000 @ i-cache
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
ret r0
-ENDPROC(cpu_arm946_reset)
+SYM_FUNC_END(cpu_arm946_reset)
.popsection
/*
* cpu_arm946_do_idle()
*/
.align 5
-ENTRY(cpu_arm946_do_idle)
+SYM_TYPED_FUNC_START(cpu_arm946_do_idle)
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
ret lr
+SYM_FUNC_END(cpu_arm946_do_idle)
/*
* flush_icache_all()
*
* Unconditionally clean and invalidate the entire icache.
*/
-ENTRY(arm946_flush_icache_all)
+SYM_TYPED_FUNC_START(arm946_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
ret lr
-ENDPROC(arm946_flush_icache_all)
+SYM_FUNC_END(arm946_flush_icache_all)
/*
* flush_user_cache_all()
*/
-ENTRY(arm946_flush_user_cache_all)
- /* FALLTHROUGH */
+SYM_FUNC_ALIAS(arm946_flush_user_cache_all, arm946_flush_kern_cache_all)
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
-ENTRY(arm946_flush_kern_cache_all)
+SYM_TYPED_FUNC_START(arm946_flush_kern_cache_all)
mov r2, #VM_EXEC
mov ip, #0
__flush_whole_cache:
@@ -114,6 +120,7 @@ __flush_whole_cache:
mcrne p15, 0, ip, c7, c5, 0 @ flush I cache
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm946_flush_kern_cache_all)
/*
* flush_user_cache_range(start, end, flags)
@@ -126,7 +133,7 @@ __flush_whole_cache:
* - flags - vm_flags describing address space
* (same as arm926)
*/
-ENTRY(arm946_flush_user_cache_range)
+SYM_TYPED_FUNC_START(arm946_flush_user_cache_range)
mov ip, #0
sub r3, r1, r0 @ calculate total size
cmp r3, #CACHE_DLIMIT
@@ -153,6 +160,7 @@ ENTRY(arm946_flush_user_cache_range)
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm946_flush_user_cache_range)
/*
* coherent_kern_range(start, end)
@@ -164,8 +172,11 @@ ENTRY(arm946_flush_user_cache_range)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(arm946_coherent_kern_range)
- /* FALLTHROUGH */
+SYM_TYPED_FUNC_START(arm946_coherent_kern_range)
+#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */
+ b arm946_coherent_user_range
+#endif
+SYM_FUNC_END(arm946_coherent_kern_range)
/*
* coherent_user_range(start, end)
@@ -178,7 +189,7 @@ ENTRY(arm946_coherent_kern_range)
* - end - virtual end address
* (same as arm926)
*/
-ENTRY(arm946_coherent_user_range)
+SYM_TYPED_FUNC_START(arm946_coherent_user_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry
@@ -188,6 +199,7 @@ ENTRY(arm946_coherent_user_range)
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov r0, #0
ret lr
+SYM_FUNC_END(arm946_coherent_user_range)
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -199,7 +211,7 @@ ENTRY(arm946_coherent_user_range)
* - size - region size
* (same as arm926)
*/
-ENTRY(arm946_flush_kern_dcache_area)
+SYM_TYPED_FUNC_START(arm946_flush_kern_dcache_area)
add r1, r0, r1
1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
add r0, r0, #CACHE_DLINESIZE
@@ -209,6 +221,7 @@ ENTRY(arm946_flush_kern_dcache_area)
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm946_flush_kern_dcache_area)
/*
* dma_inv_range(start, end)
@@ -268,7 +281,7 @@ arm946_dma_clean_range:
*
* (same as arm926)
*/
-ENTRY(arm946_dma_flush_range)
+SYM_TYPED_FUNC_START(arm946_dma_flush_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1:
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
@@ -281,6 +294,7 @@ ENTRY(arm946_dma_flush_range)
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(arm946_dma_flush_range)
/*
* dma_map_area(start, size, dir)
@@ -288,13 +302,13 @@ ENTRY(arm946_dma_flush_range)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(arm946_dma_map_area)
+SYM_TYPED_FUNC_START(arm946_dma_map_area)
add r1, r1, r0
cmp r2, #DMA_TO_DEVICE
beq arm946_dma_clean_range
bcs arm946_dma_inv_range
b arm946_dma_flush_range
-ENDPROC(arm946_dma_map_area)
+SYM_FUNC_END(arm946_dma_map_area)
/*
* dma_unmap_area(start, size, dir)
@@ -302,17 +316,11 @@ ENDPROC(arm946_dma_map_area)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(arm946_dma_unmap_area)
+SYM_TYPED_FUNC_START(arm946_dma_unmap_area)
ret lr
-ENDPROC(arm946_dma_unmap_area)
-
- .globl arm946_flush_kern_cache_louis
- .equ arm946_flush_kern_cache_louis, arm946_flush_kern_cache_all
-
- @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
- define_cache_functions arm946
+SYM_FUNC_END(arm946_dma_unmap_area)
-ENTRY(cpu_arm946_dcache_clean_area)
+SYM_TYPED_FUNC_START(cpu_arm946_dcache_clean_area)
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #CACHE_DLINESIZE
@@ -321,6 +329,7 @@ ENTRY(cpu_arm946_dcache_clean_area)
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(cpu_arm946_dcache_clean_area)
.type __arm946_setup, #function
__arm946_setup:
diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S
index a054c0e9c034..c480a8400eff 100644
--- a/arch/arm/mm/proc-arm9tdmi.S
+++ b/arch/arm/mm/proc-arm9tdmi.S
@@ -6,6 +6,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <linux/pgtable.h>
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
@@ -24,17 +25,28 @@
*
* These are not required.
*/
-ENTRY(cpu_arm9tdmi_proc_init)
-ENTRY(cpu_arm9tdmi_do_idle)
-ENTRY(cpu_arm9tdmi_dcache_clean_area)
-ENTRY(cpu_arm9tdmi_switch_mm)
+SYM_TYPED_FUNC_START(cpu_arm9tdmi_proc_init)
ret lr
+SYM_FUNC_END(cpu_arm9tdmi_proc_init)
+
+SYM_TYPED_FUNC_START(cpu_arm9tdmi_do_idle)
+ ret lr
+SYM_FUNC_END(cpu_arm9tdmi_do_idle)
+
+SYM_TYPED_FUNC_START(cpu_arm9tdmi_dcache_clean_area)
+ ret lr
+SYM_FUNC_END(cpu_arm9tdmi_dcache_clean_area)
+
+SYM_TYPED_FUNC_START(cpu_arm9tdmi_switch_mm)
+ ret lr
+SYM_FUNC_END(cpu_arm9tdmi_switch_mm)
/*
* cpu_arm9tdmi_proc_fin()
*/
-ENTRY(cpu_arm9tdmi_proc_fin)
+SYM_TYPED_FUNC_START(cpu_arm9tdmi_proc_fin)
ret lr
+SYM_FUNC_END(cpu_arm9tdmi_proc_fin)
/*
* Function: cpu_arm9tdmi_reset(loc)
@@ -42,9 +54,9 @@ ENTRY(cpu_arm9tdmi_proc_fin)
* Purpose : Sets up everything for a reset and jump to the location for soft reset.
*/
.pushsection .idmap.text, "ax"
-ENTRY(cpu_arm9tdmi_reset)
+SYM_TYPED_FUNC_START(cpu_arm9tdmi_reset)
ret r0
-ENDPROC(cpu_arm9tdmi_reset)
+SYM_FUNC_END(cpu_arm9tdmi_reset)
.popsection
.type __arm9tdmi_setup, #function
diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S
index 2c73e0d47d08..7c16ccac8a05 100644
--- a/arch/arm/mm/proc-fa526.S
+++ b/arch/arm/mm/proc-fa526.S
@@ -11,6 +11,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <linux/pgtable.h>
#include <asm/assembler.h>
#include <asm/hwcap.h>
@@ -26,13 +27,14 @@
/*
* cpu_fa526_proc_init()
*/
-ENTRY(cpu_fa526_proc_init)
+SYM_TYPED_FUNC_START(cpu_fa526_proc_init)
ret lr
+SYM_FUNC_END(cpu_fa526_proc_init)
/*
* cpu_fa526_proc_fin()
*/
-ENTRY(cpu_fa526_proc_fin)
+SYM_TYPED_FUNC_START(cpu_fa526_proc_fin)
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
@@ -40,6 +42,7 @@ ENTRY(cpu_fa526_proc_fin)
nop
nop
ret lr
+SYM_FUNC_END(cpu_fa526_proc_fin)
/*
* cpu_fa526_reset(loc)
@@ -52,7 +55,7 @@ ENTRY(cpu_fa526_proc_fin)
*/
.align 4
.pushsection .idmap.text, "ax"
-ENTRY(cpu_fa526_reset)
+SYM_TYPED_FUNC_START(cpu_fa526_reset)
/* TODO: Use CP8 if possible... */
mov ip, #0
mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches
@@ -68,24 +71,25 @@ ENTRY(cpu_fa526_reset)
nop
nop
ret r0
-ENDPROC(cpu_fa526_reset)
+SYM_FUNC_END(cpu_fa526_reset)
.popsection
/*
* cpu_fa526_do_idle()
*/
.align 4
-ENTRY(cpu_fa526_do_idle)
+SYM_TYPED_FUNC_START(cpu_fa526_do_idle)
ret lr
+SYM_FUNC_END(cpu_fa526_do_idle)
-
-ENTRY(cpu_fa526_dcache_clean_area)
+SYM_TYPED_FUNC_START(cpu_fa526_dcache_clean_area)
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #CACHE_DLINESIZE
subs r1, r1, #CACHE_DLINESIZE
bhi 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(cpu_fa526_dcache_clean_area)
/* =============================== PageTable ============================== */
@@ -97,7 +101,7 @@ ENTRY(cpu_fa526_dcache_clean_area)
* pgd: new page tables
*/
.align 4
-ENTRY(cpu_fa526_switch_mm)
+SYM_TYPED_FUNC_START(cpu_fa526_switch_mm)
#ifdef CONFIG_MMU
mov ip, #0
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
@@ -113,6 +117,7 @@ ENTRY(cpu_fa526_switch_mm)
mcr p15, 0, ip, c8, c7, 0 @ invalidate UTLB
#endif
ret lr
+SYM_FUNC_END(cpu_fa526_switch_mm)
/*
* cpu_fa526_set_pte_ext(ptep, pte, ext)
@@ -120,7 +125,7 @@ ENTRY(cpu_fa526_switch_mm)
* Set a PTE and flush it out
*/
.align 4
-ENTRY(cpu_fa526_set_pte_ext)
+SYM_TYPED_FUNC_START(cpu_fa526_set_pte_ext)
#ifdef CONFIG_MMU
armv3_set_pte_ext
mov r0, r0
@@ -129,6 +134,7 @@ ENTRY(cpu_fa526_set_pte_ext)
mcr p15, 0, r0, c7, c10, 4 @ drain WB
#endif
ret lr
+SYM_FUNC_END(cpu_fa526_set_pte_ext)
.type __fa526_setup, #function
__fa526_setup:
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
index 072ff9b451f8..f67b2ffac854 100644
--- a/arch/arm/mm/proc-feroceon.S
+++ b/arch/arm/mm/proc-feroceon.S
@@ -8,6 +8,7 @@
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <linux/pgtable.h>
#include <asm/assembler.h>
#include <asm/hwcap.h>
@@ -43,7 +44,7 @@ __cache_params:
/*
* cpu_feroceon_proc_init()
*/
-ENTRY(cpu_feroceon_proc_init)
+SYM_TYPED_FUNC_START(cpu_feroceon_proc_init)
mrc p15, 0, r0, c0, c0, 1 @ read cache type register
ldr r1, __cache_params
mov r2, #(16 << 5)
@@ -61,11 +62,12 @@ ENTRY(cpu_feroceon_proc_init)
str_l r1, VFP_arch_feroceon, r2
#endif
ret lr
+SYM_FUNC_END(cpu_feroceon_proc_init)
/*
* cpu_feroceon_proc_fin()
*/
-ENTRY(cpu_feroceon_proc_fin)
+SYM_TYPED_FUNC_START(cpu_feroceon_proc_fin)
#if defined(CONFIG_CACHE_FEROCEON_L2) && \
!defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH)
mov r0, #0
@@ -78,6 +80,7 @@ ENTRY(cpu_feroceon_proc_fin)
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
ret lr
+SYM_FUNC_END(cpu_feroceon_proc_fin)
/*
* cpu_feroceon_reset(loc)
@@ -90,7 +93,7 @@ ENTRY(cpu_feroceon_proc_fin)
*/
.align 5
.pushsection .idmap.text, "ax"
-ENTRY(cpu_feroceon_reset)
+SYM_TYPED_FUNC_START(cpu_feroceon_reset)
mov ip, #0
mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches
mcr p15, 0, ip, c7, c10, 4 @ drain WB
@@ -102,7 +105,7 @@ ENTRY(cpu_feroceon_reset)
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
ret r0
-ENDPROC(cpu_feroceon_reset)
+SYM_FUNC_END(cpu_feroceon_reset)
.popsection
/*
@@ -111,22 +114,23 @@ ENDPROC(cpu_feroceon_reset)
* Called with IRQs disabled
*/
.align 5
-ENTRY(cpu_feroceon_do_idle)
+SYM_TYPED_FUNC_START(cpu_feroceon_do_idle)
mov r0, #0
mcr p15, 0, r0, c7, c10, 4 @ Drain write buffer
mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt
ret lr
+SYM_FUNC_END(cpu_feroceon_do_idle)
/*
* flush_icache_all()
*
* Unconditionally clean and invalidate the entire icache.
*/
-ENTRY(feroceon_flush_icache_all)
+SYM_TYPED_FUNC_START(feroceon_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
ret lr
-ENDPROC(feroceon_flush_icache_all)
+SYM_FUNC_END(feroceon_flush_icache_all)
/*
* flush_user_cache_all()
@@ -135,15 +139,14 @@ ENDPROC(feroceon_flush_icache_all)
* address space.
*/
.align 5
-ENTRY(feroceon_flush_user_cache_all)
- /* FALLTHROUGH */
+SYM_FUNC_ALIAS(feroceon_flush_user_cache_all, feroceon_flush_kern_cache_all)
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
-ENTRY(feroceon_flush_kern_cache_all)
+SYM_TYPED_FUNC_START(feroceon_flush_kern_cache_all)
mov r2, #VM_EXEC
__flush_whole_cache:
@@ -161,6 +164,7 @@ __flush_whole_cache:
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(feroceon_flush_kern_cache_all)
/*
* flush_user_cache_range(start, end, flags)
@@ -173,7 +177,7 @@ __flush_whole_cache:
* - flags - vm_flags describing address space
*/
.align 5
-ENTRY(feroceon_flush_user_cache_range)
+SYM_TYPED_FUNC_START(feroceon_flush_user_cache_range)
sub r3, r1, r0 @ calculate total size
cmp r3, #CACHE_DLIMIT
bgt __flush_whole_cache
@@ -190,6 +194,7 @@ ENTRY(feroceon_flush_user_cache_range)
mov ip, #0
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(feroceon_flush_user_cache_range)
/*
* coherent_kern_range(start, end)
@@ -202,8 +207,11 @@ ENTRY(feroceon_flush_user_cache_range)
* - end - virtual end address
*/
.align 5
-ENTRY(feroceon_coherent_kern_range)
- /* FALLTHROUGH */
+SYM_TYPED_FUNC_START(feroceon_coherent_kern_range)
+#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */
+ b feroceon_coherent_user_range
+#endif
+SYM_FUNC_END(feroceon_coherent_kern_range)
/*
* coherent_user_range(start, end)
@@ -215,7 +223,7 @@ ENTRY(feroceon_coherent_kern_range)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(feroceon_coherent_user_range)
+SYM_TYPED_FUNC_START(feroceon_coherent_user_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry
@@ -225,6 +233,7 @@ ENTRY(feroceon_coherent_user_range)
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov r0, #0
ret lr
+SYM_FUNC_END(feroceon_coherent_user_range)
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -236,7 +245,7 @@ ENTRY(feroceon_coherent_user_range)
* - size - region size
*/
.align 5
-ENTRY(feroceon_flush_kern_dcache_area)
+SYM_TYPED_FUNC_START(feroceon_flush_kern_dcache_area)
add r1, r0, r1
1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
add r0, r0, #CACHE_DLINESIZE
@@ -246,9 +255,10 @@ ENTRY(feroceon_flush_kern_dcache_area)
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(feroceon_flush_kern_dcache_area)
.align 5
-ENTRY(feroceon_range_flush_kern_dcache_area)
+SYM_TYPED_FUNC_START(feroceon_range_flush_kern_dcache_area)
mrs r2, cpsr
add r1, r0, #PAGE_SZ - CACHE_DLINESIZE @ top addr is inclusive
orr r3, r2, #PSR_I_BIT
@@ -260,6 +270,7 @@ ENTRY(feroceon_range_flush_kern_dcache_area)
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(feroceon_range_flush_kern_dcache_area)
/*
* dma_inv_range(start, end)
@@ -346,7 +357,7 @@ feroceon_range_dma_clean_range:
* - end - virtual end address
*/
.align 5
-ENTRY(feroceon_dma_flush_range)
+SYM_TYPED_FUNC_START(feroceon_dma_flush_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
add r0, r0, #CACHE_DLINESIZE
@@ -354,9 +365,10 @@ ENTRY(feroceon_dma_flush_range)
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(feroceon_dma_flush_range)
.align 5
-ENTRY(feroceon_range_dma_flush_range)
+SYM_TYPED_FUNC_START(feroceon_range_dma_flush_range)
mrs r2, cpsr
cmp r1, r0
subne r1, r1, #1 @ top address is inclusive
@@ -367,6 +379,7 @@ ENTRY(feroceon_range_dma_flush_range)
msr cpsr_c, r2 @ restore interrupts
mcr p15, 0, r0, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(feroceon_range_dma_flush_range)
/*
* dma_map_area(start, size, dir)
@@ -374,13 +387,13 @@ ENTRY(feroceon_range_dma_flush_range)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(feroceon_dma_map_area)
+SYM_TYPED_FUNC_START(feroceon_dma_map_area)
add r1, r1, r0
cmp r2, #DMA_TO_DEVICE
beq feroceon_dma_clean_range
bcs feroceon_dma_inv_range
b feroceon_dma_flush_range
-ENDPROC(feroceon_dma_map_area)
+SYM_FUNC_END(feroceon_dma_map_area)
/*
* dma_map_area(start, size, dir)
@@ -388,13 +401,13 @@ ENDPROC(feroceon_dma_map_area)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(feroceon_range_dma_map_area)
+SYM_TYPED_FUNC_START(feroceon_range_dma_map_area)
add r1, r1, r0
cmp r2, #DMA_TO_DEVICE
beq feroceon_range_dma_clean_range
bcs feroceon_range_dma_inv_range
b feroceon_range_dma_flush_range
-ENDPROC(feroceon_range_dma_map_area)
+SYM_FUNC_END(feroceon_range_dma_map_area)
/*
* dma_unmap_area(start, size, dir)
@@ -402,39 +415,12 @@ ENDPROC(feroceon_range_dma_map_area)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(feroceon_dma_unmap_area)
+SYM_TYPED_FUNC_START(feroceon_dma_unmap_area)
ret lr
-ENDPROC(feroceon_dma_unmap_area)
-
- .globl feroceon_flush_kern_cache_louis
- .equ feroceon_flush_kern_cache_louis, feroceon_flush_kern_cache_all
-
- @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
- define_cache_functions feroceon
-
-.macro range_alias basename
- .globl feroceon_range_\basename
- .type feroceon_range_\basename , %function
- .equ feroceon_range_\basename , feroceon_\basename
-.endm
-
-/*
- * Most of the cache functions are unchanged for this case.
- * Export suitable alias symbols for the unchanged functions:
- */
- range_alias flush_icache_all
- range_alias flush_user_cache_all
- range_alias flush_kern_cache_all
- range_alias flush_kern_cache_louis
- range_alias flush_user_cache_range
- range_alias coherent_kern_range
- range_alias coherent_user_range
- range_alias dma_unmap_area
-
- define_cache_functions feroceon_range
+SYM_FUNC_END(feroceon_dma_unmap_area)
.align 5
-ENTRY(cpu_feroceon_dcache_clean_area)
+SYM_TYPED_FUNC_START(cpu_feroceon_dcache_clean_area)
#if defined(CONFIG_CACHE_FEROCEON_L2) && \
!defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH)
mov r2, r0
@@ -453,6 +439,7 @@ ENTRY(cpu_feroceon_dcache_clean_area)
#endif
mcr p15, 0, r0, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(cpu_feroceon_dcache_clean_area)
/* =============================== PageTable ============================== */
@@ -464,7 +451,7 @@ ENTRY(cpu_feroceon_dcache_clean_area)
* pgd: new page tables
*/
.align 5
-ENTRY(cpu_feroceon_switch_mm)
+SYM_TYPED_FUNC_START(cpu_feroceon_switch_mm)
#ifdef CONFIG_MMU
/*
* Note: we wish to call __flush_whole_cache but we need to preserve
@@ -485,6 +472,7 @@ ENTRY(cpu_feroceon_switch_mm)
#else
ret lr
#endif
+SYM_FUNC_END(cpu_feroceon_switch_mm)
/*
* cpu_feroceon_set_pte_ext(ptep, pte, ext)
@@ -492,7 +480,7 @@ ENTRY(cpu_feroceon_switch_mm)
* Set a PTE and flush it out
*/
.align 5
-ENTRY(cpu_feroceon_set_pte_ext)
+SYM_TYPED_FUNC_START(cpu_feroceon_set_pte_ext)
#ifdef CONFIG_MMU
armv3_set_pte_ext wc_disable=0
mov r0, r0
@@ -504,21 +492,22 @@ ENTRY(cpu_feroceon_set_pte_ext)
mcr p15, 0, r0, c7, c10, 4 @ drain WB
#endif
ret lr
+SYM_FUNC_END(cpu_feroceon_set_pte_ext)
/* Suspend/resume support: taken from arch/arm/mm/proc-arm926.S */
.globl cpu_feroceon_suspend_size
.equ cpu_feroceon_suspend_size, 4 * 3
#ifdef CONFIG_ARM_CPU_SUSPEND
-ENTRY(cpu_feroceon_do_suspend)
+SYM_TYPED_FUNC_START(cpu_feroceon_do_suspend)
stmfd sp!, {r4 - r6, lr}
mrc p15, 0, r4, c13, c0, 0 @ PID
mrc p15, 0, r5, c3, c0, 0 @ Domain ID
mrc p15, 0, r6, c1, c0, 0 @ Control register
stmia r0, {r4 - r6}
ldmfd sp!, {r4 - r6, pc}
-ENDPROC(cpu_feroceon_do_suspend)
+SYM_FUNC_END(cpu_feroceon_do_suspend)
-ENTRY(cpu_feroceon_do_resume)
+SYM_TYPED_FUNC_START(cpu_feroceon_do_resume)
mov ip, #0
mcr p15, 0, ip, c8, c7, 0 @ invalidate I+D TLBs
mcr p15, 0, ip, c7, c7, 0 @ invalidate I+D caches
@@ -528,7 +517,7 @@ ENTRY(cpu_feroceon_do_resume)
mcr p15, 0, r1, c2, c0, 0 @ TTB address
mov r0, r6 @ control register
b cpu_resume_mmu
-ENDPROC(cpu_feroceon_do_resume)
+SYM_FUNC_END(cpu_feroceon_do_resume)
#endif
.type __feroceon_setup, #function
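The Feroceon hunk additionally drops the range_alias helper macro, the block of .equ aliases it generated for the feroceon_range_* variant, and both define_cache_functions invocations. Only the genuinely range-specific routines (feroceon_range_flush_kern_dcache_area, feroceon_range_dma_flush_range, feroceon_range_dma_map_area) keep their own typed entry points; the rest of the range variant presumably reuses the shared feroceon_* functions wherever the replacement descriptor tables are now built.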
diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index e43f6d716b4b..e388c4cc0c44 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -320,39 +320,6 @@ ENTRY(\name\()_processor_functions)
#endif
.endm
-.macro define_cache_functions name:req
- .align 2
- .type \name\()_cache_fns, #object
-ENTRY(\name\()_cache_fns)
- .long \name\()_flush_icache_all
- .long \name\()_flush_kern_cache_all
- .long \name\()_flush_kern_cache_louis
- .long \name\()_flush_user_cache_all
- .long \name\()_flush_user_cache_range
- .long \name\()_coherent_kern_range
- .long \name\()_coherent_user_range
- .long \name\()_flush_kern_dcache_area
- .long \name\()_dma_map_area
- .long \name\()_dma_unmap_area
- .long \name\()_dma_flush_range
- .size \name\()_cache_fns, . - \name\()_cache_fns
-.endm
-
-.macro define_tlb_functions name:req, flags_up:req, flags_smp
- .type \name\()_tlb_fns, #object
- .align 2
-ENTRY(\name\()_tlb_fns)
- .long \name\()_flush_user_tlb_range
- .long \name\()_flush_kern_tlb_range
- .ifnb \flags_smp
- ALT_SMP(.long \flags_smp )
- ALT_UP(.long \flags_up )
- .else
- .long \flags_up
- .endif
- .size \name\()_tlb_fns, . - \name\()_tlb_fns
-.endm
-
.macro globl_equ x, y
.globl \x
.equ \x, \y
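With every per-CPU cache helper now carrying its own typed symbol, the assembler-built descriptor tables are no longer needed, so define_cache_functions and define_tlb_functions disappear together with the per-file flush_kern_cache_louis aliases and the .long tables the macros generated. The natural replacement, presumably provided elsewhere in this series, is to populate the same tables from C, where the compiler emits matching type information for each function-pointer slot. The sketch below is illustrative only: struct cpu_cache_fns really is declared in <asm/cacheflush.h> (as the removed comments note), but the field names and prototypes here are simplified and follow only a subset of the entries the removed macro listed.

#include <stddef.h>

/* Illustrative layout; the real struct cpu_cache_fns may differ in detail. */
struct cache_fns_sketch {
        void (*flush_icache_all)(void);
        void (*flush_kern_all)(void);
        void (*flush_user_all)(void);
        void (*flush_user_range)(unsigned long, unsigned long, unsigned int);
        void (*coherent_kern_range)(unsigned long, unsigned long);
        int  (*coherent_user_range)(unsigned long, unsigned long);
        void (*flush_kern_dcache_area)(void *, size_t);
        void (*dma_map_area)(const void *, size_t, int);
        void (*dma_unmap_area)(const void *, size_t, int);
        void (*dma_flush_range)(const void *, const void *);
};

extern void arm926_flush_icache_all(void);
/* ... prototypes for the remaining arm926_* helpers elided ... */

static const struct cache_fns_sketch arm926_cache_fns = {
        .flush_icache_all = arm926_flush_icache_all,
        /* ... remaining slots filled the same way ... */
};

Building the table in C like this lets the compiler attach the CFI type hash to each slot's target, which is exactly what the typed assembly entry points above were converted to satisfy.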
diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S
index 1645ccaffe96..8e9f38da863a 100644
--- a/arch/arm/mm/proc-mohawk.S
+++ b/arch/arm/mm/proc-mohawk.S
@@ -9,6 +9,7 @@
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <linux/pgtable.h>
#include <asm/assembler.h>
#include <asm/hwcap.h>
@@ -31,18 +32,20 @@
/*
* cpu_mohawk_proc_init()
*/
-ENTRY(cpu_mohawk_proc_init)
+SYM_TYPED_FUNC_START(cpu_mohawk_proc_init)
ret lr
+SYM_FUNC_END(cpu_mohawk_proc_init)
/*
* cpu_mohawk_proc_fin()
*/
-ENTRY(cpu_mohawk_proc_fin)
+SYM_TYPED_FUNC_START(cpu_mohawk_proc_fin)
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x1800 @ ...iz...........
bic r0, r0, #0x0006 @ .............ca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
ret lr
+SYM_FUNC_END(cpu_mohawk_proc_fin)
/*
* cpu_mohawk_reset(loc)
@@ -57,7 +60,7 @@ ENTRY(cpu_mohawk_proc_fin)
*/
.align 5
.pushsection .idmap.text, "ax"
-ENTRY(cpu_mohawk_reset)
+SYM_TYPED_FUNC_START(cpu_mohawk_reset)
mov ip, #0
mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches
mcr p15, 0, ip, c7, c10, 4 @ drain WB
@@ -67,7 +70,7 @@ ENTRY(cpu_mohawk_reset)
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
ret r0
-ENDPROC(cpu_mohawk_reset)
+SYM_FUNC_END(cpu_mohawk_reset)
.popsection
/*
@@ -76,22 +79,23 @@ ENDPROC(cpu_mohawk_reset)
* Called with IRQs disabled
*/
.align 5
-ENTRY(cpu_mohawk_do_idle)
+SYM_TYPED_FUNC_START(cpu_mohawk_do_idle)
mov r0, #0
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
mcr p15, 0, r0, c7, c0, 4 @ wait for interrupt
ret lr
+SYM_FUNC_END(cpu_mohawk_do_idle)
/*
* flush_icache_all()
*
* Unconditionally clean and invalidate the entire icache.
*/
-ENTRY(mohawk_flush_icache_all)
+SYM_TYPED_FUNC_START(mohawk_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
ret lr
-ENDPROC(mohawk_flush_icache_all)
+SYM_FUNC_END(mohawk_flush_icache_all)
/*
* flush_user_cache_all()
@@ -99,15 +103,14 @@ ENDPROC(mohawk_flush_icache_all)
* Clean and invalidate all cache entries in a particular
* address space.
*/
-ENTRY(mohawk_flush_user_cache_all)
- /* FALLTHROUGH */
+SYM_FUNC_ALIAS(mohawk_flush_user_cache_all, mohawk_flush_kern_cache_all)
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
-ENTRY(mohawk_flush_kern_cache_all)
+SYM_TYPED_FUNC_START(mohawk_flush_kern_cache_all)
mov r2, #VM_EXEC
mov ip, #0
__flush_whole_cache:
@@ -116,6 +119,7 @@ __flush_whole_cache:
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcrne p15, 0, ip, c7, c10, 0 @ drain write buffer
ret lr
+SYM_FUNC_END(mohawk_flush_kern_cache_all)
/*
* flush_user_cache_range(start, end, flags)
@@ -129,7 +133,7 @@ __flush_whole_cache:
*
* (same as arm926)
*/
-ENTRY(mohawk_flush_user_cache_range)
+SYM_TYPED_FUNC_START(mohawk_flush_user_cache_range)
mov ip, #0
sub r3, r1, r0 @ calculate total size
cmp r3, #CACHE_DLIMIT
@@ -146,6 +150,7 @@ ENTRY(mohawk_flush_user_cache_range)
tst r2, #VM_EXEC
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(mohawk_flush_user_cache_range)
/*
* coherent_kern_range(start, end)
@@ -157,8 +162,11 @@ ENTRY(mohawk_flush_user_cache_range)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(mohawk_coherent_kern_range)
- /* FALLTHROUGH */
+SYM_TYPED_FUNC_START(mohawk_coherent_kern_range)
+#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */
+ b mohawk_coherent_user_range
+#endif
+SYM_FUNC_END(mohawk_coherent_kern_range)
/*
* coherent_user_range(start, end)
@@ -172,7 +180,7 @@ ENTRY(mohawk_coherent_kern_range)
*
* (same as arm926)
*/
-ENTRY(mohawk_coherent_user_range)
+SYM_TYPED_FUNC_START(mohawk_coherent_user_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry
@@ -182,6 +190,7 @@ ENTRY(mohawk_coherent_user_range)
mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov r0, #0
ret lr
+SYM_FUNC_END(mohawk_coherent_user_range)
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -192,7 +201,7 @@ ENTRY(mohawk_coherent_user_range)
* - addr - kernel address
* - size - region size
*/
-ENTRY(mohawk_flush_kern_dcache_area)
+SYM_TYPED_FUNC_START(mohawk_flush_kern_dcache_area)
add r1, r0, r1
1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
add r0, r0, #CACHE_DLINESIZE
@@ -202,6 +211,7 @@ ENTRY(mohawk_flush_kern_dcache_area)
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(mohawk_flush_kern_dcache_area)
/*
* dma_inv_range(start, end)
@@ -256,7 +266,7 @@ mohawk_dma_clean_range:
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(mohawk_dma_flush_range)
+SYM_TYPED_FUNC_START(mohawk_dma_flush_range)
bic r0, r0, #CACHE_DLINESIZE - 1
1:
mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
@@ -265,6 +275,7 @@ ENTRY(mohawk_dma_flush_range)
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(mohawk_dma_flush_range)
/*
* dma_map_area(start, size, dir)
@@ -272,13 +283,13 @@ ENTRY(mohawk_dma_flush_range)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(mohawk_dma_map_area)
+SYM_TYPED_FUNC_START(mohawk_dma_map_area)
add r1, r1, r0
cmp r2, #DMA_TO_DEVICE
beq mohawk_dma_clean_range
bcs mohawk_dma_inv_range
b mohawk_dma_flush_range
-ENDPROC(mohawk_dma_map_area)
+SYM_FUNC_END(mohawk_dma_map_area)
/*
* dma_unmap_area(start, size, dir)
@@ -286,23 +297,18 @@ ENDPROC(mohawk_dma_map_area)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(mohawk_dma_unmap_area)
+SYM_TYPED_FUNC_START(mohawk_dma_unmap_area)
ret lr
-ENDPROC(mohawk_dma_unmap_area)
-
- .globl mohawk_flush_kern_cache_louis
- .equ mohawk_flush_kern_cache_louis, mohawk_flush_kern_cache_all
-
- @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
- define_cache_functions mohawk
+SYM_FUNC_END(mohawk_dma_unmap_area)
-ENTRY(cpu_mohawk_dcache_clean_area)
+SYM_TYPED_FUNC_START(cpu_mohawk_dcache_clean_area)
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #CACHE_DLINESIZE
subs r1, r1, #CACHE_DLINESIZE
bhi 1b
mcr p15, 0, r0, c7, c10, 4 @ drain WB
ret lr
+SYM_FUNC_END(cpu_mohawk_dcache_clean_area)
/*
* cpu_mohawk_switch_mm(pgd)
@@ -312,7 +318,7 @@ ENTRY(cpu_mohawk_dcache_clean_area)
* pgd: new page tables
*/
.align 5
-ENTRY(cpu_mohawk_switch_mm)
+SYM_TYPED_FUNC_START(cpu_mohawk_switch_mm)
mov ip, #0
mcr p15, 0, ip, c7, c14, 0 @ clean & invalidate all D cache
mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache
@@ -321,6 +327,7 @@ ENTRY(cpu_mohawk_switch_mm)
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
ret lr
+SYM_FUNC_END(cpu_mohawk_switch_mm)
/*
* cpu_mohawk_set_pte_ext(ptep, pte, ext)
@@ -328,7 +335,7 @@ ENTRY(cpu_mohawk_switch_mm)
* Set a PTE and flush it out
*/
.align 5
-ENTRY(cpu_mohawk_set_pte_ext)
+SYM_TYPED_FUNC_START(cpu_mohawk_set_pte_ext)
#ifdef CONFIG_MMU
armv3_set_pte_ext
mov r0, r0
@@ -336,11 +343,12 @@ ENTRY(cpu_mohawk_set_pte_ext)
mcr p15, 0, r0, c7, c10, 4 @ drain WB
ret lr
#endif
+SYM_FUNC_END(cpu_mohawk_set_pte_ext)
.globl cpu_mohawk_suspend_size
.equ cpu_mohawk_suspend_size, 4 * 6
#ifdef CONFIG_ARM_CPU_SUSPEND
-ENTRY(cpu_mohawk_do_suspend)
+SYM_TYPED_FUNC_START(cpu_mohawk_do_suspend)
stmfd sp!, {r4 - r9, lr}
mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode
mrc p15, 0, r5, c15, c1, 0 @ CP access reg
@@ -351,9 +359,9 @@ ENTRY(cpu_mohawk_do_suspend)
bic r4, r4, #2 @ clear frequency change bit
stmia r0, {r4 - r9} @ store cp regs
ldmia sp!, {r4 - r9, pc}
-ENDPROC(cpu_mohawk_do_suspend)
+SYM_FUNC_END(cpu_mohawk_do_suspend)
-ENTRY(cpu_mohawk_do_resume)
+SYM_TYPED_FUNC_START(cpu_mohawk_do_resume)
ldmia r0, {r4 - r9} @ load cp regs
mov ip, #0
mcr p15, 0, ip, c7, c7, 0 @ invalidate I & D caches, BTB
@@ -369,7 +377,7 @@ ENTRY(cpu_mohawk_do_resume)
mcr p15, 0, r8, c1, c0, 1 @ auxiliary control reg
mov r0, r9 @ control register
b cpu_resume_mmu
-ENDPROC(cpu_mohawk_do_resume)
+SYM_FUNC_END(cpu_mohawk_do_resume)
#endif
.type __mohawk_setup, #function
diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S
index 4071f7a61cb6..3da76fab8ac3 100644
--- a/arch/arm/mm/proc-sa110.S
+++ b/arch/arm/mm/proc-sa110.S
@@ -12,6 +12,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <linux/pgtable.h>
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
@@ -32,15 +33,16 @@
/*
* cpu_sa110_proc_init()
*/
-ENTRY(cpu_sa110_proc_init)
+SYM_TYPED_FUNC_START(cpu_sa110_proc_init)
mov r0, #0
mcr p15, 0, r0, c15, c1, 2 @ Enable clock switching
ret lr
+SYM_FUNC_END(cpu_sa110_proc_init)
/*
* cpu_sa110_proc_fin()
*/
-ENTRY(cpu_sa110_proc_fin)
+SYM_TYPED_FUNC_START(cpu_sa110_proc_fin)
mov r0, #0
mcr p15, 0, r0, c15, c2, 2 @ Disable clock switching
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
@@ -48,6 +50,7 @@ ENTRY(cpu_sa110_proc_fin)
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
ret lr
+SYM_FUNC_END(cpu_sa110_proc_fin)
/*
* cpu_sa110_reset(loc)
@@ -60,7 +63,7 @@ ENTRY(cpu_sa110_proc_fin)
*/
.align 5
.pushsection .idmap.text, "ax"
-ENTRY(cpu_sa110_reset)
+SYM_TYPED_FUNC_START(cpu_sa110_reset)
mov ip, #0
mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches
mcr p15, 0, ip, c7, c10, 4 @ drain WB
@@ -72,7 +75,7 @@ ENTRY(cpu_sa110_reset)
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
ret r0
-ENDPROC(cpu_sa110_reset)
+SYM_FUNC_END(cpu_sa110_reset)
.popsection
/*
@@ -88,7 +91,7 @@ ENDPROC(cpu_sa110_reset)
*/
.align 5
-ENTRY(cpu_sa110_do_idle)
+SYM_TYPED_FUNC_START(cpu_sa110_do_idle)
mcr p15, 0, ip, c15, c2, 2 @ disable clock switching
ldr r1, =UNCACHEABLE_ADDR @ load from uncacheable loc
ldr r1, [r1, #0] @ force switch to MCLK
@@ -101,6 +104,7 @@ ENTRY(cpu_sa110_do_idle)
mov r0, r0 @ safety
mcr p15, 0, r0, c15, c1, 2 @ enable clock switching
ret lr
+SYM_FUNC_END(cpu_sa110_do_idle)
/* ================================= CACHE ================================ */
@@ -113,12 +117,13 @@ ENTRY(cpu_sa110_do_idle)
* addr: cache-unaligned virtual address
*/
.align 5
-ENTRY(cpu_sa110_dcache_clean_area)
+SYM_TYPED_FUNC_START(cpu_sa110_dcache_clean_area)
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #DCACHELINESIZE
subs r1, r1, #DCACHELINESIZE
bhi 1b
ret lr
+SYM_FUNC_END(cpu_sa110_dcache_clean_area)
/* =============================== PageTable ============================== */
@@ -130,7 +135,7 @@ ENTRY(cpu_sa110_dcache_clean_area)
* pgd: new page tables
*/
.align 5
-ENTRY(cpu_sa110_switch_mm)
+SYM_TYPED_FUNC_START(cpu_sa110_switch_mm)
#ifdef CONFIG_MMU
str lr, [sp, #-4]!
bl v4wb_flush_kern_cache_all @ clears IP
@@ -140,6 +145,7 @@ ENTRY(cpu_sa110_switch_mm)
#else
ret lr
#endif
+SYM_FUNC_END(cpu_sa110_switch_mm)
/*
* cpu_sa110_set_pte_ext(ptep, pte, ext)
@@ -147,7 +153,7 @@ ENTRY(cpu_sa110_switch_mm)
* Set a PTE and flush it out
*/
.align 5
-ENTRY(cpu_sa110_set_pte_ext)
+SYM_TYPED_FUNC_START(cpu_sa110_set_pte_ext)
#ifdef CONFIG_MMU
armv3_set_pte_ext wc_disable=0
mov r0, r0
@@ -155,6 +161,7 @@ ENTRY(cpu_sa110_set_pte_ext)
mcr p15, 0, r0, c7, c10, 4 @ drain WB
#endif
ret lr
+SYM_FUNC_END(cpu_sa110_set_pte_ext)
.type __sa110_setup, #function
__sa110_setup:
diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S
index e723bd4119d3..7c496195e440 100644
--- a/arch/arm/mm/proc-sa1100.S
+++ b/arch/arm/mm/proc-sa1100.S
@@ -17,6 +17,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <linux/pgtable.h>
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
@@ -36,11 +37,12 @@
/*
* cpu_sa1100_proc_init()
*/
-ENTRY(cpu_sa1100_proc_init)
+SYM_TYPED_FUNC_START(cpu_sa1100_proc_init)
mov r0, #0
mcr p15, 0, r0, c15, c1, 2 @ Enable clock switching
mcr p15, 0, r0, c9, c0, 5 @ Allow read-buffer operations from userland
ret lr
+SYM_FUNC_END(cpu_sa1100_proc_init)
/*
* cpu_sa1100_proc_fin()
@@ -49,13 +51,14 @@ ENTRY(cpu_sa1100_proc_init)
* - Disable interrupts
* - Clean and turn off caches.
*/
-ENTRY(cpu_sa1100_proc_fin)
+SYM_TYPED_FUNC_START(cpu_sa1100_proc_fin)
mcr p15, 0, ip, c15, c2, 2 @ Disable clock switching
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x000e @ ............wca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
ret lr
+SYM_FUNC_END(cpu_sa1100_proc_fin)
/*
* cpu_sa1100_reset(loc)
@@ -68,7 +71,7 @@ ENTRY(cpu_sa1100_proc_fin)
*/
.align 5
.pushsection .idmap.text, "ax"
-ENTRY(cpu_sa1100_reset)
+SYM_TYPED_FUNC_START(cpu_sa1100_reset)
mov ip, #0
mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches
mcr p15, 0, ip, c7, c10, 4 @ drain WB
@@ -80,7 +83,7 @@ ENTRY(cpu_sa1100_reset)
bic ip, ip, #0x1100 @ ...i...s........
mcr p15, 0, ip, c1, c0, 0 @ ctrl register
ret r0
-ENDPROC(cpu_sa1100_reset)
+SYM_FUNC_END(cpu_sa1100_reset)
.popsection
/*
@@ -95,7 +98,7 @@ ENDPROC(cpu_sa1100_reset)
* 3 = switch to fast processor clock
*/
.align 5
-ENTRY(cpu_sa1100_do_idle)
+SYM_TYPED_FUNC_START(cpu_sa1100_do_idle)
mov r0, r0 @ 4 nop padding
mov r0, r0
mov r0, r0
@@ -111,6 +114,7 @@ ENTRY(cpu_sa1100_do_idle)
mov r0, r0 @ safety
mcr p15, 0, r0, c15, c1, 2 @ enable clock switching
ret lr
+SYM_FUNC_END(cpu_sa1100_do_idle)
/* ================================= CACHE ================================ */
@@ -123,12 +127,13 @@ ENTRY(cpu_sa1100_do_idle)
* addr: cache-unaligned virtual address
*/
.align 5
-ENTRY(cpu_sa1100_dcache_clean_area)
+SYM_TYPED_FUNC_START(cpu_sa1100_dcache_clean_area)
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #DCACHELINESIZE
subs r1, r1, #DCACHELINESIZE
bhi 1b
ret lr
+SYM_FUNC_END(cpu_sa1100_dcache_clean_area)
/* =============================== PageTable ============================== */
@@ -140,7 +145,7 @@ ENTRY(cpu_sa1100_dcache_clean_area)
* pgd: new page tables
*/
.align 5
-ENTRY(cpu_sa1100_switch_mm)
+SYM_TYPED_FUNC_START(cpu_sa1100_switch_mm)
#ifdef CONFIG_MMU
str lr, [sp, #-4]!
bl v4wb_flush_kern_cache_all @ clears IP
@@ -151,6 +156,7 @@ ENTRY(cpu_sa1100_switch_mm)
#else
ret lr
#endif
+SYM_FUNC_END(cpu_sa1100_switch_mm)
/*
* cpu_sa1100_set_pte_ext(ptep, pte, ext)
@@ -158,7 +164,7 @@ ENTRY(cpu_sa1100_switch_mm)
* Set a PTE and flush it out
*/
.align 5
-ENTRY(cpu_sa1100_set_pte_ext)
+SYM_TYPED_FUNC_START(cpu_sa1100_set_pte_ext)
#ifdef CONFIG_MMU
armv3_set_pte_ext wc_disable=0
mov r0, r0
@@ -166,20 +172,21 @@ ENTRY(cpu_sa1100_set_pte_ext)
mcr p15, 0, r0, c7, c10, 4 @ drain WB
#endif
ret lr
+SYM_FUNC_END(cpu_sa1100_set_pte_ext)
.globl cpu_sa1100_suspend_size
.equ cpu_sa1100_suspend_size, 4 * 3
#ifdef CONFIG_ARM_CPU_SUSPEND
-ENTRY(cpu_sa1100_do_suspend)
+SYM_TYPED_FUNC_START(cpu_sa1100_do_suspend)
stmfd sp!, {r4 - r6, lr}
mrc p15, 0, r4, c3, c0, 0 @ domain ID
mrc p15, 0, r5, c13, c0, 0 @ PID
mrc p15, 0, r6, c1, c0, 0 @ control reg
stmia r0, {r4 - r6} @ store cp regs
ldmfd sp!, {r4 - r6, pc}
-ENDPROC(cpu_sa1100_do_suspend)
+SYM_FUNC_END(cpu_sa1100_do_suspend)
-ENTRY(cpu_sa1100_do_resume)
+SYM_TYPED_FUNC_START(cpu_sa1100_do_resume)
ldmia r0, {r4 - r6} @ load cp regs
mov ip, #0
mcr p15, 0, ip, c8, c7, 0 @ flush I+D TLBs
@@ -192,7 +199,7 @@ ENTRY(cpu_sa1100_do_resume)
mcr p15, 0, r5, c13, c0, 0 @ PID
mov r0, r6 @ control register
b cpu_resume_mmu
-ENDPROC(cpu_sa1100_do_resume)
+SYM_FUNC_END(cpu_sa1100_do_resume)
#endif
.type __sa1100_setup, #function
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index 203dff89ab1a..90a01f5950b9 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -8,6 +8,7 @@
* This is the "shell" of the ARMv6 processor support.
*/
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <linux/linkage.h>
#include <linux/pgtable.h>
#include <asm/assembler.h>
@@ -34,15 +35,17 @@
.arch armv6
-ENTRY(cpu_v6_proc_init)
+SYM_TYPED_FUNC_START(cpu_v6_proc_init)
ret lr
+SYM_FUNC_END(cpu_v6_proc_init)
-ENTRY(cpu_v6_proc_fin)
+SYM_TYPED_FUNC_START(cpu_v6_proc_fin)
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x0006 @ .............ca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
ret lr
+SYM_FUNC_END(cpu_v6_proc_fin)
/*
* cpu_v6_reset(loc)
@@ -55,14 +58,14 @@ ENTRY(cpu_v6_proc_fin)
*/
.align 5
.pushsection .idmap.text, "ax"
-ENTRY(cpu_v6_reset)
+SYM_TYPED_FUNC_START(cpu_v6_reset)
mrc p15, 0, r1, c1, c0, 0 @ ctrl register
bic r1, r1, #0x1 @ ...............m
mcr p15, 0, r1, c1, c0, 0 @ disable MMU
mov r1, #0
mcr p15, 0, r1, c7, c5, 4 @ ISB
ret r0
-ENDPROC(cpu_v6_reset)
+SYM_FUNC_END(cpu_v6_reset)
.popsection
/*
@@ -72,18 +75,20 @@ ENDPROC(cpu_v6_reset)
*
* IRQs are already disabled.
*/
-ENTRY(cpu_v6_do_idle)
+SYM_TYPED_FUNC_START(cpu_v6_do_idle)
mov r1, #0
mcr p15, 0, r1, c7, c10, 4 @ DWB - WFI may enter a low-power mode
mcr p15, 0, r1, c7, c0, 4 @ wait for interrupt
ret lr
+SYM_FUNC_END(cpu_v6_do_idle)
-ENTRY(cpu_v6_dcache_clean_area)
+SYM_TYPED_FUNC_START(cpu_v6_dcache_clean_area)
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #D_CACHE_LINE_SIZE
subs r1, r1, #D_CACHE_LINE_SIZE
bhi 1b
ret lr
+SYM_FUNC_END(cpu_v6_dcache_clean_area)
/*
* cpu_v6_switch_mm(pgd_phys, tsk)
@@ -95,7 +100,7 @@ ENTRY(cpu_v6_dcache_clean_area)
* It is assumed that:
* - we are not using split page tables
*/
-ENTRY(cpu_v6_switch_mm)
+SYM_TYPED_FUNC_START(cpu_v6_switch_mm)
#ifdef CONFIG_MMU
mov r2, #0
mmid r1, r1 @ get mm->context.id
@@ -113,6 +118,7 @@ ENTRY(cpu_v6_switch_mm)
mcr p15, 0, r1, c13, c0, 1 @ set context ID
#endif
ret lr
+SYM_FUNC_END(cpu_v6_switch_mm)
/*
* cpu_v6_set_pte_ext(ptep, pte, ext)
@@ -126,17 +132,18 @@ ENTRY(cpu_v6_switch_mm)
*/
armv6_mt_table cpu_v6
-ENTRY(cpu_v6_set_pte_ext)
+SYM_TYPED_FUNC_START(cpu_v6_set_pte_ext)
#ifdef CONFIG_MMU
armv6_set_pte_ext cpu_v6
#endif
ret lr
+SYM_FUNC_END(cpu_v6_set_pte_ext)
/* Suspend/resume support: taken from arch/arm/mach-s3c64xx/sleep.S */
.globl cpu_v6_suspend_size
.equ cpu_v6_suspend_size, 4 * 6
#ifdef CONFIG_ARM_CPU_SUSPEND
-ENTRY(cpu_v6_do_suspend)
+SYM_TYPED_FUNC_START(cpu_v6_do_suspend)
stmfd sp!, {r4 - r9, lr}
mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID
#ifdef CONFIG_MMU
@@ -148,9 +155,9 @@ ENTRY(cpu_v6_do_suspend)
mrc p15, 0, r9, c1, c0, 0 @ control register
stmia r0, {r4 - r9}
ldmfd sp!, {r4- r9, pc}
-ENDPROC(cpu_v6_do_suspend)
+SYM_FUNC_END(cpu_v6_do_suspend)
-ENTRY(cpu_v6_do_resume)
+SYM_TYPED_FUNC_START(cpu_v6_do_resume)
mov ip, #0
mcr p15, 0, ip, c7, c14, 0 @ clean+invalidate D cache
mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache
@@ -172,7 +179,7 @@ ENTRY(cpu_v6_do_resume)
mcr p15, 0, ip, c7, c5, 4 @ ISB
mov r0, r9 @ control register
b cpu_resume_mmu
-ENDPROC(cpu_v6_do_resume)
+SYM_FUNC_END(cpu_v6_do_resume)
#endif
string cpu_v6_name, "ARMv6-compatible processor"
diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S
index 0a3083ad19c2..1007702fcaf3 100644
--- a/arch/arm/mm/proc-v7-2level.S
+++ b/arch/arm/mm/proc-v7-2level.S
@@ -40,7 +40,7 @@
* even on Cortex-A8 revisions not affected by 430973.
* If IBE is not set, the flush BTAC/BTB won't do anything.
*/
-ENTRY(cpu_v7_switch_mm)
+SYM_TYPED_FUNC_START(cpu_v7_switch_mm)
#ifdef CONFIG_MMU
mmid r1, r1 @ get mm->context.id
ALT_SMP(orr r0, r0, #TTB_FLAGS_SMP)
@@ -59,7 +59,7 @@ ENTRY(cpu_v7_switch_mm)
isb
#endif
bx lr
-ENDPROC(cpu_v7_switch_mm)
+SYM_FUNC_END(cpu_v7_switch_mm)
/*
* cpu_v7_set_pte_ext(ptep, pte)
@@ -71,7 +71,7 @@ ENDPROC(cpu_v7_switch_mm)
* - pte - PTE value to store
* - ext - value for extended PTE bits
*/
-ENTRY(cpu_v7_set_pte_ext)
+SYM_TYPED_FUNC_START(cpu_v7_set_pte_ext)
#ifdef CONFIG_MMU
str r1, [r0] @ linux version
@@ -106,7 +106,7 @@ ENTRY(cpu_v7_set_pte_ext)
ALT_UP (mcr p15, 0, r0, c7, c10, 1) @ flush_pte
#endif
bx lr
-ENDPROC(cpu_v7_set_pte_ext)
+SYM_FUNC_END(cpu_v7_set_pte_ext)
/*
* Memory region attributes with SCTLR.TRE=1
diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S
index 131984462d0d..bdabc15cde56 100644
--- a/arch/arm/mm/proc-v7-3level.S
+++ b/arch/arm/mm/proc-v7-3level.S
@@ -42,7 +42,7 @@
* Set the translation table base pointer to be pgd_phys (physical address of
* the new TTB).
*/
-ENTRY(cpu_v7_switch_mm)
+SYM_TYPED_FUNC_START(cpu_v7_switch_mm)
#ifdef CONFIG_MMU
mmid r2, r2
asid r2, r2
@@ -51,7 +51,7 @@ ENTRY(cpu_v7_switch_mm)
isb
#endif
ret lr
-ENDPROC(cpu_v7_switch_mm)
+SYM_FUNC_END(cpu_v7_switch_mm)
#ifdef __ARMEB__
#define rl r3
@@ -68,7 +68,7 @@ ENDPROC(cpu_v7_switch_mm)
* - ptep - pointer to level 3 translation table entry
* - pte - PTE value to store (64-bit in r2 and r3)
*/
-ENTRY(cpu_v7_set_pte_ext)
+SYM_TYPED_FUNC_START(cpu_v7_set_pte_ext)
#ifdef CONFIG_MMU
tst rl, #L_PTE_VALID
beq 1f
@@ -87,7 +87,7 @@ ENTRY(cpu_v7_set_pte_ext)
ALT_UP (mcr p15, 0, r0, c7, c10, 1) @ flush_pte
#endif
ret lr
-ENDPROC(cpu_v7_set_pte_ext)
+SYM_FUNC_END(cpu_v7_set_pte_ext)
/*
* Memory region attributes for LPAE (defined in pgtable-3level.h):
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 193c7aeb6703..5fb9a6aecb00 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -7,6 +7,7 @@
* This is the "shell" of the ARMv7 processor support.
*/
#include <linux/arm-smccc.h>
+#include <linux/cfi_types.h>
#include <linux/init.h>
#include <linux/linkage.h>
#include <linux/pgtable.h>
@@ -26,17 +27,17 @@
.arch armv7-a
-ENTRY(cpu_v7_proc_init)
+SYM_TYPED_FUNC_START(cpu_v7_proc_init)
ret lr
-ENDPROC(cpu_v7_proc_init)
+SYM_FUNC_END(cpu_v7_proc_init)
-ENTRY(cpu_v7_proc_fin)
+SYM_TYPED_FUNC_START(cpu_v7_proc_fin)
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x1000 @ ...i............
bic r0, r0, #0x0006 @ .............ca.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
ret lr
-ENDPROC(cpu_v7_proc_fin)
+SYM_FUNC_END(cpu_v7_proc_fin)
/*
* cpu_v7_reset(loc, hyp)
@@ -53,7 +54,7 @@ ENDPROC(cpu_v7_proc_fin)
*/
.align 5
.pushsection .idmap.text, "ax"
-ENTRY(cpu_v7_reset)
+SYM_TYPED_FUNC_START(cpu_v7_reset)
mrc p15, 0, r2, c1, c0, 0 @ ctrl register
bic r2, r2, #0x1 @ ...............m
THUMB( bic r2, r2, #1 << 30 ) @ SCTLR.TE (Thumb exceptions)
@@ -64,7 +65,7 @@ ENTRY(cpu_v7_reset)
bne __hyp_soft_restart
#endif
bx r0
-ENDPROC(cpu_v7_reset)
+SYM_FUNC_END(cpu_v7_reset)
.popsection
/*
@@ -74,13 +75,13 @@ ENDPROC(cpu_v7_reset)
*
* IRQs are already disabled.
*/
-ENTRY(cpu_v7_do_idle)
+SYM_TYPED_FUNC_START(cpu_v7_do_idle)
dsb @ WFI may enter a low-power mode
wfi
ret lr
-ENDPROC(cpu_v7_do_idle)
+SYM_FUNC_END(cpu_v7_do_idle)
-ENTRY(cpu_v7_dcache_clean_area)
+SYM_TYPED_FUNC_START(cpu_v7_dcache_clean_area)
ALT_SMP(W(nop)) @ MP extensions imply L1 PTW
ALT_UP_B(1f)
ret lr
@@ -91,38 +92,39 @@ ENTRY(cpu_v7_dcache_clean_area)
bhi 2b
dsb ishst
ret lr
-ENDPROC(cpu_v7_dcache_clean_area)
+SYM_FUNC_END(cpu_v7_dcache_clean_area)
#ifdef CONFIG_ARM_PSCI
.arch_extension sec
-ENTRY(cpu_v7_smc_switch_mm)
+SYM_TYPED_FUNC_START(cpu_v7_smc_switch_mm)
stmfd sp!, {r0 - r3}
movw r0, #:lower16:ARM_SMCCC_ARCH_WORKAROUND_1
movt r0, #:upper16:ARM_SMCCC_ARCH_WORKAROUND_1
smc #0
ldmfd sp!, {r0 - r3}
b cpu_v7_switch_mm
-ENDPROC(cpu_v7_smc_switch_mm)
+SYM_FUNC_END(cpu_v7_smc_switch_mm)
.arch_extension virt
-ENTRY(cpu_v7_hvc_switch_mm)
+SYM_TYPED_FUNC_START(cpu_v7_hvc_switch_mm)
stmfd sp!, {r0 - r3}
movw r0, #:lower16:ARM_SMCCC_ARCH_WORKAROUND_1
movt r0, #:upper16:ARM_SMCCC_ARCH_WORKAROUND_1
hvc #0
ldmfd sp!, {r0 - r3}
b cpu_v7_switch_mm
-ENDPROC(cpu_v7_hvc_switch_mm)
+SYM_FUNC_END(cpu_v7_hvc_switch_mm)
#endif
-ENTRY(cpu_v7_iciallu_switch_mm)
+
+SYM_TYPED_FUNC_START(cpu_v7_iciallu_switch_mm)
mov r3, #0
mcr p15, 0, r3, c7, c5, 0 @ ICIALLU
b cpu_v7_switch_mm
-ENDPROC(cpu_v7_iciallu_switch_mm)
-ENTRY(cpu_v7_bpiall_switch_mm)
+SYM_FUNC_END(cpu_v7_iciallu_switch_mm)
+SYM_TYPED_FUNC_START(cpu_v7_bpiall_switch_mm)
mov r3, #0
mcr p15, 0, r3, c7, c5, 6 @ flush BTAC/BTB
b cpu_v7_switch_mm
-ENDPROC(cpu_v7_bpiall_switch_mm)
+SYM_FUNC_END(cpu_v7_bpiall_switch_mm)
string cpu_v7_name, "ARMv7 Processor"
.align
@@ -131,7 +133,7 @@ ENDPROC(cpu_v7_bpiall_switch_mm)
.globl cpu_v7_suspend_size
.equ cpu_v7_suspend_size, 4 * 9
#ifdef CONFIG_ARM_CPU_SUSPEND
-ENTRY(cpu_v7_do_suspend)
+SYM_TYPED_FUNC_START(cpu_v7_do_suspend)
stmfd sp!, {r4 - r11, lr}
mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID
mrc p15, 0, r5, c13, c0, 3 @ User r/o thread ID
@@ -150,9 +152,9 @@ ENTRY(cpu_v7_do_suspend)
mrc p15, 0, r10, c1, c0, 2 @ Co-processor access control
stmia r0, {r5 - r11}
ldmfd sp!, {r4 - r11, pc}
-ENDPROC(cpu_v7_do_suspend)
+SYM_FUNC_END(cpu_v7_do_suspend)
-ENTRY(cpu_v7_do_resume)
+SYM_TYPED_FUNC_START(cpu_v7_do_resume)
mov ip, #0
mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcr p15, 0, ip, c13, c0, 1 @ set reserved context ID
@@ -186,22 +188,22 @@ ENTRY(cpu_v7_do_resume)
dsb
mov r0, r8 @ control register
b cpu_resume_mmu
-ENDPROC(cpu_v7_do_resume)
+SYM_FUNC_END(cpu_v7_do_resume)
#endif
.globl cpu_ca9mp_suspend_size
.equ cpu_ca9mp_suspend_size, cpu_v7_suspend_size + 4 * 2
#ifdef CONFIG_ARM_CPU_SUSPEND
-ENTRY(cpu_ca9mp_do_suspend)
+SYM_TYPED_FUNC_START(cpu_ca9mp_do_suspend)
stmfd sp!, {r4 - r5}
mrc p15, 0, r4, c15, c0, 1 @ Diagnostic register
mrc p15, 0, r5, c15, c0, 0 @ Power register
stmia r0!, {r4 - r5}
ldmfd sp!, {r4 - r5}
b cpu_v7_do_suspend
-ENDPROC(cpu_ca9mp_do_suspend)
+SYM_FUNC_END(cpu_ca9mp_do_suspend)
-ENTRY(cpu_ca9mp_do_resume)
+SYM_TYPED_FUNC_START(cpu_ca9mp_do_resume)
ldmia r0!, {r4 - r5}
mrc p15, 0, r10, c15, c0, 1 @ Read Diagnostic register
teq r4, r10 @ Already restored?
@@ -210,7 +212,7 @@ ENTRY(cpu_ca9mp_do_resume)
teq r5, r10 @ Already restored?
mcrne p15, 0, r5, c15, c0, 0 @ No, so restore it
b cpu_v7_do_resume
-ENDPROC(cpu_ca9mp_do_resume)
+SYM_FUNC_END(cpu_ca9mp_do_resume)
#endif
#ifdef CONFIG_CPU_PJ4B
@@ -220,18 +222,18 @@ ENDPROC(cpu_ca9mp_do_resume)
globl_equ cpu_pj4b_proc_fin, cpu_v7_proc_fin
globl_equ cpu_pj4b_reset, cpu_v7_reset
#ifdef CONFIG_PJ4B_ERRATA_4742
-ENTRY(cpu_pj4b_do_idle)
+SYM_TYPED_FUNC_START(cpu_pj4b_do_idle)
dsb @ WFI may enter a low-power mode
wfi
dsb @barrier
ret lr
-ENDPROC(cpu_pj4b_do_idle)
+SYM_FUNC_END(cpu_pj4b_do_idle)
#else
globl_equ cpu_pj4b_do_idle, cpu_v7_do_idle
#endif
globl_equ cpu_pj4b_dcache_clean_area, cpu_v7_dcache_clean_area
#ifdef CONFIG_ARM_CPU_SUSPEND
-ENTRY(cpu_pj4b_do_suspend)
+SYM_TYPED_FUNC_START(cpu_pj4b_do_suspend)
stmfd sp!, {r6 - r10}
mrc p15, 1, r6, c15, c1, 0 @ save CP15 - extra features
mrc p15, 1, r7, c15, c2, 0 @ save CP15 - Aux Func Modes Ctrl 0
@@ -241,9 +243,9 @@ ENTRY(cpu_pj4b_do_suspend)
stmia r0!, {r6 - r10}
ldmfd sp!, {r6 - r10}
b cpu_v7_do_suspend
-ENDPROC(cpu_pj4b_do_suspend)
+SYM_FUNC_END(cpu_pj4b_do_suspend)
-ENTRY(cpu_pj4b_do_resume)
+SYM_TYPED_FUNC_START(cpu_pj4b_do_resume)
ldmia r0!, {r6 - r10}
mcr p15, 1, r6, c15, c1, 0 @ restore CP15 - extra features
mcr p15, 1, r7, c15, c2, 0 @ restore CP15 - Aux Func Modes Ctrl 0
@@ -251,7 +253,7 @@ ENTRY(cpu_pj4b_do_resume)
mcr p15, 1, r9, c15, c1, 1 @ restore CP15 - Aux Debug Modes Ctrl 1
mcr p15, 0, r10, c9, c14, 0 @ restore CP15 - PMC
b cpu_v7_do_resume
-ENDPROC(cpu_pj4b_do_resume)
+SYM_FUNC_END(cpu_pj4b_do_resume)
#endif
.globl cpu_pj4b_suspend_size
.equ cpu_pj4b_suspend_size, cpu_v7_suspend_size + 4 * 5
diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S
index d65a12f851a9..d4675603593b 100644
--- a/arch/arm/mm/proc-v7m.S
+++ b/arch/arm/mm/proc-v7m.S
@@ -8,18 +8,19 @@
* This is the "shell" of the ARMv7-M processor support.
*/
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/assembler.h>
#include <asm/page.h>
#include <asm/v7m.h>
#include "proc-macros.S"
-ENTRY(cpu_v7m_proc_init)
+SYM_TYPED_FUNC_START(cpu_v7m_proc_init)
ret lr
-ENDPROC(cpu_v7m_proc_init)
+SYM_FUNC_END(cpu_v7m_proc_init)
-ENTRY(cpu_v7m_proc_fin)
+SYM_TYPED_FUNC_START(cpu_v7m_proc_fin)
ret lr
-ENDPROC(cpu_v7m_proc_fin)
+SYM_FUNC_END(cpu_v7m_proc_fin)
/*
* cpu_v7m_reset(loc)
@@ -31,9 +32,9 @@ ENDPROC(cpu_v7m_proc_fin)
* - loc - location to jump to for soft reset
*/
.align 5
-ENTRY(cpu_v7m_reset)
+SYM_TYPED_FUNC_START(cpu_v7m_reset)
ret r0
-ENDPROC(cpu_v7m_reset)
+SYM_FUNC_END(cpu_v7m_reset)
/*
* cpu_v7m_do_idle()
@@ -42,36 +43,36 @@ ENDPROC(cpu_v7m_reset)
*
* IRQs are already disabled.
*/
-ENTRY(cpu_v7m_do_idle)
+SYM_TYPED_FUNC_START(cpu_v7m_do_idle)
wfi
ret lr
-ENDPROC(cpu_v7m_do_idle)
+SYM_FUNC_END(cpu_v7m_do_idle)
-ENTRY(cpu_v7m_dcache_clean_area)
+SYM_TYPED_FUNC_START(cpu_v7m_dcache_clean_area)
ret lr
-ENDPROC(cpu_v7m_dcache_clean_area)
+SYM_FUNC_END(cpu_v7m_dcache_clean_area)
/*
* There is no MMU, so here is nothing to do.
*/
-ENTRY(cpu_v7m_switch_mm)
+SYM_TYPED_FUNC_START(cpu_v7m_switch_mm)
ret lr
-ENDPROC(cpu_v7m_switch_mm)
+SYM_FUNC_END(cpu_v7m_switch_mm)
.globl cpu_v7m_suspend_size
.equ cpu_v7m_suspend_size, 0
#ifdef CONFIG_ARM_CPU_SUSPEND
-ENTRY(cpu_v7m_do_suspend)
+SYM_TYPED_FUNC_START(cpu_v7m_do_suspend)
ret lr
-ENDPROC(cpu_v7m_do_suspend)
+SYM_FUNC_END(cpu_v7m_do_suspend)
-ENTRY(cpu_v7m_do_resume)
+SYM_TYPED_FUNC_START(cpu_v7m_do_resume)
ret lr
-ENDPROC(cpu_v7m_do_resume)
+SYM_FUNC_END(cpu_v7m_do_resume)
#endif
-ENTRY(cpu_cm7_dcache_clean_area)
+SYM_TYPED_FUNC_START(cpu_cm7_dcache_clean_area)
dcache_line_size r2, r3
movw r3, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_DCCMVAC
movt r3, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_DCCMVAC
@@ -82,16 +83,16 @@ ENTRY(cpu_cm7_dcache_clean_area)
bhi 1b
dsb
ret lr
-ENDPROC(cpu_cm7_dcache_clean_area)
+SYM_FUNC_END(cpu_cm7_dcache_clean_area)
-ENTRY(cpu_cm7_proc_fin)
+SYM_TYPED_FUNC_START(cpu_cm7_proc_fin)
movw r2, #:lower16:(BASEADDR_V7M_SCB + V7M_SCB_CCR)
movt r2, #:upper16:(BASEADDR_V7M_SCB + V7M_SCB_CCR)
ldr r0, [r2]
bic r0, r0, #(V7M_SCB_CCR_DC | V7M_SCB_CCR_IC)
str r0, [r2]
ret lr
-ENDPROC(cpu_cm7_proc_fin)
+SYM_FUNC_END(cpu_cm7_proc_fin)
.section ".init.text", "ax"
diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S
index a17afe7e195a..14927b380452 100644
--- a/arch/arm/mm/proc-xsc3.S
+++ b/arch/arm/mm/proc-xsc3.S
@@ -23,6 +23,7 @@
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <linux/pgtable.h>
#include <asm/assembler.h>
#include <asm/hwcap.h>
@@ -79,18 +80,20 @@
*
* Nothing too exciting at the moment
*/
-ENTRY(cpu_xsc3_proc_init)
+SYM_TYPED_FUNC_START(cpu_xsc3_proc_init)
ret lr
+SYM_FUNC_END(cpu_xsc3_proc_init)
/*
* cpu_xsc3_proc_fin()
*/
-ENTRY(cpu_xsc3_proc_fin)
+SYM_TYPED_FUNC_START(cpu_xsc3_proc_fin)
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x1800 @ ...IZ...........
bic r0, r0, #0x0006 @ .............CA.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
ret lr
+SYM_FUNC_END(cpu_xsc3_proc_fin)
/*
* cpu_xsc3_reset(loc)
@@ -103,7 +106,7 @@ ENTRY(cpu_xsc3_proc_fin)
*/
.align 5
.pushsection .idmap.text, "ax"
-ENTRY(cpu_xsc3_reset)
+SYM_TYPED_FUNC_START(cpu_xsc3_reset)
mov r1, #PSR_F_BIT|PSR_I_BIT|SVC_MODE
msr cpsr_c, r1 @ reset CPSR
mrc p15, 0, r1, c1, c0, 0 @ ctrl register
@@ -117,7 +120,7 @@ ENTRY(cpu_xsc3_reset)
@ already containing those two last instructions to survive.
mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs
ret r0
-ENDPROC(cpu_xsc3_reset)
+SYM_FUNC_END(cpu_xsc3_reset)
.popsection
/*
@@ -132,10 +135,11 @@ ENDPROC(cpu_xsc3_reset)
*/
.align 5
-ENTRY(cpu_xsc3_do_idle)
+SYM_TYPED_FUNC_START(cpu_xsc3_do_idle)
mov r0, #1
mcr p14, 0, r0, c7, c0, 0 @ go to idle
ret lr
+SYM_FUNC_END(cpu_xsc3_do_idle)
/* ================================= CACHE ================================ */
@@ -144,11 +148,11 @@ ENTRY(cpu_xsc3_do_idle)
*
* Unconditionally clean and invalidate the entire icache.
*/
-ENTRY(xsc3_flush_icache_all)
+SYM_TYPED_FUNC_START(xsc3_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
ret lr
-ENDPROC(xsc3_flush_icache_all)
+SYM_FUNC_END(xsc3_flush_icache_all)
/*
* flush_user_cache_all()
@@ -156,15 +160,14 @@ ENDPROC(xsc3_flush_icache_all)
* Invalidate all cache entries in a particular address
* space.
*/
-ENTRY(xsc3_flush_user_cache_all)
- /* FALLTHROUGH */
+SYM_FUNC_ALIAS(xsc3_flush_user_cache_all, xsc3_flush_kern_cache_all)
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
-ENTRY(xsc3_flush_kern_cache_all)
+SYM_TYPED_FUNC_START(xsc3_flush_kern_cache_all)
mov r2, #VM_EXEC
mov ip, #0
__flush_whole_cache:
@@ -174,6 +177,7 @@ __flush_whole_cache:
mcrne p15, 0, ip, c7, c10, 4 @ data write barrier
mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush
ret lr
+SYM_FUNC_END(xsc3_flush_kern_cache_all)
/*
* flush_user_cache_range(start, end, vm_flags)
@@ -186,7 +190,7 @@ __flush_whole_cache:
* - vma - vm_area_struct describing address space
*/
.align 5
-ENTRY(xsc3_flush_user_cache_range)
+SYM_TYPED_FUNC_START(xsc3_flush_user_cache_range)
mov ip, #0
sub r3, r1, r0 @ calculate total size
cmp r3, #MAX_AREA_SIZE
@@ -203,6 +207,7 @@ ENTRY(xsc3_flush_user_cache_range)
mcrne p15, 0, ip, c7, c10, 4 @ data write barrier
mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush
ret lr
+SYM_FUNC_END(xsc3_flush_user_cache_range)
/*
* coherent_kern_range(start, end)
@@ -217,9 +222,13 @@ ENTRY(xsc3_flush_user_cache_range)
* Note: single I-cache line invalidation isn't used here since
* it also trashes the mini I-cache used by JTAG debuggers.
*/
-ENTRY(xsc3_coherent_kern_range)
-/* FALLTHROUGH */
-ENTRY(xsc3_coherent_user_range)
+SYM_TYPED_FUNC_START(xsc3_coherent_kern_range)
+#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */
+ b xsc3_coherent_user_range
+#endif
+SYM_FUNC_END(xsc3_coherent_kern_range)
+
+SYM_TYPED_FUNC_START(xsc3_coherent_user_range)
bic r0, r0, #CACHELINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line
add r0, r0, #CACHELINESIZE
@@ -230,6 +239,7 @@ ENTRY(xsc3_coherent_user_range)
mcr p15, 0, r0, c7, c10, 4 @ data write barrier
mcr p15, 0, r0, c7, c5, 4 @ prefetch flush
ret lr
+SYM_FUNC_END(xsc3_coherent_user_range)
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -240,7 +250,7 @@ ENTRY(xsc3_coherent_user_range)
* - addr - kernel address
* - size - region size
*/
-ENTRY(xsc3_flush_kern_dcache_area)
+SYM_TYPED_FUNC_START(xsc3_flush_kern_dcache_area)
add r1, r0, r1
1: mcr p15, 0, r0, c7, c14, 1 @ clean/invalidate L1 D line
add r0, r0, #CACHELINESIZE
@@ -251,6 +261,7 @@ ENTRY(xsc3_flush_kern_dcache_area)
mcr p15, 0, r0, c7, c10, 4 @ data write barrier
mcr p15, 0, r0, c7, c5, 4 @ prefetch flush
ret lr
+SYM_FUNC_END(xsc3_flush_kern_dcache_area)
/*
* dma_inv_range(start, end)
@@ -301,7 +312,7 @@ xsc3_dma_clean_range:
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(xsc3_dma_flush_range)
+SYM_TYPED_FUNC_START(xsc3_dma_flush_range)
bic r0, r0, #CACHELINESIZE - 1
1: mcr p15, 0, r0, c7, c14, 1 @ clean/invalidate L1 D line
add r0, r0, #CACHELINESIZE
@@ -309,6 +320,7 @@ ENTRY(xsc3_dma_flush_range)
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ data write barrier
ret lr
+SYM_FUNC_END(xsc3_dma_flush_range)
/*
* dma_map_area(start, size, dir)
@@ -316,13 +328,13 @@ ENTRY(xsc3_dma_flush_range)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(xsc3_dma_map_area)
+SYM_TYPED_FUNC_START(xsc3_dma_map_area)
add r1, r1, r0
cmp r2, #DMA_TO_DEVICE
beq xsc3_dma_clean_range
bcs xsc3_dma_inv_range
b xsc3_dma_flush_range
-ENDPROC(xsc3_dma_map_area)
+SYM_FUNC_END(xsc3_dma_map_area)
/*
* dma_unmap_area(start, size, dir)
@@ -330,22 +342,17 @@ ENDPROC(xsc3_dma_map_area)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(xsc3_dma_unmap_area)
+SYM_TYPED_FUNC_START(xsc3_dma_unmap_area)
ret lr
-ENDPROC(xsc3_dma_unmap_area)
-
- .globl xsc3_flush_kern_cache_louis
- .equ xsc3_flush_kern_cache_louis, xsc3_flush_kern_cache_all
-
- @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
- define_cache_functions xsc3
+SYM_FUNC_END(xsc3_dma_unmap_area)
-ENTRY(cpu_xsc3_dcache_clean_area)
+SYM_TYPED_FUNC_START(cpu_xsc3_dcache_clean_area)
1: mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line
add r0, r0, #CACHELINESIZE
subs r1, r1, #CACHELINESIZE
bhi 1b
ret lr
+SYM_FUNC_END(cpu_xsc3_dcache_clean_area)
/* =============================== PageTable ============================== */
@@ -357,7 +364,7 @@ ENTRY(cpu_xsc3_dcache_clean_area)
* pgd: new page tables
*/
.align 5
-ENTRY(cpu_xsc3_switch_mm)
+SYM_TYPED_FUNC_START(cpu_xsc3_switch_mm)
clean_d_cache r1, r2
mcr p15, 0, ip, c7, c5, 0 @ invalidate L1 I cache and BTB
mcr p15, 0, ip, c7, c10, 4 @ data write barrier
@@ -366,6 +373,7 @@ ENTRY(cpu_xsc3_switch_mm)
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs
cpwait_ret lr, ip
+SYM_FUNC_END(cpu_xsc3_switch_mm)
/*
* cpu_xsc3_set_pte_ext(ptep, pte, ext)
@@ -391,7 +399,7 @@ cpu_xsc3_mt_table:
.long 0x00 @ unused
.align 5
-ENTRY(cpu_xsc3_set_pte_ext)
+SYM_TYPED_FUNC_START(cpu_xsc3_set_pte_ext)
xscale_set_pte_ext_prologue
tst r1, #L_PTE_SHARED @ shared?
@@ -404,6 +412,7 @@ ENTRY(cpu_xsc3_set_pte_ext)
xscale_set_pte_ext_epilogue
ret lr
+SYM_FUNC_END(cpu_xsc3_set_pte_ext)
.ltorg
.align
@@ -411,7 +420,7 @@ ENTRY(cpu_xsc3_set_pte_ext)
.globl cpu_xsc3_suspend_size
.equ cpu_xsc3_suspend_size, 4 * 6
#ifdef CONFIG_ARM_CPU_SUSPEND
-ENTRY(cpu_xsc3_do_suspend)
+SYM_TYPED_FUNC_START(cpu_xsc3_do_suspend)
stmfd sp!, {r4 - r9, lr}
mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode
mrc p15, 0, r5, c15, c1, 0 @ CP access reg
@@ -422,9 +431,9 @@ ENTRY(cpu_xsc3_do_suspend)
bic r4, r4, #2 @ clear frequency change bit
stmia r0, {r4 - r9} @ store cp regs
ldmia sp!, {r4 - r9, pc}
-ENDPROC(cpu_xsc3_do_suspend)
+SYM_FUNC_END(cpu_xsc3_do_suspend)
-ENTRY(cpu_xsc3_do_resume)
+SYM_TYPED_FUNC_START(cpu_xsc3_do_resume)
ldmia r0, {r4 - r9} @ load cp regs
mov ip, #0
mcr p15, 0, ip, c7, c7, 0 @ invalidate I & D caches, BTB
@@ -440,7 +449,7 @@ ENTRY(cpu_xsc3_do_resume)
mcr p15, 0, r8, c1, c0, 1 @ auxiliary control reg
mov r0, r9 @ control register
b cpu_resume_mmu
-ENDPROC(cpu_xsc3_do_resume)
+SYM_FUNC_END(cpu_xsc3_do_resume)
#endif
.type __xsc3_setup, #function
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
index d82590aa71c0..d8462df8020b 100644
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S
@@ -19,6 +19,7 @@
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <linux/pgtable.h>
#include <asm/assembler.h>
#include <asm/hwcap.h>
@@ -111,22 +112,24 @@ clean_addr: .word CLEAN_ADDR
*
* Nothing too exciting at the moment
*/
-ENTRY(cpu_xscale_proc_init)
+SYM_TYPED_FUNC_START(cpu_xscale_proc_init)
@ enable write buffer coalescing. Some bootloader disable it
mrc p15, 0, r1, c1, c0, 1
bic r1, r1, #1
mcr p15, 0, r1, c1, c0, 1
ret lr
+SYM_FUNC_END(cpu_xscale_proc_init)
/*
* cpu_xscale_proc_fin()
*/
-ENTRY(cpu_xscale_proc_fin)
+SYM_TYPED_FUNC_START(cpu_xscale_proc_fin)
mrc p15, 0, r0, c1, c0, 0 @ ctrl register
bic r0, r0, #0x1800 @ ...IZ...........
bic r0, r0, #0x0006 @ .............CA.
mcr p15, 0, r0, c1, c0, 0 @ disable caches
ret lr
+SYM_FUNC_END(cpu_xscale_proc_fin)
/*
* cpu_xscale_reset(loc)
@@ -141,7 +144,7 @@ ENTRY(cpu_xscale_proc_fin)
*/
.align 5
.pushsection .idmap.text, "ax"
-ENTRY(cpu_xscale_reset)
+SYM_TYPED_FUNC_START(cpu_xscale_reset)
mov r1, #PSR_F_BIT|PSR_I_BIT|SVC_MODE
msr cpsr_c, r1 @ reset CPSR
mcr p15, 0, r1, c10, c4, 1 @ unlock I-TLB
@@ -159,7 +162,7 @@ ENTRY(cpu_xscale_reset)
@ already containing those two last instructions to survive.
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
ret r0
-ENDPROC(cpu_xscale_reset)
+SYM_FUNC_END(cpu_xscale_reset)
.popsection
/*
@@ -174,10 +177,11 @@ ENDPROC(cpu_xscale_reset)
*/
.align 5
-ENTRY(cpu_xscale_do_idle)
+SYM_TYPED_FUNC_START(cpu_xscale_do_idle)
mov r0, #1
mcr p14, 0, r0, c7, c0, 0 @ Go to IDLE
ret lr
+SYM_FUNC_END(cpu_xscale_do_idle)
/* ================================= CACHE ================================ */
@@ -186,11 +190,11 @@ ENTRY(cpu_xscale_do_idle)
*
* Unconditionally clean and invalidate the entire icache.
*/
-ENTRY(xscale_flush_icache_all)
+SYM_TYPED_FUNC_START(xscale_flush_icache_all)
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
ret lr
-ENDPROC(xscale_flush_icache_all)
+SYM_FUNC_END(xscale_flush_icache_all)
/*
* flush_user_cache_all()
@@ -198,15 +202,14 @@ ENDPROC(xscale_flush_icache_all)
* Invalidate all cache entries in a particular address
* space.
*/
-ENTRY(xscale_flush_user_cache_all)
- /* FALLTHROUGH */
+SYM_FUNC_ALIAS(xscale_flush_user_cache_all, xscale_flush_kern_cache_all)
/*
* flush_kern_cache_all()
*
* Clean and invalidate the entire cache.
*/
-ENTRY(xscale_flush_kern_cache_all)
+SYM_TYPED_FUNC_START(xscale_flush_kern_cache_all)
mov r2, #VM_EXEC
mov ip, #0
__flush_whole_cache:
@@ -215,6 +218,7 @@ __flush_whole_cache:
mcrne p15, 0, ip, c7, c5, 0 @ Invalidate I cache & BTB
mcrne p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer
ret lr
+SYM_FUNC_END(xscale_flush_kern_cache_all)
/*
* flush_user_cache_range(start, end, vm_flags)
@@ -227,7 +231,7 @@ __flush_whole_cache:
* - vma - vm_area_struct describing address space
*/
.align 5
-ENTRY(xscale_flush_user_cache_range)
+SYM_TYPED_FUNC_START(xscale_flush_user_cache_range)
mov ip, #0
sub r3, r1, r0 @ calculate total size
cmp r3, #MAX_AREA_SIZE
@@ -244,6 +248,7 @@ ENTRY(xscale_flush_user_cache_range)
mcrne p15, 0, ip, c7, c5, 6 @ Invalidate BTB
mcrne p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer
ret lr
+SYM_FUNC_END(xscale_flush_user_cache_range)
/*
* coherent_kern_range(start, end)
@@ -258,7 +263,7 @@ ENTRY(xscale_flush_user_cache_range)
* Note: single I-cache line invalidation isn't used here since
* it also trashes the mini I-cache used by JTAG debuggers.
*/
-ENTRY(xscale_coherent_kern_range)
+SYM_TYPED_FUNC_START(xscale_coherent_kern_range)
bic r0, r0, #CACHELINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #CACHELINESIZE
@@ -268,6 +273,7 @@ ENTRY(xscale_coherent_kern_range)
mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB
mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer
ret lr
+SYM_FUNC_END(xscale_coherent_kern_range)
/*
* coherent_user_range(start, end)
@@ -279,7 +285,7 @@ ENTRY(xscale_coherent_kern_range)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(xscale_coherent_user_range)
+SYM_TYPED_FUNC_START(xscale_coherent_user_range)
bic r0, r0, #CACHELINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c5, 1 @ Invalidate I cache entry
@@ -290,6 +296,7 @@ ENTRY(xscale_coherent_user_range)
mcr p15, 0, r0, c7, c5, 6 @ Invalidate BTB
mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer
ret lr
+SYM_FUNC_END(xscale_coherent_user_range)
/*
* flush_kern_dcache_area(void *addr, size_t size)
@@ -300,7 +307,7 @@ ENTRY(xscale_coherent_user_range)
* - addr - kernel address
* - size - region size
*/
-ENTRY(xscale_flush_kern_dcache_area)
+SYM_TYPED_FUNC_START(xscale_flush_kern_dcache_area)
add r1, r0, r1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
@@ -311,6 +318,7 @@ ENTRY(xscale_flush_kern_dcache_area)
mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB
mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer
ret lr
+SYM_FUNC_END(xscale_flush_kern_dcache_area)
/*
* dma_inv_range(start, end)
@@ -361,7 +369,7 @@ xscale_dma_clean_range:
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(xscale_dma_flush_range)
+SYM_TYPED_FUNC_START(xscale_dma_flush_range)
bic r0, r0, #CACHELINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
@@ -370,6 +378,7 @@ ENTRY(xscale_dma_flush_range)
blo 1b
mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer
ret lr
+SYM_FUNC_END(xscale_dma_flush_range)
/*
* dma_map_area(start, size, dir)
@@ -377,13 +386,27 @@ ENTRY(xscale_dma_flush_range)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(xscale_dma_map_area)
+SYM_TYPED_FUNC_START(xscale_dma_map_area)
add r1, r1, r0
cmp r2, #DMA_TO_DEVICE
beq xscale_dma_clean_range
bcs xscale_dma_inv_range
b xscale_dma_flush_range
-ENDPROC(xscale_dma_map_area)
+SYM_FUNC_END(xscale_dma_map_area)
+
+/*
+ * On stepping A0/A1 of the 80200, invalidating D-cache by line doesn't
+ * clear the dirty bits, which means that if we invalidate a dirty line,
+ * the dirty data can still be written back to external memory later on.
+ *
+ * The recommended workaround is to always do a clean D-cache line before
+ * doing an invalidate D-cache line, so on the affected processors,
+ * dma_inv_range() is implemented as dma_flush_range().
+ *
+ * See erratum #25 of "Intel 80200 Processor Specification Update",
+ * revision January 22, 2003, available at:
+ * http://www.intel.com/design/iio/specupdt/273415.htm
+ */
/*
* dma_map_area(start, size, dir)
@@ -391,12 +414,12 @@ ENDPROC(xscale_dma_map_area)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(xscale_80200_A0_A1_dma_map_area)
+SYM_TYPED_FUNC_START(xscale_80200_A0_A1_dma_map_area)
add r1, r1, r0
teq r2, #DMA_TO_DEVICE
beq xscale_dma_clean_range
b xscale_dma_flush_range
-ENDPROC(xscale_80200_A0_A1_dma_map_area)
+SYM_FUNC_END(xscale_80200_A0_A1_dma_map_area)
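For readers skimming the assembly, a hedged C rendering of the routine just above (illustrative only — the real entry point stays in assembly, and the extern declarations are assumptions made for the sketch):

#include <linux/types.h>
#include <linux/dma-direction.h>

extern void xscale_dma_clean_range(unsigned long start, unsigned long end);
extern void xscale_dma_flush_range(unsigned long start, unsigned long end);

/* C equivalent of xscale_80200_A0_A1_dma_map_area: on these steppings a
 * bare invalidate is unsafe, so anything other than DMA_TO_DEVICE gets a
 * full clean+invalidate instead. */
static void dma_map_area_80200_a0_a1(unsigned long start, size_t size, int dir)
{
	unsigned long end = start + size;

	if (dir == DMA_TO_DEVICE)
		xscale_dma_clean_range(start, end);	/* clean only */
	else
		xscale_dma_flush_range(start, end);	/* never invalidate alone */
}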
/*
* dma_unmap_area(start, size, dir)
@@ -404,59 +427,17 @@ ENDPROC(xscale_80200_A0_A1_dma_map_area)
* - size - size of region
* - dir - DMA direction
*/
-ENTRY(xscale_dma_unmap_area)
+SYM_TYPED_FUNC_START(xscale_dma_unmap_area)
ret lr
-ENDPROC(xscale_dma_unmap_area)
-
- .globl xscale_flush_kern_cache_louis
- .equ xscale_flush_kern_cache_louis, xscale_flush_kern_cache_all
+SYM_FUNC_END(xscale_dma_unmap_area)
- @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
- define_cache_functions xscale
-
-/*
- * On stepping A0/A1 of the 80200, invalidating D-cache by line doesn't
- * clear the dirty bits, which means that if we invalidate a dirty line,
- * the dirty data can still be written back to external memory later on.
- *
- * The recommended workaround is to always do a clean D-cache line before
- * doing an invalidate D-cache line, so on the affected processors,
- * dma_inv_range() is implemented as dma_flush_range().
- *
- * See erratum #25 of "Intel 80200 Processor Specification Update",
- * revision January 22, 2003, available at:
- * http://www.intel.com/design/iio/specupdt/273415.htm
- */
-.macro a0_alias basename
- .globl xscale_80200_A0_A1_\basename
- .type xscale_80200_A0_A1_\basename , %function
- .equ xscale_80200_A0_A1_\basename , xscale_\basename
-.endm
-
-/*
- * Most of the cache functions are unchanged for these processor revisions.
- * Export suitable alias symbols for the unchanged functions:
- */
- a0_alias flush_icache_all
- a0_alias flush_user_cache_all
- a0_alias flush_kern_cache_all
- a0_alias flush_kern_cache_louis
- a0_alias flush_user_cache_range
- a0_alias coherent_kern_range
- a0_alias coherent_user_range
- a0_alias flush_kern_dcache_area
- a0_alias dma_flush_range
- a0_alias dma_unmap_area
-
- @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
- define_cache_functions xscale_80200_A0_A1
-
-ENTRY(cpu_xscale_dcache_clean_area)
+SYM_TYPED_FUNC_START(cpu_xscale_dcache_clean_area)
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #CACHELINESIZE
subs r1, r1, #CACHELINESIZE
bhi 1b
ret lr
+SYM_FUNC_END(cpu_xscale_dcache_clean_area)
/* =============================== PageTable ============================== */
@@ -468,13 +449,14 @@ ENTRY(cpu_xscale_dcache_clean_area)
* pgd: new page tables
*/
.align 5
-ENTRY(cpu_xscale_switch_mm)
+SYM_TYPED_FUNC_START(cpu_xscale_switch_mm)
clean_d_cache r1, r2
mcr p15, 0, ip, c7, c5, 0 @ Invalidate I cache & BTB
mcr p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
cpwait_ret lr, ip
+SYM_FUNC_END(cpu_xscale_switch_mm)
/*
* cpu_xscale_set_pte_ext(ptep, pte, ext)
@@ -502,7 +484,7 @@ cpu_xscale_mt_table:
.long 0x00 @ unused
.align 5
-ENTRY(cpu_xscale_set_pte_ext)
+SYM_TYPED_FUNC_START(cpu_xscale_set_pte_ext)
xscale_set_pte_ext_prologue
@
@@ -520,6 +502,7 @@ ENTRY(cpu_xscale_set_pte_ext)
xscale_set_pte_ext_epilogue
ret lr
+SYM_FUNC_END(cpu_xscale_set_pte_ext)
.ltorg
.align
@@ -527,7 +510,7 @@ ENTRY(cpu_xscale_set_pte_ext)
.globl cpu_xscale_suspend_size
.equ cpu_xscale_suspend_size, 4 * 6
#ifdef CONFIG_ARM_CPU_SUSPEND
-ENTRY(cpu_xscale_do_suspend)
+SYM_TYPED_FUNC_START(cpu_xscale_do_suspend)
stmfd sp!, {r4 - r9, lr}
mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode
mrc p15, 0, r5, c15, c1, 0 @ CP access reg
@@ -538,9 +521,9 @@ ENTRY(cpu_xscale_do_suspend)
bic r4, r4, #2 @ clear frequency change bit
stmia r0, {r4 - r9} @ store cp regs
ldmfd sp!, {r4 - r9, pc}
-ENDPROC(cpu_xscale_do_suspend)
+SYM_FUNC_END(cpu_xscale_do_suspend)
-ENTRY(cpu_xscale_do_resume)
+SYM_TYPED_FUNC_START(cpu_xscale_do_resume)
ldmia r0, {r4 - r9} @ load cp regs
mov ip, #0
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
@@ -553,7 +536,7 @@ ENTRY(cpu_xscale_do_resume)
mcr p15, 0, r8, c1, c0, 1 @ auxiliary control reg
mov r0, r9 @ control register
b cpu_resume_mmu
-ENDPROC(cpu_xscale_do_resume)
+SYM_FUNC_END(cpu_xscale_do_resume)
#endif
.type __xscale_setup, #function
diff --git a/arch/arm/mm/proc.c b/arch/arm/mm/proc.c
new file mode 100644
index 000000000000..bdbbf65d1b36
--- /dev/null
+++ b/arch/arm/mm/proc.c
@@ -0,0 +1,500 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This file defines C prototypes for the low-level processor assembly functions
+ * and creates a reference for CFI. This needs to be done for every assembly
+ * processor ("proc") function that is called from C but does not have a
+ * corresponding C implementation.
+ *
+ * Processors are listed in the order they appear in the Makefile.
+ *
+ * Functions are listed if and only if they see use on the target CPU, and in
+ * the order they are defined in struct processor.
+ */
+#include <asm/proc-fns.h>
+
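A short aside on the mechanism described in the header comment — a sketch under the usual KCFI arrangement, not the kernel's exact macro: declaring the prototype in C and keeping an address-taken reference (what __ADDRESSABLE() does, via a discarded section in <linux/compiler.h>) is what makes the compiler emit the type id that SYM_TYPED_FUNC_START() references from the .S file, so indirect calls through struct processor pointers pass the CFI check.

/* illustrative sketch only */
typedef void (*dcache_clean_fn)(void *addr, int size);

void cpu_v7_dcache_clean_area(void *addr, int size);	/* implemented in asm */

/* The address-taken C reference ties a KCFI type hash to the prototype. */
static void *__attribute__((used)) __keep_cpu_v7_dcache_clean_area =
	(void *)&cpu_v7_dcache_clean_area;

static void clean_one_line(void *p)
{
	dcache_clean_fn fn = cpu_v7_dcache_clean_area;

	fn(p, 32);	/* KCFI compares fn's type hash against this prototype */
}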
+#ifdef CONFIG_CPU_ARM7TDMI
+void cpu_arm7tdmi_proc_init(void);
+__ADDRESSABLE(cpu_arm7tdmi_proc_init);
+void cpu_arm7tdmi_proc_fin(void);
+__ADDRESSABLE(cpu_arm7tdmi_proc_fin);
+void cpu_arm7tdmi_reset(void);
+__ADDRESSABLE(cpu_arm7tdmi_reset);
+int cpu_arm7tdmi_do_idle(void);
+__ADDRESSABLE(cpu_arm7tdmi_do_idle);
+void cpu_arm7tdmi_dcache_clean_area(void *addr, int size);
+__ADDRESSABLE(cpu_arm7tdmi_dcache_clean_area);
+void cpu_arm7tdmi_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+__ADDRESSABLE(cpu_arm7tdmi_switch_mm);
+#endif
+
+#ifdef CONFIG_CPU_ARM720T
+void cpu_arm720_proc_init(void);
+__ADDRESSABLE(cpu_arm720_proc_init);
+void cpu_arm720_proc_fin(void);
+__ADDRESSABLE(cpu_arm720_proc_fin);
+void cpu_arm720_reset(void);
+__ADDRESSABLE(cpu_arm720_reset);
+int cpu_arm720_do_idle(void);
+__ADDRESSABLE(cpu_arm720_do_idle);
+void cpu_arm720_dcache_clean_area(void *addr, int size);
+__ADDRESSABLE(cpu_arm720_dcache_clean_area);
+void cpu_arm720_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+__ADDRESSABLE(cpu_arm720_switch_mm);
+void cpu_arm720_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext);
+__ADDRESSABLE(cpu_arm720_set_pte_ext);
+#endif
+
+#ifdef CONFIG_CPU_ARM740T
+void cpu_arm740_proc_init(void);
+__ADDRESSABLE(cpu_arm740_proc_init);
+void cpu_arm740_proc_fin(void);
+__ADDRESSABLE(cpu_arm740_proc_fin);
+void cpu_arm740_reset(void);
+__ADDRESSABLE(cpu_arm740_reset);
+int cpu_arm740_do_idle(void);
+__ADDRESSABLE(cpu_arm740_do_idle);
+void cpu_arm740_dcache_clean_area(void *addr, int size);
+__ADDRESSABLE(cpu_arm740_dcache_clean_area);
+void cpu_arm740_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+__ADDRESSABLE(cpu_arm740_switch_mm);
+#endif
+
+#ifdef CONFIG_CPU_ARM9TDMI
+void cpu_arm9tdmi_proc_init(void);
+__ADDRESSABLE(cpu_arm9tdmi_proc_init);
+void cpu_arm9tdmi_proc_fin(void);
+__ADDRESSABLE(cpu_arm9tdmi_proc_fin);
+void cpu_arm9tdmi_reset(void);
+__ADDRESSABLE(cpu_arm9tdmi_reset);
+int cpu_arm9tdmi_do_idle(void);
+__ADDRESSABLE(cpu_arm9tdmi_do_idle);
+void cpu_arm9tdmi_dcache_clean_area(void *addr, int size);
+__ADDRESSABLE(cpu_arm9tdmi_dcache_clean_area);
+void cpu_arm9tdmi_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+__ADDRESSABLE(cpu_arm9tdmi_switch_mm);
+#endif
+
+#ifdef CONFIG_CPU_ARM920T
+void cpu_arm920_proc_init(void);
+__ADDRESSABLE(cpu_arm920_proc_init);
+void cpu_arm920_proc_fin(void);
+__ADDRESSABLE(cpu_arm920_proc_fin);
+void cpu_arm920_reset(void);
+__ADDRESSABLE(cpu_arm920_reset);
+int cpu_arm920_do_idle(void);
+__ADDRESSABLE(cpu_arm920_do_idle);
+void cpu_arm920_dcache_clean_area(void *addr, int size);
+__ADDRESSABLE(cpu_arm920_dcache_clean_area);
+void cpu_arm920_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+__ADDRESSABLE(cpu_arm920_switch_mm);
+void cpu_arm920_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext);
+__ADDRESSABLE(cpu_arm920_set_pte_ext);
+#ifdef CONFIG_ARM_CPU_SUSPEND
+void cpu_arm920_do_suspend(void *);
+__ADDRESSABLE(cpu_arm920_do_suspend);
+void cpu_arm920_do_resume(void *);
+__ADDRESSABLE(cpu_arm920_do_resume);
+#endif /* CONFIG_ARM_CPU_SUSPEND */
+#endif /* CONFIG_CPU_ARM920T */
+
+#ifdef CONFIG_CPU_ARM922T
+void cpu_arm922_proc_init(void);
+__ADDRESSABLE(cpu_arm922_proc_init);
+void cpu_arm922_proc_fin(void);
+__ADDRESSABLE(cpu_arm922_proc_fin);
+void cpu_arm922_reset(void);
+__ADDRESSABLE(cpu_arm922_reset);
+int cpu_arm922_do_idle(void);
+__ADDRESSABLE(cpu_arm922_do_idle);
+void cpu_arm922_dcache_clean_area(void *addr, int size);
+__ADDRESSABLE(cpu_arm922_dcache_clean_area);
+void cpu_arm922_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+__ADDRESSABLE(cpu_arm922_switch_mm);
+void cpu_arm922_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext);
+__ADDRESSABLE(cpu_arm922_set_pte_ext);
+#endif
+
+#ifdef CONFIG_CPU_ARM925T
+void cpu_arm925_proc_init(void);
+__ADDRESSABLE(cpu_arm925_proc_init);
+void cpu_arm925_proc_fin(void);
+__ADDRESSABLE(cpu_arm925_proc_fin);
+void cpu_arm925_reset(void);
+__ADDRESSABLE(cpu_arm925_reset);
+int cpu_arm925_do_idle(void);
+__ADDRESSABLE(cpu_arm925_do_idle);
+void cpu_arm925_dcache_clean_area(void *addr, int size);
+__ADDRESSABLE(cpu_arm925_dcache_clean_area);
+void cpu_arm925_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+__ADDRESSABLE(cpu_arm925_switch_mm);
+void cpu_arm925_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext);
+__ADDRESSABLE(cpu_arm925_set_pte_ext);
+#endif
+
+#ifdef CONFIG_CPU_ARM926T
+void cpu_arm926_proc_init(void);
+__ADDRESSABLE(cpu_arm926_proc_init);
+void cpu_arm926_proc_fin(void);
+__ADDRESSABLE(cpu_arm926_proc_fin);
+void cpu_arm926_reset(unsigned long addr, bool hvc);
+__ADDRESSABLE(cpu_arm926_reset);
+int cpu_arm926_do_idle(void);
+__ADDRESSABLE(cpu_arm926_do_idle);
+void cpu_arm926_dcache_clean_area(void *addr, int size);
+__ADDRESSABLE(cpu_arm926_dcache_clean_area);
+void cpu_arm926_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+__ADDRESSABLE(cpu_arm926_switch_mm);
+void cpu_arm926_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext);
+__ADDRESSABLE(cpu_arm926_set_pte_ext);
+#ifdef CONFIG_ARM_CPU_SUSPEND
+void cpu_arm926_do_suspend(void *);
+__ADDRESSABLE(cpu_arm926_do_suspend);
+void cpu_arm926_do_resume(void *);
+__ADDRESSABLE(cpu_arm926_do_resume);
+#endif /* CONFIG_ARM_CPU_SUSPEND */
+#endif /* CONFIG_CPU_ARM926T */
+
+#ifdef CONFIG_CPU_ARM940T
+void cpu_arm940_proc_init(void);
+__ADDRESSABLE(cpu_arm940_proc_init);
+void cpu_arm940_proc_fin(void);
+__ADDRESSABLE(cpu_arm940_proc_fin);
+void cpu_arm940_reset(void);
+__ADDRESSABLE(cpu_arm940_reset);
+int cpu_arm940_do_idle(void);
+__ADDRESSABLE(cpu_arm940_do_idle);
+void cpu_arm940_dcache_clean_area(void *addr, int size);
+__ADDRESSABLE(cpu_arm940_dcache_clean_area);
+void cpu_arm940_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+__ADDRESSABLE(cpu_arm940_switch_mm);
+#endif
+
+#ifdef CONFIG_CPU_ARM946E
+void cpu_arm946_proc_init(void);
+__ADDRESSABLE(cpu_arm946_proc_init);
+void cpu_arm946_proc_fin(void);
+__ADDRESSABLE(cpu_arm946_proc_fin);
+void cpu_arm946_reset(void);
+__ADDRESSABLE(cpu_arm946_reset);
+int cpu_arm946_do_idle(void);
+__ADDRESSABLE(cpu_arm946_do_idle);
+void cpu_arm946_dcache_clean_area(void *addr, int size);
+__ADDRESSABLE(cpu_arm946_dcache_clean_area);
+void cpu_arm946_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+__ADDRESSABLE(cpu_arm946_switch_mm);
+#endif
+
+#ifdef CONFIG_CPU_FA526
+void cpu_fa526_proc_init(void);
+__ADDRESSABLE(cpu_fa526_proc_init);
+void cpu_fa526_proc_fin(void);
+__ADDRESSABLE(cpu_fa526_proc_fin);
+void cpu_fa526_reset(unsigned long addr, bool hvc);
+__ADDRESSABLE(cpu_fa526_reset);
+int cpu_fa526_do_idle(void);
+__ADDRESSABLE(cpu_fa526_do_idle);
+void cpu_fa526_dcache_clean_area(void *addr, int size);
+__ADDRESSABLE(cpu_fa526_dcache_clean_area);
+void cpu_fa526_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+__ADDRESSABLE(cpu_fa526_switch_mm);
+void cpu_fa526_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext);
+__ADDRESSABLE(cpu_fa526_set_pte_ext);
+#endif
+
+#ifdef CONFIG_CPU_ARM1020
+void cpu_arm1020_proc_init(void);
+__ADDRESSABLE(cpu_arm1020_proc_init);
+void cpu_arm1020_proc_fin(void);
+__ADDRESSABLE(cpu_arm1020_proc_fin);
+void cpu_arm1020_reset(unsigned long addr, bool hvc);
+__ADDRESSABLE(cpu_arm1020_reset);
+int cpu_arm1020_do_idle(void);
+__ADDRESSABLE(cpu_arm1020_do_idle);
+void cpu_arm1020_dcache_clean_area(void *addr, int size);
+__ADDRESSABLE(cpu_arm1020_dcache_clean_area);
+void cpu_arm1020_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+__ADDRESSABLE(cpu_arm1020_switch_mm);
+void cpu_arm1020_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext);
+__ADDRESSABLE(cpu_arm1020_set_pte_ext);
+#endif
+
+#ifdef CONFIG_CPU_ARM1020E
+void cpu_arm1020e_proc_init(void);
+__ADDRESSABLE(cpu_arm1020e_proc_init);
+void cpu_arm1020e_proc_fin(void);
+__ADDRESSABLE(cpu_arm1020e_proc_fin);
+void cpu_arm1020e_reset(unsigned long addr, bool hvc);
+__ADDRESSABLE(cpu_arm1020e_reset);
+int cpu_arm1020e_do_idle(void);
+__ADDRESSABLE(cpu_arm1020e_do_idle);
+void cpu_arm1020e_dcache_clean_area(void *addr, int size);
+__ADDRESSABLE(cpu_arm1020e_dcache_clean_area);
+void cpu_arm1020e_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+__ADDRESSABLE(cpu_arm1020e_switch_mm);
+void cpu_arm1020e_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext);
+__ADDRESSABLE(cpu_arm1020e_set_pte_ext);
+#endif
+
+#ifdef CONFIG_CPU_ARM1022
+void cpu_arm1022_proc_init(void);
+__ADDRESSABLE(cpu_arm1022_proc_init);
+void cpu_arm1022_proc_fin(void);
+__ADDRESSABLE(cpu_arm1022_proc_fin);
+void cpu_arm1022_reset(unsigned long addr, bool hvc);
+__ADDRESSABLE(cpu_arm1022_reset);
+int cpu_arm1022_do_idle(void);
+__ADDRESSABLE(cpu_arm1022_do_idle);
+void cpu_arm1022_dcache_clean_area(void *addr, int size);
+__ADDRESSABLE(cpu_arm1022_dcache_clean_area);
+void cpu_arm1022_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+__ADDRESSABLE(cpu_arm1022_switch_mm);
+void cpu_arm1022_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext);
+__ADDRESSABLE(cpu_arm1022_set_pte_ext);
+#endif
+
+#ifdef CONFIG_CPU_ARM1026
+void cpu_arm1026_proc_init(void);
+__ADDRESSABLE(cpu_arm1026_proc_init);
+void cpu_arm1026_proc_fin(void);
+__ADDRESSABLE(cpu_arm1026_proc_fin);
+void cpu_arm1026_reset(unsigned long addr, bool hvc);
+__ADDRESSABLE(cpu_arm1026_reset);
+int cpu_arm1026_do_idle(void);
+__ADDRESSABLE(cpu_arm1026_do_idle);
+void cpu_arm1026_dcache_clean_area(void *addr, int size);
+__ADDRESSABLE(cpu_arm1026_dcache_clean_area);
+void cpu_arm1026_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+__ADDRESSABLE(cpu_arm1026_switch_mm);
+void cpu_arm1026_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext);
+__ADDRESSABLE(cpu_arm1026_set_pte_ext);
+#endif
+
+#ifdef CONFIG_CPU_SA110
+void cpu_sa110_proc_init(void);
+__ADDRESSABLE(cpu_sa110_proc_init);
+void cpu_sa110_proc_fin(void);
+__ADDRESSABLE(cpu_sa110_proc_fin);
+void cpu_sa110_reset(unsigned long addr, bool hvc);
+__ADDRESSABLE(cpu_sa110_reset);
+int cpu_sa110_do_idle(void);
+__ADDRESSABLE(cpu_sa110_do_idle);
+void cpu_sa110_dcache_clean_area(void *addr, int size);
+__ADDRESSABLE(cpu_sa110_dcache_clean_area);
+void cpu_sa110_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+__ADDRESSABLE(cpu_sa110_switch_mm);
+void cpu_sa110_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext);
+__ADDRESSABLE(cpu_sa110_set_pte_ext);
+#endif
+
+#ifdef CONFIG_CPU_SA1100
+void cpu_sa1100_proc_init(void);
+__ADDRESSABLE(cpu_sa1100_proc_init);
+void cpu_sa1100_proc_fin(void);
+__ADDRESSABLE(cpu_sa1100_proc_fin);
+void cpu_sa1100_reset(unsigned long addr, bool hvc);
+__ADDRESSABLE(cpu_sa1100_reset);
+int cpu_sa1100_do_idle(void);
+__ADDRESSABLE(cpu_sa1100_do_idle);
+void cpu_sa1100_dcache_clean_area(void *addr, int size);
+__ADDRESSABLE(cpu_sa1100_dcache_clean_area);
+void cpu_sa1100_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+__ADDRESSABLE(cpu_sa1100_switch_mm);
+void cpu_sa1100_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext);
+__ADDRESSABLE(cpu_sa1100_set_pte_ext);
+#ifdef CONFIG_ARM_CPU_SUSPEND
+void cpu_sa1100_do_suspend(void *);
+__ADDRESSABLE(cpu_sa1100_do_suspend);
+void cpu_sa1100_do_resume(void *);
+__ADDRESSABLE(cpu_sa1100_do_resume);
+#endif /* CONFIG_ARM_CPU_SUSPEND */
+#endif /* CONFIG_CPU_SA1100 */
+
+#ifdef CONFIG_CPU_XSCALE
+void cpu_xscale_proc_init(void);
+__ADDRESSABLE(cpu_xscale_proc_init);
+void cpu_xscale_proc_fin(void);
+__ADDRESSABLE(cpu_xscale_proc_fin);
+void cpu_xscale_reset(unsigned long addr, bool hvc);
+__ADDRESSABLE(cpu_xscale_reset);
+int cpu_xscale_do_idle(void);
+__ADDRESSABLE(cpu_xscale_do_idle);
+void cpu_xscale_dcache_clean_area(void *addr, int size);
+__ADDRESSABLE(cpu_xscale_dcache_clean_area);
+void cpu_xscale_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+__ADDRESSABLE(cpu_xscale_switch_mm);
+void cpu_xscale_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext);
+__ADDRESSABLE(cpu_xscale_set_pte_ext);
+#ifdef CONFIG_ARM_CPU_SUSPEND
+void cpu_xscale_do_suspend(void *);
+__ADDRESSABLE(cpu_xscale_do_suspend);
+void cpu_xscale_do_resume(void *);
+__ADDRESSABLE(cpu_xscale_do_resume);
+#endif /* CONFIG_ARM_CPU_SUSPEND */
+#endif /* CONFIG_CPU_XSCALE */
+
+#ifdef CONFIG_CPU_XSC3
+void cpu_xsc3_proc_init(void);
+__ADDRESSABLE(cpu_xsc3_proc_init);
+void cpu_xsc3_proc_fin(void);
+__ADDRESSABLE(cpu_xsc3_proc_fin);
+void cpu_xsc3_reset(unsigned long addr, bool hvc);
+__ADDRESSABLE(cpu_xsc3_reset);
+int cpu_xsc3_do_idle(void);
+__ADDRESSABLE(cpu_xsc3_do_idle);
+void cpu_xsc3_dcache_clean_area(void *addr, int size);
+__ADDRESSABLE(cpu_xsc3_dcache_clean_area);
+void cpu_xsc3_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+__ADDRESSABLE(cpu_xsc3_switch_mm);
+void cpu_xsc3_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext);
+__ADDRESSABLE(cpu_xsc3_set_pte_ext);
+#ifdef CONFIG_ARM_CPU_SUSPEND
+void cpu_xsc3_do_suspend(void *);
+__ADDRESSABLE(cpu_xsc3_do_suspend);
+void cpu_xsc3_do_resume(void *);
+__ADDRESSABLE(cpu_xsc3_do_resume);
+#endif /* CONFIG_ARM_CPU_SUSPEND */
+#endif /* CONFIG_CPU_XSC3 */
+
+#ifdef CONFIG_CPU_MOHAWK
+void cpu_mohawk_proc_init(void);
+__ADDRESSABLE(cpu_mohawk_proc_init);
+void cpu_mohawk_proc_fin(void);
+__ADDRESSABLE(cpu_mohawk_proc_fin);
+void cpu_mohawk_reset(unsigned long addr, bool hvc);
+__ADDRESSABLE(cpu_mohawk_reset);
+int cpu_mohawk_do_idle(void);
+__ADDRESSABLE(cpu_mohawk_do_idle);
+void cpu_mohawk_dcache_clean_area(void *addr, int size);
+__ADDRESSABLE(cpu_mohawk_dcache_clean_area);
+void cpu_mohawk_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+__ADDRESSABLE(cpu_mohawk_switch_mm);
+void cpu_mohawk_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext);
+__ADDRESSABLE(cpu_mohawk_set_pte_ext);
+#ifdef CONFIG_ARM_CPU_SUSPEND
+void cpu_mohawk_do_suspend(void *);
+__ADDRESSABLE(cpu_mohawk_do_suspend);
+void cpu_mohawk_do_resume(void *);
+__ADDRESSABLE(cpu_mohawk_do_resume);
+#endif /* CONFIG_ARM_CPU_SUSPEND */
+#endif /* CONFIG_CPU_MOHAWK */
+
+#ifdef CONFIG_CPU_FEROCEON
+void cpu_feroceon_proc_init(void);
+__ADDRESSABLE(cpu_feroceon_proc_init);
+void cpu_feroceon_proc_fin(void);
+__ADDRESSABLE(cpu_feroceon_proc_fin);
+void cpu_feroceon_reset(unsigned long addr, bool hvc);
+__ADDRESSABLE(cpu_feroceon_reset);
+int cpu_feroceon_do_idle(void);
+__ADDRESSABLE(cpu_feroceon_do_idle);
+void cpu_feroceon_dcache_clean_area(void *addr, int size);
+__ADDRESSABLE(cpu_feroceon_dcache_clean_area);
+void cpu_feroceon_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+__ADDRESSABLE(cpu_feroceon_switch_mm);
+void cpu_feroceon_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext);
+__ADDRESSABLE(cpu_feroceon_set_pte_ext);
+#ifdef CONFIG_ARM_CPU_SUSPEND
+void cpu_feroceon_do_suspend(void *);
+__ADDRESSABLE(cpu_feroceon_do_suspend);
+void cpu_feroceon_do_resume(void *);
+__ADDRESSABLE(cpu_feroceon_do_resume);
+#endif /* CONFIG_ARM_CPU_SUSPEND */
+#endif /* CONFIG_CPU_FEROCEON */
+
+#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K)
+void cpu_v6_proc_init(void);
+__ADDRESSABLE(cpu_v6_proc_init);
+void cpu_v6_proc_fin(void);
+__ADDRESSABLE(cpu_v6_proc_fin);
+void cpu_v6_reset(unsigned long addr, bool hvc);
+__ADDRESSABLE(cpu_v6_reset);
+int cpu_v6_do_idle(void);
+__ADDRESSABLE(cpu_v6_do_idle);
+void cpu_v6_dcache_clean_area(void *addr, int size);
+__ADDRESSABLE(cpu_v6_dcache_clean_area);
+void cpu_v6_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+__ADDRESSABLE(cpu_v6_switch_mm);
+void cpu_v6_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext);
+__ADDRESSABLE(cpu_v6_set_pte_ext);
+#ifdef CONFIG_ARM_CPU_SUSPEND
+void cpu_v6_do_suspend(void *);
+__ADDRESSABLE(cpu_v6_do_suspend);
+void cpu_v6_do_resume(void *);
+__ADDRESSABLE(cpu_v6_do_resume);
+#endif /* CONFIG_ARM_CPU_SUSPEND */
+#endif /* CPU_V6 */
+
+#ifdef CONFIG_CPU_V7
+void cpu_v7_proc_init(void);
+__ADDRESSABLE(cpu_v7_proc_init);
+void cpu_v7_proc_fin(void);
+__ADDRESSABLE(cpu_v7_proc_fin);
+void cpu_v7_reset(void);
+__ADDRESSABLE(cpu_v7_reset);
+int cpu_v7_do_idle(void);
+__ADDRESSABLE(cpu_v7_do_idle);
+#ifdef CONFIG_PJ4B_ERRATA_4742
+int cpu_pj4b_do_idle(void);
+__ADDRESSABLE(cpu_pj4b_do_idle);
+#endif
+void cpu_v7_dcache_clean_area(void *addr, int size);
+__ADDRESSABLE(cpu_v7_dcache_clean_area);
+void cpu_v7_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+__ADDRESSABLE(cpu_v7_switch_mm);
+/* Special switch_mm() callbacks to work around bugs in v7 */
+void cpu_v7_iciallu_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+__ADDRESSABLE(cpu_v7_iciallu_switch_mm);
+void cpu_v7_bpiall_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+__ADDRESSABLE(cpu_v7_bpiall_switch_mm);
+#ifdef CONFIG_ARM_LPAE
+void cpu_v7_set_pte_ext(pte_t *ptep, pte_t pte);
+#else
+void cpu_v7_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext);
+#endif
+__ADDRESSABLE(cpu_v7_set_pte_ext);
+#ifdef CONFIG_ARM_CPU_SUSPEND
+void cpu_v7_do_suspend(void *);
+__ADDRESSABLE(cpu_v7_do_suspend);
+void cpu_v7_do_resume(void *);
+__ADDRESSABLE(cpu_v7_do_resume);
+/* Special versions of suspend and resume for the CA9MP cores */
+void cpu_ca9mp_do_suspend(void *);
+__ADDRESSABLE(cpu_ca9mp_do_suspend);
+void cpu_ca9mp_do_resume(void *);
+__ADDRESSABLE(cpu_ca9mp_do_resume);
+/* Special versions of suspend and resume for the Marvell PJ4B cores */
+#ifdef CONFIG_CPU_PJ4B
+void cpu_pj4b_do_suspend(void *);
+__ADDRESSABLE(cpu_pj4b_do_suspend);
+void cpu_pj4b_do_resume(void *);
+__ADDRESSABLE(cpu_pj4b_do_resume);
+#endif /* CONFIG_CPU_PJ4B */
+#endif /* CONFIG_ARM_CPU_SUSPEND */
+#endif /* CONFIG_CPU_V7 */
+
+#ifdef CONFIG_CPU_V7M
+void cpu_v7m_proc_init(void);
+__ADDRESSABLE(cpu_v7m_proc_init);
+void cpu_v7m_proc_fin(void);
+__ADDRESSABLE(cpu_v7m_proc_fin);
+void cpu_v7m_reset(unsigned long addr, bool hvc);
+__ADDRESSABLE(cpu_v7m_reset);
+int cpu_v7m_do_idle(void);
+__ADDRESSABLE(cpu_v7m_do_idle);
+void cpu_v7m_dcache_clean_area(void *addr, int size);
+__ADDRESSABLE(cpu_v7m_dcache_clean_area);
+void cpu_v7m_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+__ADDRESSABLE(cpu_v7m_switch_mm);
+void cpu_v7m_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext);
+__ADDRESSABLE(cpu_v7m_set_pte_ext);
+#ifdef CONFIG_ARM_CPU_SUSPEND
+void cpu_v7m_do_suspend(void *);
+__ADDRESSABLE(cpu_v7m_do_suspend);
+void cpu_v7m_do_resume(void *);
+__ADDRESSABLE(cpu_v7m_do_resume);
+#endif /* CONFIG_ARM_CPU_SUSPEND */
+void cpu_cm7_proc_fin(void);
+__ADDRESSABLE(cpu_cm7_proc_fin);
+void cpu_cm7_dcache_clean_area(void *addr, int size);
+__ADDRESSABLE(cpu_cm7_dcache_clean_area);
+#endif /* CONFIG_CPU_V7M */
diff --git a/arch/arm/mm/tlb-fa.S b/arch/arm/mm/tlb-fa.S
index def6161ec452..85a6fe766b21 100644
--- a/arch/arm/mm/tlb-fa.S
+++ b/arch/arm/mm/tlb-fa.S
@@ -15,6 +15,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
#include <asm/tlbflush.h>
@@ -31,7 +32,7 @@
* - mm - mm_struct describing address space
*/
.align 4
-ENTRY(fa_flush_user_tlb_range)
+SYM_TYPED_FUNC_START(fa_flush_user_tlb_range)
vma_vm_mm ip, r2
act_mm r3 @ get current->active_mm
eors r3, ip, r3 @ == mm ?
@@ -46,9 +47,10 @@ ENTRY(fa_flush_user_tlb_range)
blo 1b
mcr p15, 0, r3, c7, c10, 4 @ data write barrier
ret lr
+SYM_FUNC_END(fa_flush_user_tlb_range)
-ENTRY(fa_flush_kern_tlb_range)
+SYM_TYPED_FUNC_START(fa_flush_kern_tlb_range)
mov r3, #0
mcr p15, 0, r3, c7, c10, 4 @ drain WB
bic r0, r0, #0x0ff
@@ -60,8 +62,4 @@ ENTRY(fa_flush_kern_tlb_range)
mcr p15, 0, r3, c7, c10, 4 @ data write barrier
mcr p15, 0, r3, c7, c5, 4 @ prefetch flush (isb)
ret lr
-
- __INITDATA
-
- /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */
- define_tlb_functions fa, fa_tlb_flags
+SYM_FUNC_END(fa_flush_kern_tlb_range)
diff --git a/arch/arm/mm/tlb-v4.S b/arch/arm/mm/tlb-v4.S
index b962b4e75158..09ff69008d94 100644
--- a/arch/arm/mm/tlb-v4.S
+++ b/arch/arm/mm/tlb-v4.S
@@ -11,6 +11,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
#include <asm/tlbflush.h>
@@ -27,7 +28,7 @@
* - mm - mm_struct describing address space
*/
.align 5
-ENTRY(v4_flush_user_tlb_range)
+SYM_TYPED_FUNC_START(v4_flush_user_tlb_range)
vma_vm_mm ip, r2
act_mm r3 @ get current->active_mm
eors r3, ip, r3 @ == mm ?
@@ -40,6 +41,7 @@ ENTRY(v4_flush_user_tlb_range)
cmp r0, r1
blo 1b
ret lr
+SYM_FUNC_END(v4_flush_user_tlb_range)
/*
* v4_flush_kern_tlb_range(start, end)
@@ -50,10 +52,11 @@ ENTRY(v4_flush_user_tlb_range)
* - start - virtual address (may not be aligned)
* - end - virtual address (may not be aligned)
*/
+#ifdef CONFIG_CFI_CLANG
+SYM_TYPED_FUNC_START(v4_flush_kern_tlb_range)
+ b .v4_flush_kern_tlb_range
+SYM_FUNC_END(v4_flush_kern_tlb_range)
+#else
.globl v4_flush_kern_tlb_range
.equ v4_flush_kern_tlb_range, .v4_flush_kern_tlb_range
-
- __INITDATA
-
- /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */
- define_tlb_functions v4, v4_tlb_flags
+#endif
diff --git a/arch/arm/mm/tlb-v4wb.S b/arch/arm/mm/tlb-v4wb.S
index 9348bba7586a..04e46c359e75 100644
--- a/arch/arm/mm/tlb-v4wb.S
+++ b/arch/arm/mm/tlb-v4wb.S
@@ -11,6 +11,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
#include <asm/tlbflush.h>
@@ -27,7 +28,7 @@
* - mm - mm_struct describing address space
*/
.align 5
-ENTRY(v4wb_flush_user_tlb_range)
+SYM_TYPED_FUNC_START(v4wb_flush_user_tlb_range)
vma_vm_mm ip, r2
act_mm r3 @ get current->active_mm
eors r3, ip, r3 @ == mm ?
@@ -43,6 +44,7 @@ ENTRY(v4wb_flush_user_tlb_range)
cmp r0, r1
blo 1b
ret lr
+SYM_FUNC_END(v4wb_flush_user_tlb_range)
/*
* v4_flush_kern_tlb_range(start, end)
@@ -53,7 +55,7 @@ ENTRY(v4wb_flush_user_tlb_range)
* - start - virtual address (may not be aligned)
* - end - virtual address (may not be aligned)
*/
-ENTRY(v4wb_flush_kern_tlb_range)
+SYM_TYPED_FUNC_START(v4wb_flush_kern_tlb_range)
mov r3, #0
mcr p15, 0, r3, c7, c10, 4 @ drain WB
bic r0, r0, #0x0ff
@@ -64,8 +66,4 @@ ENTRY(v4wb_flush_kern_tlb_range)
cmp r0, r1
blo 1b
ret lr
-
- __INITDATA
-
- /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */
- define_tlb_functions v4wb, v4wb_tlb_flags
+SYM_FUNC_END(v4wb_flush_kern_tlb_range)
diff --git a/arch/arm/mm/tlb-v4wbi.S b/arch/arm/mm/tlb-v4wbi.S
index d4f9040a4111..502dfe5628a3 100644
--- a/arch/arm/mm/tlb-v4wbi.S
+++ b/arch/arm/mm/tlb-v4wbi.S
@@ -11,6 +11,7 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/cfi_types.h>
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
#include <asm/tlbflush.h>
@@ -26,7 +27,7 @@
* - mm - mm_struct describing address space
*/
.align 5
-ENTRY(v4wbi_flush_user_tlb_range)
+SYM_TYPED_FUNC_START(v4wbi_flush_user_tlb_range)
vma_vm_mm ip, r2
act_mm r3 @ get current->active_mm
eors r3, ip, r3 @ == mm ?
@@ -43,8 +44,9 @@ ENTRY(v4wbi_flush_user_tlb_range)
cmp r0, r1
blo 1b
ret lr
+SYM_FUNC_END(v4wbi_flush_user_tlb_range)
-ENTRY(v4wbi_flush_kern_tlb_range)
+SYM_TYPED_FUNC_START(v4wbi_flush_kern_tlb_range)
mov r3, #0
mcr p15, 0, r3, c7, c10, 4 @ drain WB
bic r0, r0, #0x0ff
@@ -55,8 +57,4 @@ ENTRY(v4wbi_flush_kern_tlb_range)
cmp r0, r1
blo 1b
ret lr
-
- __INITDATA
-
- /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */
- define_tlb_functions v4wbi, v4wbi_tlb_flags
+SYM_FUNC_END(v4wbi_flush_kern_tlb_range)
diff --git a/arch/arm/mm/tlb-v6.S b/arch/arm/mm/tlb-v6.S
index 1d91e49b2c2d..8256a67ac654 100644
--- a/arch/arm/mm/tlb-v6.S
+++ b/arch/arm/mm/tlb-v6.S
@@ -9,6 +9,7 @@
*/
#include <linux/init.h>
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/asm-offsets.h>
#include <asm/assembler.h>
#include <asm/page.h>
@@ -32,7 +33,7 @@
* - the "Invalidate single entry" instruction will invalidate
* both the I and the D TLBs on Harvard-style TLBs
*/
-ENTRY(v6wbi_flush_user_tlb_range)
+SYM_TYPED_FUNC_START(v6wbi_flush_user_tlb_range)
vma_vm_mm r3, r2 @ get vma->vm_mm
mov ip, #0
mmid r3, r3 @ get vm_mm->context.id
@@ -56,6 +57,7 @@ ENTRY(v6wbi_flush_user_tlb_range)
blo 1b
mcr p15, 0, ip, c7, c10, 4 @ data synchronization barrier
ret lr
+SYM_FUNC_END(v6wbi_flush_user_tlb_range)
/*
* v6wbi_flush_kern_tlb_range(start,end)
@@ -65,7 +67,7 @@ ENTRY(v6wbi_flush_user_tlb_range)
* - start - start address (may not be aligned)
* - end - end address (exclusive, may not be aligned)
*/
-ENTRY(v6wbi_flush_kern_tlb_range)
+SYM_TYPED_FUNC_START(v6wbi_flush_kern_tlb_range)
mov r2, #0
mcr p15, 0, r2, c7, c10, 4 @ drain write buffer
mov r0, r0, lsr #PAGE_SHIFT @ align address
@@ -85,8 +87,4 @@ ENTRY(v6wbi_flush_kern_tlb_range)
mcr p15, 0, r2, c7, c10, 4 @ data synchronization barrier
mcr p15, 0, r2, c7, c5, 4 @ prefetch flush (isb)
ret lr
-
- __INIT
-
- /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */
- define_tlb_functions v6wbi, v6wbi_tlb_flags
+SYM_FUNC_END(v6wbi_flush_kern_tlb_range)
diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S
index 35fd6d4f0d03..f1aa0764a2cc 100644
--- a/arch/arm/mm/tlb-v7.S
+++ b/arch/arm/mm/tlb-v7.S
@@ -10,6 +10,7 @@
*/
#include <linux/init.h>
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
#include <asm/page.h>
@@ -31,7 +32,7 @@
* - the "Invalidate single entry" instruction will invalidate
* both the I and the D TLBs on Harvard-style TLBs
*/
-ENTRY(v7wbi_flush_user_tlb_range)
+SYM_TYPED_FUNC_START(v7wbi_flush_user_tlb_range)
vma_vm_mm r3, r2 @ get vma->vm_mm
mmid r3, r3 @ get vm_mm->context.id
dsb ish
@@ -57,7 +58,7 @@ ENTRY(v7wbi_flush_user_tlb_range)
blo 1b
dsb ish
ret lr
-ENDPROC(v7wbi_flush_user_tlb_range)
+SYM_FUNC_END(v7wbi_flush_user_tlb_range)
/*
* v7wbi_flush_kern_tlb_range(start,end)
@@ -67,7 +68,7 @@ ENDPROC(v7wbi_flush_user_tlb_range)
* - start - start address (may not be aligned)
* - end - end address (exclusive, may not be aligned)
*/
-ENTRY(v7wbi_flush_kern_tlb_range)
+SYM_TYPED_FUNC_START(v7wbi_flush_kern_tlb_range)
dsb ish
mov r0, r0, lsr #PAGE_SHIFT @ align address
mov r1, r1, lsr #PAGE_SHIFT
@@ -86,9 +87,4 @@ ENTRY(v7wbi_flush_kern_tlb_range)
dsb ish
isb
ret lr
-ENDPROC(v7wbi_flush_kern_tlb_range)
-
- __INIT
-
- /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */
- define_tlb_functions v7wbi, v7wbi_tlb_flags_up, flags_smp=v7wbi_tlb_flags_smp
+SYM_FUNC_END(v7wbi_flush_kern_tlb_range)
diff --git a/arch/arm/mm/tlb.c b/arch/arm/mm/tlb.c
new file mode 100644
index 000000000000..42359793120b
--- /dev/null
+++ b/arch/arm/mm/tlb.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright 2024 Google LLC
+// Author: Ard Biesheuvel <ardb@google.com>
+
+#include <linux/types.h>
+#include <asm/tlbflush.h>
+
+#ifdef CONFIG_CPU_TLB_V4WT
+void v4_flush_user_tlb_range(unsigned long, unsigned long, struct vm_area_struct *);
+void v4_flush_kern_tlb_range(unsigned long, unsigned long);
+
+struct cpu_tlb_fns v4_tlb_fns __initconst = {
+ .flush_user_range = v4_flush_user_tlb_range,
+ .flush_kern_range = v4_flush_kern_tlb_range,
+ .tlb_flags = v4_tlb_flags,
+};
+#endif
+
+#ifdef CONFIG_CPU_TLB_V4WB
+void v4wb_flush_user_tlb_range(unsigned long, unsigned long, struct vm_area_struct *);
+void v4wb_flush_kern_tlb_range(unsigned long, unsigned long);
+
+struct cpu_tlb_fns v4wb_tlb_fns __initconst = {
+ .flush_user_range = v4wb_flush_user_tlb_range,
+ .flush_kern_range = v4wb_flush_kern_tlb_range,
+ .tlb_flags = v4wb_tlb_flags,
+};
+#endif
+
+#if defined(CONFIG_CPU_TLB_V4WBI) || defined(CONFIG_CPU_TLB_FEROCEON)
+void v4wbi_flush_user_tlb_range(unsigned long, unsigned long, struct vm_area_struct *);
+void v4wbi_flush_kern_tlb_range(unsigned long, unsigned long);
+
+struct cpu_tlb_fns v4wbi_tlb_fns __initconst = {
+ .flush_user_range = v4wbi_flush_user_tlb_range,
+ .flush_kern_range = v4wbi_flush_kern_tlb_range,
+ .tlb_flags = v4wbi_tlb_flags,
+};
+#endif
+
+#ifdef CONFIG_CPU_TLB_V6
+void v6wbi_flush_user_tlb_range(unsigned long, unsigned long, struct vm_area_struct *);
+void v6wbi_flush_kern_tlb_range(unsigned long, unsigned long);
+
+struct cpu_tlb_fns v6wbi_tlb_fns __initconst = {
+ .flush_user_range = v6wbi_flush_user_tlb_range,
+ .flush_kern_range = v6wbi_flush_kern_tlb_range,
+ .tlb_flags = v6wbi_tlb_flags,
+};
+#endif
+
+#ifdef CONFIG_CPU_TLB_V7
+void v7wbi_flush_user_tlb_range(unsigned long, unsigned long, struct vm_area_struct *);
+void v7wbi_flush_kern_tlb_range(unsigned long, unsigned long);
+
+struct cpu_tlb_fns v7wbi_tlb_fns __initconst = {
+ .flush_user_range = v7wbi_flush_user_tlb_range,
+ .flush_kern_range = v7wbi_flush_kern_tlb_range,
+ .tlb_flags = IS_ENABLED(CONFIG_SMP) ? v7wbi_tlb_flags_smp
+ : v7wbi_tlb_flags_up,
+};
+
+#ifdef CONFIG_SMP_ON_UP
+/* This will be run-time patched so the offset better be right */
+static_assert(offsetof(struct cpu_tlb_fns, tlb_flags) == 8);
+
+asm(" .pushsection \".alt.smp.init\", \"a\" \n" \
+ " .align 2 \n" \
+ " .long v7wbi_tlb_fns + 8 - . \n" \
+ " .long " __stringify(v7wbi_tlb_flags_up) " \n" \
+ " .popsection \n");
+#endif
+#endif
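For readers unfamiliar with the .alt.smp.init trick above, a hedged sketch of what the boot-time fixup conceptually does with those records (the struct and function names below are illustrative, not the kernel's): each record holds a self-relative offset to a word plus the value to store there, applied only when an SMP kernel finds itself on a uniprocessor — here rewriting v7wbi_tlb_fns.tlb_flags to the UP flag set.

#include <stdint.h>

struct alt_smp_rec {			/* assumed layout for the sketch */
	int32_t  target_offset;		/* self-relative offset to the word */
	uint32_t up_value;		/* value to write when running UP */
};

static void apply_smp_on_up(struct alt_smp_rec *rec, struct alt_smp_rec *end)
{
	for (; rec < end; rec++) {
		uint32_t *target = (uint32_t *)((char *)rec + rec->target_offset);

		*target = rec->up_value;	/* e.g. v7wbi_tlb_fns.tlb_flags */
	}
}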
+
+#ifdef CONFIG_CPU_TLB_FA
+void fa_flush_user_tlb_range(unsigned long, unsigned long, struct vm_area_struct *);
+void fa_flush_kern_tlb_range(unsigned long, unsigned long);
+
+struct cpu_tlb_fns fa_tlb_fns __initconst = {
+ .flush_user_range = fa_flush_user_tlb_range,
+ .flush_kern_range = fa_flush_kern_tlb_range,
+ .tlb_flags = fa_tlb_flags,
+};
+#endif
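As context — a sketch under assumptions, not part of the patch: on a multi-TLB kernel these tables are the C replacement for the old define_tlb_functions assembly macro; boot code copies the matching table into the global cpu_tlb, and the tlbflush helpers then call through it, roughly:

#include <asm/tlbflush.h>

extern struct cpu_tlb_fns cpu_tlb;	/* filled in at boot on MULTI_TLB */

static void flush_kernel_mapping(unsigned long start, unsigned long end)
{
	/* resolves to one of the *_flush_kern_tlb_range routines above */
	cpu_tlb.flush_kern_range(start, end);
}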
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a04059c31aba..2df0818c3ca9 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -105,6 +105,7 @@ config ARM64
select ARCH_WANT_FRAME_POINTERS
select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
select ARCH_WANT_LD_ORPHAN_WARN
+ select ARCH_WANTS_EXECMEM_LATE if EXECMEM
select ARCH_WANTS_NO_INSTR
select ARCH_WANTS_THP_SWAP if ARM64_4K_PAGES
select ARCH_HAS_UBSAN
diff --git a/arch/arm64/boot/dts/qcom/sc7180-acer-aspire1.dts b/arch/arm64/boot/dts/qcom/sc7180-acer-aspire1.dts
index 5afcb8212f49..3f0d3e33894a 100644
--- a/arch/arm64/boot/dts/qcom/sc7180-acer-aspire1.dts
+++ b/arch/arm64/boot/dts/qcom/sc7180-acer-aspire1.dts
@@ -255,7 +255,25 @@
clock-frequency = <400000>;
status = "okay";
- /* embedded-controller@76 */
+ embedded-controller@76 {
+ compatible = "acer,aspire1-ec";
+ reg = <0x76>;
+
+ interrupts-extended = <&tlmm 30 IRQ_TYPE_LEVEL_LOW>;
+
+ pinctrl-0 = <&ec_int_default>;
+ pinctrl-names = "default";
+
+ connector {
+ compatible = "usb-c-connector";
+
+ port {
+ ec_dp_in: endpoint {
+ remote-endpoint = <&mdss_dp_out>;
+ };
+ };
+ };
+ };
};
&i2c4 {
@@ -419,6 +437,19 @@
status = "okay";
};
+&mdss_dp {
+ data-lanes = <0 1>;
+
+ vdda-1p2-supply = <&vreg_l3c_1p2>;
+ vdda-0p9-supply = <&vreg_l4a_0p8>;
+
+ status = "okay";
+};
+
+&mdss_dp_out {
+ remote-endpoint = <&ec_dp_in>;
+};
+
&mdss_dsi0 {
vdda-supply = <&vreg_l3c_1p2>;
status = "okay";
@@ -857,6 +888,13 @@
bias-disable;
};
+ ec_int_default: ec-int-default-state {
+ pins = "gpio30";
+ function = "gpio";
+ drive-strength = <2>;
+ bias-disable;
+ };
+
edp_bridge_irq_default: edp-bridge-irq-default-state {
pins = "gpio11";
function = "gpio";
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 81606bf7d5ac..7abf09df7033 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -404,6 +404,18 @@ static inline bool esr_fsc_is_access_flag_fault(unsigned long esr)
return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS;
}
+/* Indicate whether ESR.EC==0x1A is for an ERETAx instruction */
+static inline bool esr_iss_is_eretax(unsigned long esr)
+{
+ return esr & ESR_ELx_ERET_ISS_ERET;
+}
+
+/* Indicate which key is used for ERETAx (false: A-Key, true: B-Key) */
+static inline bool esr_iss_is_eretab(unsigned long esr)
+{
+ return esr & ESR_ELx_ERET_ISS_ERETA;
+}
+
const char *esr_get_class_string(unsigned long esr);
#endif /* __ASSEMBLY */
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 24b5e6b23417..a6330460d9e5 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -73,10 +73,8 @@ enum __kvm_host_smccc_func {
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_range,
__KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
__KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
- __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr,
- __KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr,
- __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs,
- __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs,
+ __KVM_HOST_SMCCC_FUNC___vgic_v3_save_vmcr_aprs,
+ __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs,
__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_init_traps,
__KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
__KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu,
@@ -241,8 +239,6 @@ extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
extern void __kvm_adjust_pc(struct kvm_vcpu *vcpu);
extern u64 __vgic_v3_get_gic_config(void);
-extern u64 __vgic_v3_read_vmcr(void);
-extern void __vgic_v3_write_vmcr(u32 vmcr);
extern void __vgic_v3_init_lrs(void);
extern u64 __kvm_get_mdcr_el2(void);
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 975af30af31f..501e3e019c93 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -125,16 +125,6 @@ static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu)
vcpu->arch.hcr_el2 |= HCR_TWI;
}
-static inline void vcpu_ptrauth_enable(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK);
-}
-
-static inline void vcpu_ptrauth_disable(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.hcr_el2 &= ~(HCR_API | HCR_APK);
-}
-
static inline unsigned long vcpu_get_vsesr(struct kvm_vcpu *vcpu)
{
return vcpu->arch.vsesr_el2;
@@ -587,16 +577,14 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu)
} else if (has_hvhe()) {
val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
- if (!vcpu_has_sve(vcpu) ||
- (vcpu->arch.fp_state != FP_STATE_GUEST_OWNED))
+ if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs())
val |= CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN;
if (cpus_have_final_cap(ARM64_SME))
val |= CPACR_EL1_SMEN_EL1EN | CPACR_EL1_SMEN_EL0EN;
} else {
val = CPTR_NVHE_EL2_RES1;
- if (vcpu_has_sve(vcpu) &&
- (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED))
+ if (vcpu_has_sve(vcpu) && guest_owns_fp_regs())
val |= CPTR_EL2_TZ;
if (cpus_have_final_cap(ARM64_SME))
val &= ~CPTR_EL2_TSM;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 9e8a496fb284..8170c04fde91 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -211,6 +211,7 @@ typedef unsigned int pkvm_handle_t;
struct kvm_protected_vm {
pkvm_handle_t handle;
struct kvm_hyp_memcache teardown_mc;
+ bool enabled;
};
struct kvm_mpidr_data {
@@ -220,20 +221,10 @@ struct kvm_mpidr_data {
static inline u16 kvm_mpidr_index(struct kvm_mpidr_data *data, u64 mpidr)
{
- unsigned long mask = data->mpidr_mask;
- u64 aff = mpidr & MPIDR_HWID_BITMASK;
- int nbits, bit, bit_idx = 0;
- u16 index = 0;
+ unsigned long index = 0, mask = data->mpidr_mask;
+ unsigned long aff = mpidr & MPIDR_HWID_BITMASK;
- /*
- * If this looks like RISC-V's BEXT or x86's PEXT
- * instructions, it isn't by accident.
- */
- nbits = fls(mask);
- for_each_set_bit(bit, &mask, nbits) {
- index |= (aff & BIT(bit)) >> (bit - bit_idx);
- bit_idx++;
- }
+ bitmap_gather(&index, &aff, &mask, fls(mask));
return index;
}
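A quick illustration of bitmap_gather() from <linux/bitmap.h>, which now does what the removed PEXT-style loop did by hand — bits of the source selected by the mask are packed into the low-order bits of the destination:

#include <linux/bitmap.h>

static unsigned long gather_example(void)
{
	unsigned long dst  = 0;
	unsigned long src  = 0xB4;	/* 1011 0100 */
	unsigned long mask = 0xC4;	/* 1100 0100: selects bits 2, 6, 7 */

	bitmap_gather(&dst, &src, &mask, 8);
	return dst;			/* 0x5: src bits 2, 6, 7 -> 1, 0, 1 */
}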
@@ -530,8 +521,42 @@ struct kvm_cpu_context {
u64 *vncr_array;
};
+/*
+ * This structure is instantiated on a per-CPU basis, and contains
+ * data that is:
+ *
+ * - tied to a single physical CPU, and
+ * - either has a lifetime that does not extend past vcpu_put()
+ * - or is an invariant for the lifetime of the system
+ *
+ * Use host_data_ptr(field) as a way to access a pointer to such a
+ * field.
+ */
struct kvm_host_data {
struct kvm_cpu_context host_ctxt;
+ struct user_fpsimd_state *fpsimd_state; /* hyp VA */
+
+ /* Ownership of the FP regs */
+ enum {
+ FP_STATE_FREE,
+ FP_STATE_HOST_OWNED,
+ FP_STATE_GUEST_OWNED,
+ } fp_owner;
+
+ /*
+ * host_debug_state contains the host registers which are
+ * saved and restored during world switches.
+ */
+ struct {
+ /* {Break,watch}point registers */
+ struct kvm_guest_debug_arch regs;
+ /* Statistical profiling extension */
+ u64 pmscr_el1;
+ /* Self-hosted trace */
+ u64 trfcr_el1;
+ /* Values of trap registers for the host before guest entry. */
+ u64 mdcr_el2;
+ } host_debug_state;
};
struct kvm_host_psci_config {
@@ -592,19 +617,9 @@ struct kvm_vcpu_arch {
u64 mdcr_el2;
u64 cptr_el2;
- /* Values of trap registers for the host before guest entry. */
- u64 mdcr_el2_host;
-
/* Exception Information */
struct kvm_vcpu_fault_info fault;
- /* Ownership of the FP regs */
- enum {
- FP_STATE_FREE,
- FP_STATE_HOST_OWNED,
- FP_STATE_GUEST_OWNED,
- } fp_state;
-
/* Configuration flags, set once and for all before the vcpu can run */
u8 cflags;
@@ -627,11 +642,10 @@ struct kvm_vcpu_arch {
* We maintain more than a single set of debug registers to support
* debugging the guest from the host and to maintain separate host and
* guest state during world switches. vcpu_debug_state are the debug
- * registers of the vcpu as the guest sees them. host_debug_state are
- * the host registers which are saved and restored during
- * world switches. external_debug_state contains the debug
- * values we want to debug the guest. This is set via the
- * KVM_SET_GUEST_DEBUG ioctl.
+ * registers of the vcpu as the guest sees them.
+ *
+ * external_debug_state contains the debug values we want to debug the
+ * guest. This is set via the KVM_SET_GUEST_DEBUG ioctl.
*
* debug_ptr points to the set of debug registers that should be loaded
* onto the hardware when running the guest.
@@ -640,18 +654,6 @@ struct kvm_vcpu_arch {
struct kvm_guest_debug_arch vcpu_debug_state;
struct kvm_guest_debug_arch external_debug_state;
- struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */
- struct task_struct *parent_task;
-
- struct {
- /* {Break,watch}point registers */
- struct kvm_guest_debug_arch regs;
- /* Statistical profiling extension */
- u64 pmscr_el1;
- /* Self-hosted trace */
- u64 trfcr_el1;
- } host_debug_state;
-
/* VGIC state */
struct vgic_cpu vgic_cpu;
struct arch_timer_cpu timer_cpu;
@@ -817,8 +819,6 @@ struct kvm_vcpu_arch {
#define DEBUG_STATE_SAVE_SPE __vcpu_single_flag(iflags, BIT(5))
/* Save TRBE context if active */
#define DEBUG_STATE_SAVE_TRBE __vcpu_single_flag(iflags, BIT(6))
-/* vcpu running in HYP context */
-#define VCPU_HYP_CONTEXT __vcpu_single_flag(iflags, BIT(7))
/* SVE enabled for host EL0 */
#define HOST_SVE_ENABLED __vcpu_single_flag(sflags, BIT(0))
@@ -896,7 +896,7 @@ struct kvm_vcpu_arch {
* Don't bother with VNCR-based accesses in the nVHE code, it has no
* business dealing with NV.
*/
-static inline u64 *__ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r)
+static inline u64 *___ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r)
{
#if !defined (__KVM_NVHE_HYPERVISOR__)
if (unlikely(cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) &&
@@ -906,6 +906,13 @@ static inline u64 *__ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r)
return (u64 *)&ctxt->sys_regs[r];
}
+#define __ctxt_sys_reg(c,r) \
+ ({ \
+ BUILD_BUG_ON(__builtin_constant_p(r) && \
+ (r) >= NR_SYS_REGS); \
+ ___ctxt_sys_reg(c, r); \
+ })
+
#define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r))
u64 kvm_vcpu_sanitise_vncr_reg(const struct kvm_vcpu *, enum vcpu_sysreg);
@@ -1168,6 +1175,44 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
DECLARE_KVM_HYP_PER_CPU(struct kvm_host_data, kvm_host_data);
+/*
+ * How we access per-CPU host data depends on where we access it from,
+ * and the mode we're in:
+ *
+ * - VHE and nVHE hypervisor bits use their locally defined instance
+ *
+ * - the rest of the kernel uses either the VHE or nVHE one, depending on
+ * the mode we're running in.
+ *
+ * Unless we're in protected mode, fully deprivileged, and the nVHE
+ * per-CPU stuff is exclusively accessible to the protected EL2 code.
+ * In this case, the EL1 code uses the *VHE* data as its private state
+ * (which makes sense in a way as there shouldn't be any shared state
+ * between the host and the hypervisor).
+ *
+ * Yes, this is all totally trivial. Shoot me now.
+ */
+#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__)
+#define host_data_ptr(f) (&this_cpu_ptr(&kvm_host_data)->f)
+#else
+#define host_data_ptr(f) \
+ (static_branch_unlikely(&kvm_protected_mode_initialized) ? \
+ &this_cpu_ptr(&kvm_host_data)->f : \
+ &this_cpu_ptr_hyp_sym(kvm_host_data)->f)
+#endif
+
+/* Check whether the FP regs are owned by the guest */
+static inline bool guest_owns_fp_regs(void)
+{
+ return *host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED;
+}
+
+/* Check whether the FP regs are owned by the host */
+static inline bool host_owns_fp_regs(void)
+{
+ return *host_data_ptr(fp_owner) == FP_STATE_HOST_OWNED;
+}
+
static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt)
{
/* The host's MPIDR is immutable, so let's set it up at boot time */
@@ -1211,7 +1256,6 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu);
-void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu);
static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
{
@@ -1247,10 +1291,9 @@ struct kvm *kvm_arch_alloc_vm(void);
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
-static inline bool kvm_vm_is_protected(struct kvm *kvm)
-{
- return false;
-}
+#define kvm_vm_is_protected(kvm) (is_protected_kvm_enabled() && (kvm)->arch.pkvm.enabled)
+
+#define vcpu_is_protected(vcpu) kvm_vm_is_protected((vcpu)->kvm)
int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature);
bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
@@ -1275,6 +1318,8 @@ static inline bool __vcpu_has_feature(const struct kvm_arch *ka, int feature)
#define vcpu_has_feature(v, f) __vcpu_has_feature(&(v)->kvm->arch, (f))
+#define kvm_vcpu_initialized(v) vcpu_get_flag(vcpu, VCPU_INITIALIZED)
+
int kvm_trng_call(struct kvm_vcpu *vcpu);
#ifdef CONFIG_KVM
extern phys_addr_t hyp_mem_base;
@@ -1331,4 +1376,19 @@ bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu);
(get_idreg_field((kvm), id, fld) >= expand_field_sign(id, fld, min) && \
get_idreg_field((kvm), id, fld) <= expand_field_sign(id, fld, max))
+/* Check for a given level of PAuth support */
+#define kvm_has_pauth(k, l) \
+ ({ \
+ bool pa, pi, pa3; \
+ \
+ pa = kvm_has_feat((k), ID_AA64ISAR1_EL1, APA, l); \
+ pa &= kvm_has_feat((k), ID_AA64ISAR1_EL1, GPA, IMP); \
+ pi = kvm_has_feat((k), ID_AA64ISAR1_EL1, API, l); \
+ pi &= kvm_has_feat((k), ID_AA64ISAR1_EL1, GPI, IMP); \
+ pa3 = kvm_has_feat((k), ID_AA64ISAR2_EL1, APA3, l); \
+ pa3 &= kvm_has_feat((k), ID_AA64ISAR2_EL1, GPA3, IMP); \
+ \
+ (pa + pi + pa3) == 1; \
+ })
+
#endif /* __ARM64_KVM_HOST_H__ */
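kvm_has_pauth() takes one of the feature levels understood by kvm_has_feat() for the APA/API/APA3 fields (PAuth, FPAC, FPACCOMBINE, ...), and only evaluates to true when exactly one of the three algorithms is implemented at that level together with its Generic-auth counterpart. A usage sketch, assuming the same shape as the emulate-nested.c call site later in this patch (the ESR rewrite to ESR_ELx_EC_FPAC is omitted here):

	/*
	 * Only deliver an FPAC exception to the guest hypervisor when its
	 * single PAuth algorithm reports the FPACCOMBINE level.
	 */
	if (kvm_has_pauth(vcpu->kvm, FPACCOMBINE))
		kvm_inject_nested_sync(vcpu, esr);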
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 3e2a1ac0c9bb..3e80464f8953 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -80,8 +80,8 @@ void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if);
-void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if);
-void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if);
+void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if);
+void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if);
int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);
#ifdef __KVM_NVHE_HYPERVISOR__
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index c77d795556e1..5e0ab0596246 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -60,7 +60,20 @@ static inline u64 translate_ttbr0_el2_to_ttbr0_el1(u64 ttbr0)
return ttbr0 & ~GENMASK_ULL(63, 48);
}
+extern bool forward_smc_trap(struct kvm_vcpu *vcpu);
int kvm_init_nv_sysregs(struct kvm *kvm);
+#ifdef CONFIG_ARM64_PTR_AUTH
+bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr);
+#else
+static inline bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr)
+{
+ /* We really should never execute this... */
+ WARN_ON_ONCE(1);
+ *elr = 0xbad9acc0debadbad;
+ return false;
+}
+#endif
+
#endif /* __ARM64_KVM_NESTED_H */
diff --git a/arch/arm64/include/asm/kvm_ptrauth.h b/arch/arm64/include/asm/kvm_ptrauth.h
index 0cd0965255d2..d81bac256abc 100644
--- a/arch/arm64/include/asm/kvm_ptrauth.h
+++ b/arch/arm64/include/asm/kvm_ptrauth.h
@@ -99,5 +99,26 @@ alternative_else_nop_endif
.macro ptrauth_switch_to_hyp g_ctxt, h_ctxt, reg1, reg2, reg3
.endm
#endif /* CONFIG_ARM64_PTR_AUTH */
+
+#else /* !__ASSEMBLY__ */
+
+#define __ptrauth_save_key(ctxt, key) \
+ do { \
+ u64 __val; \
+ __val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \
+ ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val; \
+ __val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \
+ ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val; \
+ } while(0)
+
+#define ptrauth_save_keys(ctxt) \
+ do { \
+ __ptrauth_save_key(ctxt, APIA); \
+ __ptrauth_save_key(ctxt, APIB); \
+ __ptrauth_save_key(ctxt, APDA); \
+ __ptrauth_save_key(ctxt, APDB); \
+ __ptrauth_save_key(ctxt, APGA); \
+ } while(0)
+
#endif /* __ASSEMBLY__ */
#endif /* __ASM_KVM_PTRAUTH_H */
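Since __ptrauth_save_key() relies on token pasting, it may help to see what a single expansion looks like; __ptrauth_save_key(ctxt, APIA) becomes, roughly:

	do {
		u64 __val;
		__val = read_sysreg_s(SYS_APIAKEYLO_EL1);
		ctxt_sys_reg(ctxt, APIAKEYLO_EL1) = __val;
		__val = read_sysreg_s(SYS_APIAKEYHI_EL1);
		ctxt_sys_reg(ctxt, APIAKEYHI_EL1) = __val;
	} while (0);

ptrauth_save_keys() simply repeats this for the five key pairs, snapshotting the host's keys into the given context before the guest's keys are loaded.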
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index ef207a0d4f0d..9943ff0af4c9 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -297,6 +297,7 @@
#define TCR_TBI1 (UL(1) << 38)
#define TCR_HA (UL(1) << 39)
#define TCR_HD (UL(1) << 40)
+#define TCR_TBID0 (UL(1) << 51)
#define TCR_TBID1 (UL(1) << 52)
#define TCR_NFD0 (UL(1) << 53)
#define TCR_NFD1 (UL(1) << 54)
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 261d6e9df2e1..ebf4a9f943ed 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -82,6 +82,12 @@ bool is_kvm_arm_initialised(void);
DECLARE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
+static inline bool is_pkvm_initialized(void)
+{
+ return IS_ENABLED(CONFIG_KVM) &&
+ static_branch_likely(&kvm_protected_mode_initialized);
+}
+
/* Reports the availability of HYP mode */
static inline bool is_hyp_mode_available(void)
{
@@ -89,8 +95,7 @@ static inline bool is_hyp_mode_available(void)
* If KVM protected mode is initialized, all CPUs must have been booted
* in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1.
*/
- if (IS_ENABLED(CONFIG_KVM) &&
- static_branch_likely(&kvm_protected_mode_initialized))
+ if (is_pkvm_initialized())
return true;
return (__boot_cpu_mode[0] == BOOT_CPU_MODE_EL2 &&
@@ -104,8 +109,7 @@ static inline bool is_hyp_mode_mismatched(void)
* If KVM protected mode is initialized, all CPUs must have been booted
* in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1.
*/
- if (IS_ENABLED(CONFIG_KVM) &&
- static_branch_likely(&kvm_protected_mode_initialized))
+ if (is_pkvm_initialized())
return false;
return __boot_cpu_mode[0] != __boot_cpu_mode[1];
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 47e0be610bb6..36b25af56324 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -12,144 +12,18 @@
#include <linux/bitops.h>
#include <linux/elf.h>
#include <linux/ftrace.h>
-#include <linux/gfp.h>
#include <linux/kasan.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/moduleloader.h>
#include <linux/random.h>
#include <linux/scs.h>
-#include <linux/vmalloc.h>
#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/scs.h>
#include <asm/sections.h>
-static u64 module_direct_base __ro_after_init = 0;
-static u64 module_plt_base __ro_after_init = 0;
-
-/*
- * Choose a random page-aligned base address for a window of 'size' bytes which
- * entirely contains the interval [start, end - 1].
- */
-static u64 __init random_bounding_box(u64 size, u64 start, u64 end)
-{
- u64 max_pgoff, pgoff;
-
- if ((end - start) >= size)
- return 0;
-
- max_pgoff = (size - (end - start)) / PAGE_SIZE;
- pgoff = get_random_u32_inclusive(0, max_pgoff);
-
- return start - pgoff * PAGE_SIZE;
-}
-
-/*
- * Modules may directly reference data and text anywhere within the kernel
- * image and other modules. References using PREL32 relocations have a +/-2G
- * range, and so we need to ensure that the entire kernel image and all modules
- * fall within a 2G window such that these are always within range.
- *
- * Modules may directly branch to functions and code within the kernel text,
- * and to functions and code within other modules. These branches will use
- * CALL26/JUMP26 relocations with a +/-128M range. Without PLTs, we must ensure
- * that the entire kernel text and all module text falls within a 128M window
- * such that these are always within range. With PLTs, we can expand this to a
- * 2G window.
- *
- * We chose the 128M region to surround the entire kernel image (rather than
- * just the text) as using the same bounds for the 128M and 2G regions ensures
- * by construction that we never select a 128M region that is not a subset of
- * the 2G region. For very large and unusual kernel configurations this means
- * we may fall back to PLTs where they could have been avoided, but this keeps
- * the logic significantly simpler.
- */
-static int __init module_init_limits(void)
-{
- u64 kernel_end = (u64)_end;
- u64 kernel_start = (u64)_text;
- u64 kernel_size = kernel_end - kernel_start;
-
- /*
- * The default modules region is placed immediately below the kernel
- * image, and is large enough to use the full 2G relocation range.
- */
- BUILD_BUG_ON(KIMAGE_VADDR != MODULES_END);
- BUILD_BUG_ON(MODULES_VSIZE < SZ_2G);
-
- if (!kaslr_enabled()) {
- if (kernel_size < SZ_128M)
- module_direct_base = kernel_end - SZ_128M;
- if (kernel_size < SZ_2G)
- module_plt_base = kernel_end - SZ_2G;
- } else {
- u64 min = kernel_start;
- u64 max = kernel_end;
-
- if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
- pr_info("2G module region forced by RANDOMIZE_MODULE_REGION_FULL\n");
- } else {
- module_direct_base = random_bounding_box(SZ_128M, min, max);
- if (module_direct_base) {
- min = module_direct_base;
- max = module_direct_base + SZ_128M;
- }
- }
-
- module_plt_base = random_bounding_box(SZ_2G, min, max);
- }
-
- pr_info("%llu pages in range for non-PLT usage",
- module_direct_base ? (SZ_128M - kernel_size) / PAGE_SIZE : 0);
- pr_info("%llu pages in range for PLT usage",
- module_plt_base ? (SZ_2G - kernel_size) / PAGE_SIZE : 0);
-
- return 0;
-}
-subsys_initcall(module_init_limits);
-
-void *module_alloc(unsigned long size)
-{
- void *p = NULL;
-
- /*
- * Where possible, prefer to allocate within direct branch range of the
- * kernel such that no PLTs are necessary.
- */
- if (module_direct_base) {
- p = __vmalloc_node_range(size, MODULE_ALIGN,
- module_direct_base,
- module_direct_base + SZ_128M,
- GFP_KERNEL | __GFP_NOWARN,
- PAGE_KERNEL, 0, NUMA_NO_NODE,
- __builtin_return_address(0));
- }
-
- if (!p && module_plt_base) {
- p = __vmalloc_node_range(size, MODULE_ALIGN,
- module_plt_base,
- module_plt_base + SZ_2G,
- GFP_KERNEL | __GFP_NOWARN,
- PAGE_KERNEL, 0, NUMA_NO_NODE,
- __builtin_return_address(0));
- }
-
- if (!p) {
- pr_warn_ratelimited("%s: unable to allocate memory\n",
- __func__);
- }
-
- if (p && (kasan_alloc_module_shadow(p, size, GFP_KERNEL) < 0)) {
- vfree(p);
- return NULL;
- }
-
- /* Memory is intended to be executable, reset the pointer tag. */
- return kasan_reset_tag(p);
-}
-
enum aarch64_reloc_op {
RELOC_OP_NONE,
RELOC_OP_ABS,
diff --git a/arch/arm64/kernel/pi/idreg-override.c b/arch/arm64/kernel/pi/idreg-override.c
index 48c1aa456af9..29d4b6244a6f 100644
--- a/arch/arm64/kernel/pi/idreg-override.c
+++ b/arch/arm64/kernel/pi/idreg-override.c
@@ -210,8 +210,8 @@ static const struct {
char alias[FTR_ALIAS_NAME_LEN];
char feature[FTR_ALIAS_OPTION_LEN];
} aliases[] __initconst = {
- { "kvm_arm.mode=nvhe", "id_aa64mmfr1.vh=0" },
- { "kvm_arm.mode=protected", "id_aa64mmfr1.vh=0" },
+ { "kvm_arm.mode=nvhe", "arm64_sw.hvhe=0 id_aa64mmfr1.vh=0" },
+ { "kvm_arm.mode=protected", "arm64_sw.hvhe=1" },
{ "arm64.nosve", "id_aa64pfr0.sve=0" },
{ "arm64.nosme", "id_aa64pfr1.sme=0" },
{ "arm64.nobti", "id_aa64pfr1.bt=0" },
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index 327855a11df2..4268678d0e86 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -129,13 +129,6 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
return 0;
}
-void *alloc_insn_page(void)
-{
- return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
- GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS,
- NUMA_NO_NODE, __builtin_return_address(0));
-}
-
/* arm kprobe: install breakpoint in text */
void __kprobes arch_arm_kprobe(struct kprobe *p)
{
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index c0c050e53157..04882b577575 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -23,6 +23,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
vgic/vgic-its.o vgic/vgic-debug.o
kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o
+kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o
always-y := hyp_constants.h hyp-constants.s
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index c4a0a35e02c7..9996a989b52e 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -35,10 +35,11 @@
#include <asm/virt.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_nested.h>
#include <asm/kvm_pkvm.h>
-#include <asm/kvm_emulate.h>
+#include <asm/kvm_ptrauth.h>
#include <asm/sections.h>
#include <kvm/arm_hypercalls.h>
@@ -69,15 +70,42 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}
+/*
+ * This functions as an allow-list of protected VM capabilities.
+ * Features not explicitly allowed by this function are denied.
+ */
+static bool pkvm_ext_allowed(struct kvm *kvm, long ext)
+{
+ switch (ext) {
+ case KVM_CAP_IRQCHIP:
+ case KVM_CAP_ARM_PSCI:
+ case KVM_CAP_ARM_PSCI_0_2:
+ case KVM_CAP_NR_VCPUS:
+ case KVM_CAP_MAX_VCPUS:
+ case KVM_CAP_MAX_VCPU_ID:
+ case KVM_CAP_MSI_DEVID:
+ case KVM_CAP_ARM_VM_IPA_SIZE:
+ case KVM_CAP_ARM_PMU_V3:
+ case KVM_CAP_ARM_SVE:
+ case KVM_CAP_ARM_PTRAUTH_ADDRESS:
+ case KVM_CAP_ARM_PTRAUTH_GENERIC:
+ return true;
+ default:
+ return false;
+ }
+}
+
int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
struct kvm_enable_cap *cap)
{
- int r;
- u64 new_cap;
+ int r = -EINVAL;
if (cap->flags)
return -EINVAL;
+ if (kvm_vm_is_protected(kvm) && !pkvm_ext_allowed(kvm, cap->cap))
+ return -EINVAL;
+
switch (cap->cap) {
case KVM_CAP_ARM_NISV_TO_USER:
r = 0;
@@ -86,9 +114,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
break;
case KVM_CAP_ARM_MTE:
mutex_lock(&kvm->lock);
- if (!system_supports_mte() || kvm->created_vcpus) {
- r = -EINVAL;
- } else {
+ if (system_supports_mte() && !kvm->created_vcpus) {
r = 0;
set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags);
}
@@ -99,25 +125,22 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
set_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags);
break;
case KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE:
- new_cap = cap->args[0];
-
mutex_lock(&kvm->slots_lock);
/*
* To keep things simple, allow changing the chunk
* size only when no memory slots have been created.
*/
- if (!kvm_are_all_memslots_empty(kvm)) {
- r = -EINVAL;
- } else if (new_cap && !kvm_is_block_size_supported(new_cap)) {
- r = -EINVAL;
- } else {
- r = 0;
- kvm->arch.mmu.split_page_chunk_size = new_cap;
+ if (kvm_are_all_memslots_empty(kvm)) {
+ u64 new_cap = cap->args[0];
+
+ if (!new_cap || kvm_is_block_size_supported(new_cap)) {
+ r = 0;
+ kvm->arch.mmu.split_page_chunk_size = new_cap;
+ }
}
mutex_unlock(&kvm->slots_lock);
break;
default:
- r = -EINVAL;
break;
}
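The restructured switch keeps the existing userspace contract: VM-scoped capabilities are still enabled with KVM_ENABLE_CAP on the VM fd, and the chunk size must be configured before any memslot exists. A minimal userspace sketch (vm_fd and the 2M value are illustrative, assuming a page size for which 2M is a supported block size):

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	static int enable_eager_split(int vm_fd)
	{
		struct kvm_enable_cap cap = {
			.cap = KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE,
			.args[0] = 2UL * 1024 * 1024,
		};

		/* fails (errno == EINVAL) once memslots exist or for an unsupported size */
		return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
	}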
@@ -195,6 +218,23 @@ void kvm_arch_create_vm_debugfs(struct kvm *kvm)
kvm_sys_regs_create_debugfs(kvm);
}
+static void kvm_destroy_mpidr_data(struct kvm *kvm)
+{
+ struct kvm_mpidr_data *data;
+
+ mutex_lock(&kvm->arch.config_lock);
+
+ data = rcu_dereference_protected(kvm->arch.mpidr_data,
+ lockdep_is_held(&kvm->arch.config_lock));
+ if (data) {
+ rcu_assign_pointer(kvm->arch.mpidr_data, NULL);
+ synchronize_rcu();
+ kfree(data);
+ }
+
+ mutex_unlock(&kvm->arch.config_lock);
+}
+
/**
* kvm_arch_destroy_vm - destroy the VM data structure
* @kvm: pointer to the KVM struct
@@ -209,7 +249,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
if (is_protected_kvm_enabled())
pkvm_destroy_hyp_vm(kvm);
- kfree(kvm->arch.mpidr_data);
+ kvm_destroy_mpidr_data(kvm);
+
kfree(kvm->arch.sysreg_masks);
kvm_destroy_vcpus(kvm);
@@ -218,9 +259,47 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_arm_teardown_hypercalls(kvm);
}
+static bool kvm_has_full_ptr_auth(void)
+{
+ bool apa, gpa, api, gpi, apa3, gpa3;
+ u64 isar1, isar2, val;
+
+ /*
+ * Check that:
+ *
+ * - both Address and Generic auth are implemented for a given
+ * algorithm (Q5, IMPDEF or Q3)
+ * - only a single algorithm is implemented.
+ */
+ if (!system_has_full_ptr_auth())
+ return false;
+
+ isar1 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR1_EL1);
+ isar2 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1);
+
+ apa = !!FIELD_GET(ID_AA64ISAR1_EL1_APA_MASK, isar1);
+ val = FIELD_GET(ID_AA64ISAR1_EL1_GPA_MASK, isar1);
+ gpa = (val == ID_AA64ISAR1_EL1_GPA_IMP);
+
+ api = !!FIELD_GET(ID_AA64ISAR1_EL1_API_MASK, isar1);
+ val = FIELD_GET(ID_AA64ISAR1_EL1_GPI_MASK, isar1);
+ gpi = (val == ID_AA64ISAR1_EL1_GPI_IMP);
+
+ apa3 = !!FIELD_GET(ID_AA64ISAR2_EL1_APA3_MASK, isar2);
+ val = FIELD_GET(ID_AA64ISAR2_EL1_GPA3_MASK, isar2);
+ gpa3 = (val == ID_AA64ISAR2_EL1_GPA3_IMP);
+
+ return (apa == gpa && api == gpi && apa3 == gpa3 &&
+ (apa + api + apa3) == 1);
+}
+
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r;
+
+ if (kvm && kvm_vm_is_protected(kvm) && !pkvm_ext_allowed(kvm, ext))
+ return 0;
+
switch (ext) {
case KVM_CAP_IRQCHIP:
r = vgic_present;
@@ -311,7 +390,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
case KVM_CAP_ARM_PTRAUTH_ADDRESS:
case KVM_CAP_ARM_PTRAUTH_GENERIC:
- r = system_has_full_ptr_auth();
+ r = kvm_has_full_ptr_auth();
break;
case KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE:
if (kvm)
@@ -378,12 +457,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
- /*
- * Default value for the FP state, will be overloaded at load
- * time if we support FP (pretty likely)
- */
- vcpu->arch.fp_state = FP_STATE_FREE;
-
/* Set up the timer */
kvm_timer_vcpu_init(vcpu);
@@ -395,6 +468,13 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;
+ /*
+ * This vCPU may have been created after mpidr_data was initialized.
+ * Throw out the pre-computed mappings if that is the case, which forces
+ * KVM to fall back to iteratively searching the vCPUs.
+ */
+ kvm_destroy_mpidr_data(vcpu->kvm);
+
err = kvm_vgic_vcpu_init(vcpu);
if (err)
return err;
@@ -428,6 +508,44 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
}
+static void vcpu_set_pauth_traps(struct kvm_vcpu *vcpu)
+{
+ if (vcpu_has_ptrauth(vcpu)) {
+ /*
+ * Either we're running an L2 guest, and the API/APK
+ * bits come from L1's HCR_EL2, or API/APK are both set.
+ */
+ if (unlikely(vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu))) {
+ u64 val;
+
+ val = __vcpu_sys_reg(vcpu, HCR_EL2);
+ val &= (HCR_API | HCR_APK);
+ vcpu->arch.hcr_el2 &= ~(HCR_API | HCR_APK);
+ vcpu->arch.hcr_el2 |= val;
+ } else {
+ vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK);
+ }
+
+ /*
+ * Save the host keys if there is any chance for the guest
+ * to use pauth, as the entry code will reload the guest
+ * keys in that case.
+ * Protected mode is the exception to that rule, as the
+ * entry into the EL2 code eagerly switches back and forth
+ * between host and hyp keys (and kvm_hyp_ctxt is out of
+ * reach anyway).
+ */
+ if (is_protected_kvm_enabled())
+ return;
+
+ if (vcpu->arch.hcr_el2 & (HCR_API | HCR_APK)) {
+ struct kvm_cpu_context *ctxt;
+ ctxt = this_cpu_ptr_hyp_sym(kvm_hyp_ctxt);
+ ptrauth_save_keys(ctxt);
+ }
+ }
+}
+
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct kvm_s2_mmu *mmu;
@@ -466,8 +584,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
else
vcpu_set_wfx_traps(vcpu);
- if (vcpu_has_ptrauth(vcpu))
- vcpu_ptrauth_disable(vcpu);
+ vcpu_set_pauth_traps(vcpu);
+
kvm_arch_vcpu_load_debug_state_flags(vcpu);
if (!cpumask_test_cpu(cpu, vcpu->kvm->arch.supported_cpus))
@@ -580,11 +698,6 @@ unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
}
#endif
-static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
-{
- return vcpu_get_flag(vcpu, VCPU_INITIALIZED);
-}
-
static void kvm_init_mpidr_data(struct kvm *kvm)
{
struct kvm_mpidr_data *data = NULL;
@@ -594,7 +707,8 @@ static void kvm_init_mpidr_data(struct kvm *kvm)
mutex_lock(&kvm->arch.config_lock);
- if (kvm->arch.mpidr_data || atomic_read(&kvm->online_vcpus) == 1)
+ if (rcu_access_pointer(kvm->arch.mpidr_data) ||
+ atomic_read(&kvm->online_vcpus) == 1)
goto out;
kvm_for_each_vcpu(c, vcpu, kvm) {
@@ -631,7 +745,7 @@ static void kvm_init_mpidr_data(struct kvm *kvm)
data->cmpidr_to_idx[index] = c;
}
- kvm->arch.mpidr_data = data;
+ rcu_assign_pointer(kvm->arch.mpidr_data, data);
out:
mutex_unlock(&kvm->arch.config_lock);
}
@@ -790,9 +904,8 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
* doorbells to be signalled, should an interrupt become pending.
*/
preempt_disable();
- kvm_vgic_vmcr_sync(vcpu);
vcpu_set_flag(vcpu, IN_WFI);
- vgic_v4_put(vcpu);
+ kvm_vgic_put(vcpu);
preempt_enable();
kvm_vcpu_halt(vcpu);
@@ -800,7 +913,7 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
preempt_disable();
vcpu_clear_flag(vcpu, IN_WFI);
- vgic_v4_load(vcpu);
+ kvm_vgic_load(vcpu);
preempt_enable();
}
@@ -980,7 +1093,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
if (run->exit_reason == KVM_EXIT_MMIO) {
ret = kvm_handle_mmio_return(vcpu);
- if (ret)
+ if (ret <= 0)
return ret;
}
@@ -1270,7 +1383,7 @@ static unsigned long system_supported_vcpu_features(void)
if (!system_supports_sve())
clear_bit(KVM_ARM_VCPU_SVE, &features);
- if (!system_has_full_ptr_auth()) {
+ if (!kvm_has_full_ptr_auth()) {
clear_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, &features);
clear_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, &features);
}
@@ -1971,7 +2084,7 @@ static void cpu_set_hyp_vector(void)
static void cpu_hyp_init_context(void)
{
- kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt);
+ kvm_init_host_cpu_context(host_data_ptr(host_ctxt));
if (!is_kernel_in_hyp_mode())
cpu_init_hyp_mode();
@@ -2470,21 +2583,27 @@ out_err:
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
{
- struct kvm_vcpu *vcpu;
+ struct kvm_vcpu *vcpu = NULL;
+ struct kvm_mpidr_data *data;
unsigned long i;
mpidr &= MPIDR_HWID_BITMASK;
- if (kvm->arch.mpidr_data) {
- u16 idx = kvm_mpidr_index(kvm->arch.mpidr_data, mpidr);
+ rcu_read_lock();
+ data = rcu_dereference(kvm->arch.mpidr_data);
+
+ if (data) {
+ u16 idx = kvm_mpidr_index(data, mpidr);
- vcpu = kvm_get_vcpu(kvm,
- kvm->arch.mpidr_data->cmpidr_to_idx[idx]);
+ vcpu = kvm_get_vcpu(kvm, data->cmpidr_to_idx[idx]);
if (mpidr != kvm_vcpu_get_mpidr_aff(vcpu))
vcpu = NULL;
+ }
+ rcu_read_unlock();
+
+ if (vcpu)
return vcpu;
- }
kvm_for_each_vcpu(i, vcpu, kvm) {
if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu))
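The mpidr_data conversion above follows the usual RCU publish/lookup/retire pairing; condensed, the lifecycle the hunks in this file implement is (locking and error handling elided):

	/* publish (kvm_init_mpidr_data, under config_lock) */
	rcu_assign_pointer(kvm->arch.mpidr_data, data);

	/* lookup (kvm_mpidr_to_vcpu), safe against concurrent teardown */
	rcu_read_lock();
	data = rcu_dereference(kvm->arch.mpidr_data);
	if (data)
		vcpu = kvm_get_vcpu(kvm, data->cmpidr_to_idx[idx]);
	rcu_read_unlock();

	/* retire (kvm_destroy_mpidr_data): wait for readers before freeing */
	rcu_assign_pointer(kvm->arch.mpidr_data, NULL);
	synchronize_rcu();
	kfree(data);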
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 4697ba41b3a9..72d733c74a38 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -2117,6 +2117,26 @@ inject:
return true;
}
+static bool forward_traps(struct kvm_vcpu *vcpu, u64 control_bit)
+{
+ bool control_bit_set;
+
+ if (!vcpu_has_nv(vcpu))
+ return false;
+
+ control_bit_set = __vcpu_sys_reg(vcpu, HCR_EL2) & control_bit;
+ if (!is_hyp_ctxt(vcpu) && control_bit_set) {
+ kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
+ return true;
+ }
+ return false;
+}
+
+bool forward_smc_trap(struct kvm_vcpu *vcpu)
+{
+ return forward_traps(vcpu, HCR_TSC);
+}
+
static u64 kvm_check_illegal_exception_return(struct kvm_vcpu *vcpu, u64 spsr)
{
u64 mode = spsr & PSR_MODE_MASK;
@@ -2152,37 +2172,39 @@ static u64 kvm_check_illegal_exception_return(struct kvm_vcpu *vcpu, u64 spsr)
void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu)
{
- u64 spsr, elr, mode;
- bool direct_eret;
+ u64 spsr, elr, esr;
/*
- * Going through the whole put/load motions is a waste of time
- * if this is a VHE guest hypervisor returning to its own
- * userspace, or the hypervisor performing a local exception
- * return. No need to save/restore registers, no need to
- * switch S2 MMU. Just do the canonical ERET.
+ * Forward this trap to the virtual EL2 if the virtual
+ * HCR_EL2.NV bit is set and this is coming from !EL2.
*/
- spsr = vcpu_read_sys_reg(vcpu, SPSR_EL2);
- spsr = kvm_check_illegal_exception_return(vcpu, spsr);
-
- mode = spsr & (PSR_MODE_MASK | PSR_MODE32_BIT);
-
- direct_eret = (mode == PSR_MODE_EL0t &&
- vcpu_el2_e2h_is_set(vcpu) &&
- vcpu_el2_tge_is_set(vcpu));
- direct_eret |= (mode == PSR_MODE_EL2h || mode == PSR_MODE_EL2t);
-
- if (direct_eret) {
- *vcpu_pc(vcpu) = vcpu_read_sys_reg(vcpu, ELR_EL2);
- *vcpu_cpsr(vcpu) = spsr;
- trace_kvm_nested_eret(vcpu, *vcpu_pc(vcpu), spsr);
+ if (forward_traps(vcpu, HCR_NV))
return;
+
+ /* Check for an ERETAx */
+ esr = kvm_vcpu_get_esr(vcpu);
+ if (esr_iss_is_eretax(esr) && !kvm_auth_eretax(vcpu, &elr)) {
+ /*
+ * Oh no, ERETAx failed to authenticate. If we have
+ * FPACCOMBINE, deliver an exception right away. If we
+ * don't, then let the mangled ELR value trickle down the
+ * ERET handling, and the guest will have a little surprise.
+ */
+ if (kvm_has_pauth(vcpu->kvm, FPACCOMBINE)) {
+ esr &= ESR_ELx_ERET_ISS_ERETA;
+ esr |= FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_FPAC);
+ kvm_inject_nested_sync(vcpu, esr);
+ return;
+ }
}
preempt_disable();
kvm_arch_vcpu_put(vcpu);
- elr = __vcpu_sys_reg(vcpu, ELR_EL2);
+ spsr = __vcpu_sys_reg(vcpu, SPSR_EL2);
+ spsr = kvm_check_illegal_exception_return(vcpu, spsr);
+ if (!esr_iss_is_eretax(esr))
+ elr = __vcpu_sys_reg(vcpu, ELR_EL2);
trace_kvm_nested_eret(vcpu, elr, spsr);
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 826307e19e3a..1807d3a79a8a 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -14,19 +14,6 @@
#include <asm/kvm_mmu.h>
#include <asm/sysreg.h>
-void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu)
-{
- struct task_struct *p = vcpu->arch.parent_task;
- struct user_fpsimd_state *fpsimd;
-
- if (!is_protected_kvm_enabled() || !p)
- return;
-
- fpsimd = &p->thread.uw.fpsimd_state;
- kvm_unshare_hyp(fpsimd, fpsimd + 1);
- put_task_struct(p);
-}
-
/*
* Called on entry to KVM_RUN unless this vcpu previously ran at least
* once and the most recent prior KVM_RUN for this vcpu was called from
@@ -38,30 +25,18 @@ void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu)
*/
int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu)
{
- int ret;
-
struct user_fpsimd_state *fpsimd = &current->thread.uw.fpsimd_state;
+ int ret;
- kvm_vcpu_unshare_task_fp(vcpu);
+ /* pKVM has its own tracking of the host fpsimd state. */
+ if (is_protected_kvm_enabled())
+ return 0;
/* Make sure the host task fpsimd state is visible to hyp: */
ret = kvm_share_hyp(fpsimd, fpsimd + 1);
if (ret)
return ret;
- vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd);
-
- /*
- * We need to keep current's task_struct pinned until its data has been
- * unshared with the hypervisor to make sure it is not re-used by the
- * kernel and donated to someone else while already shared -- see
- * kvm_vcpu_unshare_task_fp() for the matching put_task_struct().
- */
- if (is_protected_kvm_enabled()) {
- get_task_struct(current);
- vcpu->arch.parent_task = current;
- }
-
return 0;
}
@@ -86,7 +61,8 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
* guest in kvm_arch_vcpu_ctxflush_fp() and override this to
* FP_STATE_FREE if the flag set.
*/
- vcpu->arch.fp_state = FP_STATE_HOST_OWNED;
+ *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
+ *host_data_ptr(fpsimd_state) = kern_hyp_va(&current->thread.uw.fpsimd_state);
vcpu_clear_flag(vcpu, HOST_SVE_ENABLED);
if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
@@ -110,7 +86,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
* been saved, this is very unlikely to happen.
*/
if (read_sysreg_s(SYS_SVCR) & (SVCR_SM_MASK | SVCR_ZA_MASK)) {
- vcpu->arch.fp_state = FP_STATE_FREE;
+ *host_data_ptr(fp_owner) = FP_STATE_FREE;
fpsimd_save_and_flush_cpu_state();
}
}
@@ -126,7 +102,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu)
{
if (test_thread_flag(TIF_FOREIGN_FPSTATE))
- vcpu->arch.fp_state = FP_STATE_FREE;
+ *host_data_ptr(fp_owner) = FP_STATE_FREE;
}
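Together with the hyp changes later in this patch, the per-vcpu fp_state becomes a per-CPU fp_owner with three transitions; a condensed sketch of the lifecycle as implied by the hunks (control flow simplified, names as in the patch):

	/* kvm_arch_vcpu_load_fp(): the host owns the regs when the vcpu is loaded */
	*host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
	*host_data_ptr(fpsimd_state) = kern_hyp_va(&current->thread.uw.fpsimd_state);

	/* kvm_hyp_handle_fpsimd(): the first FP/SIMD trap hands the regs to the guest */
	if (host_owns_fp_regs())
		__fpsimd_save_state(*host_data_ptr(fpsimd_state));
	*host_data_ptr(fp_owner) = FP_STATE_GUEST_OWNED;

	/* kvm_arch_vcpu_ctxflush_fp(): give the regs up if the host FP state was flushed */
	if (test_thread_flag(TIF_FOREIGN_FPSTATE))
		*host_data_ptr(fp_owner) = FP_STATE_FREE;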
/*
@@ -142,8 +118,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
WARN_ON_ONCE(!irqs_disabled());
- if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) {
-
+ if (guest_owns_fp_regs()) {
/*
* Currently we do not support SME guests so SVCR is
* always 0 and we just need a variable to point to.
@@ -196,16 +171,38 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
isb();
}
- if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) {
+ if (guest_owns_fp_regs()) {
if (vcpu_has_sve(vcpu)) {
__vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
- /* Restore the VL that was saved when bound to the CPU */
+ /*
+ * Restore the VL that was saved when bound to the CPU,
+ * which is the maximum VL for the guest. Because the
+ * layout of the data when saving the sve state depends
+ * on the VL, we need to use a consistent (i.e., the
+ * maximum) VL.
+ * Note that this means that at guest exit ZCR_EL1 is
+ * not necessarily the same as on guest entry.
+ *
+ * Restoring the VL isn't needed in VHE mode since
+ * ZCR_EL2 (accessed via ZCR_EL1) would fulfill the same
+ * role when doing the save from EL2.
+ */
if (!has_vhe())
sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1,
SYS_ZCR_EL1);
}
+ /*
+ * Flush (save and invalidate) the fpsimd/sve state so that if
+ * the host tries to use fpsimd/sve, it's not using stale data
+ * from the guest.
+ *
+ * Flushing the state sets the TIF_FOREIGN_FPSTATE bit for the
+ * context unconditionally, in both nVHE and VHE. This allows
+ * the kernel to restore the fpsimd/sve state, including ZCR_EL1
+ * when needed.
+ */
fpsimd_save_and_flush_cpu_state();
} else if (has_vhe() && system_supports_sve()) {
/*
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 617ae6dea5d5..b037f0a0e27e 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -56,6 +56,13 @@ static int handle_hvc(struct kvm_vcpu *vcpu)
static int handle_smc(struct kvm_vcpu *vcpu)
{
/*
+ * Forward this trapped smc instruction to the virtual EL2 if
+ * the guest has asked for it.
+ */
+ if (forward_smc_trap(vcpu))
+ return 1;
+
+ /*
* "If an SMC instruction executed at Non-secure EL1 is
* trapped to EL2 because HCR_EL2.TSC is 1, the exception is a
* Trap exception, not a Secure Monitor Call exception [...]"
@@ -207,19 +214,40 @@ static int handle_sve(struct kvm_vcpu *vcpu)
}
/*
- * Guest usage of a ptrauth instruction (which the guest EL1 did not turn into
- * a NOP). If we get here, it is that we didn't fixup ptrauth on exit, and all
- * that we can do is give the guest an UNDEF.
+ * Two possibilities to handle a trapping ptrauth instruction:
+ *
+ * - Guest usage of a ptrauth instruction (which the guest EL1 did not
+ * turn into a NOP). If we get here, it is because we didn't enable
+ * ptrauth for the guest. This results in an UNDEF, as it isn't
+ * supposed to use ptrauth without being told it could.
+ *
+ * - Running an L2 NV guest while L1 has left HCR_EL2.API==0, and for
+ * which we reinject the exception into L1.
+ *
+ * Anything else is an emulation bug (hence the WARN_ON + UNDEF).
*/
static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu)
{
+ if (!vcpu_has_ptrauth(vcpu)) {
+ kvm_inject_undefined(vcpu);
+ return 1;
+ }
+
+ if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+ kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
+ return 1;
+ }
+
+ /* Really shouldn't be here! */
+ WARN_ON_ONCE(1);
kvm_inject_undefined(vcpu);
return 1;
}
static int kvm_handle_eret(struct kvm_vcpu *vcpu)
{
- if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_ERET_ISS_ERET)
+ if (esr_iss_is_eretax(kvm_vcpu_get_esr(vcpu)) &&
+ !vcpu_has_ptrauth(vcpu))
return kvm_handle_ptrauth(vcpu);
/*
diff --git a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
index 961bbef104a6..d00093699aaf 100644
--- a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
@@ -135,9 +135,9 @@ static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu)
if (!vcpu_get_flag(vcpu, DEBUG_DIRTY))
return;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
guest_ctxt = &vcpu->arch.ctxt;
- host_dbg = &vcpu->arch.host_debug_state.regs;
+ host_dbg = host_data_ptr(host_debug_state.regs);
guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
__debug_save_state(host_dbg, host_ctxt);
@@ -154,9 +154,9 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu)
if (!vcpu_get_flag(vcpu, DEBUG_DIRTY))
return;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
guest_ctxt = &vcpu->arch.ctxt;
- host_dbg = &vcpu->arch.host_debug_state.regs;
+ host_dbg = host_data_ptr(host_debug_state.regs);
guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
__debug_save_state(guest_dbg, guest_ctxt);
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index e3fcf8c4d5b4..a92566f36022 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -27,6 +27,7 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_nested.h>
+#include <asm/kvm_ptrauth.h>
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
#include <asm/processor.h>
@@ -39,12 +40,6 @@ struct kvm_exception_table_entry {
extern struct kvm_exception_table_entry __start___kvm_ex_table;
extern struct kvm_exception_table_entry __stop___kvm_ex_table;
-/* Check whether the FP regs are owned by the guest */
-static inline bool guest_owns_fp_regs(struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.fp_state == FP_STATE_GUEST_OWNED;
-}
-
/* Save the 32-bit only FPSIMD system register state */
static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
{
@@ -155,7 +150,7 @@ static inline bool cpu_has_amu(void)
static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
{
- struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt);
struct kvm *kvm = kern_hyp_va(vcpu->kvm);
CHECK_FGT_MASKS(HFGRTR_EL2);
@@ -191,7 +186,7 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu)
{
- struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt);
struct kvm *kvm = kern_hyp_va(vcpu->kvm);
if (!cpus_have_final_cap(ARM64_HAS_FGT))
@@ -226,13 +221,13 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
write_sysreg(0, pmselr_el0);
- hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ hctxt = host_data_ptr(host_ctxt);
ctxt_sys_reg(hctxt, PMUSERENR_EL0) = read_sysreg(pmuserenr_el0);
write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
vcpu_set_flag(vcpu, PMUSERENR_ON_CPU);
}
- vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2);
+ *host_data_ptr(host_debug_state.mdcr_el2) = read_sysreg(mdcr_el2);
write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
if (cpus_have_final_cap(ARM64_HAS_HCX)) {
@@ -254,13 +249,13 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
{
- write_sysreg(vcpu->arch.mdcr_el2_host, mdcr_el2);
+ write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2);
write_sysreg(0, hstr_el2);
if (kvm_arm_support_pmu_v3()) {
struct kvm_cpu_context *hctxt;
- hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ hctxt = host_data_ptr(host_ctxt);
write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0);
vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU);
}
@@ -271,10 +266,8 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
__deactivate_traps_hfgxtr(vcpu);
}
-static inline void ___activate_traps(struct kvm_vcpu *vcpu)
+static inline void ___activate_traps(struct kvm_vcpu *vcpu, u64 hcr)
{
- u64 hcr = vcpu->arch.hcr_el2;
-
if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM))
hcr |= HCR_TVM;
@@ -376,8 +369,8 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
isb();
/* Write out the host state if it's in the registers */
- if (vcpu->arch.fp_state == FP_STATE_HOST_OWNED)
- __fpsimd_save_state(vcpu->arch.host_fpsimd_state);
+ if (host_owns_fp_regs())
+ __fpsimd_save_state(*host_data_ptr(fpsimd_state));
/* Restore the guest state */
if (sve_guest)
@@ -389,7 +382,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
if (!(read_sysreg(hcr_el2) & HCR_RW))
write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2);
- vcpu->arch.fp_state = FP_STATE_GUEST_OWNED;
+ *host_data_ptr(fp_owner) = FP_STATE_GUEST_OWNED;
return true;
}
@@ -449,60 +442,6 @@ static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu)
return true;
}
-static inline bool esr_is_ptrauth_trap(u64 esr)
-{
- switch (esr_sys64_to_sysreg(esr)) {
- case SYS_APIAKEYLO_EL1:
- case SYS_APIAKEYHI_EL1:
- case SYS_APIBKEYLO_EL1:
- case SYS_APIBKEYHI_EL1:
- case SYS_APDAKEYLO_EL1:
- case SYS_APDAKEYHI_EL1:
- case SYS_APDBKEYLO_EL1:
- case SYS_APDBKEYHI_EL1:
- case SYS_APGAKEYLO_EL1:
- case SYS_APGAKEYHI_EL1:
- return true;
- }
-
- return false;
-}
-
-#define __ptrauth_save_key(ctxt, key) \
- do { \
- u64 __val; \
- __val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \
- ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val; \
- __val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \
- ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val; \
-} while(0)
-
-DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
-
-static bool kvm_hyp_handle_ptrauth(struct kvm_vcpu *vcpu, u64 *exit_code)
-{
- struct kvm_cpu_context *ctxt;
- u64 val;
-
- if (!vcpu_has_ptrauth(vcpu))
- return false;
-
- ctxt = this_cpu_ptr(&kvm_hyp_ctxt);
- __ptrauth_save_key(ctxt, APIA);
- __ptrauth_save_key(ctxt, APIB);
- __ptrauth_save_key(ctxt, APDA);
- __ptrauth_save_key(ctxt, APDB);
- __ptrauth_save_key(ctxt, APGA);
-
- vcpu_ptrauth_enable(vcpu);
-
- val = read_sysreg(hcr_el2);
- val |= (HCR_API | HCR_APK);
- write_sysreg(val, hcr_el2);
-
- return true;
-}
-
static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *ctxt;
@@ -590,9 +529,6 @@ static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
__vgic_v3_perform_cpuif_access(vcpu) == 1)
return true;
- if (esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu)))
- return kvm_hyp_handle_ptrauth(vcpu, exit_code);
-
if (kvm_hyp_handle_cntpct(vcpu))
return true;
diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
index 82b3d62538a6..22f374e9f532 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
@@ -53,7 +53,13 @@ pkvm_hyp_vcpu_to_hyp_vm(struct pkvm_hyp_vcpu *hyp_vcpu)
return container_of(hyp_vcpu->vcpu.kvm, struct pkvm_hyp_vm, kvm);
}
+static inline bool pkvm_hyp_vcpu_is_protected(struct pkvm_hyp_vcpu *hyp_vcpu)
+{
+ return vcpu_is_protected(&hyp_vcpu->vcpu);
+}
+
void pkvm_hyp_vm_table_init(void *tbl);
+void pkvm_host_fpsimd_state_init(void);
int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
unsigned long pgd_hva);
diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
index 7746ea507b6f..53efda0235cf 100644
--- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
@@ -83,10 +83,10 @@ void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
{
/* Disable and flush SPE data generation */
if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE))
- __debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1);
+ __debug_save_spe(host_data_ptr(host_debug_state.pmscr_el1));
/* Disable and flush Self-Hosted Trace generation */
if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE))
- __debug_save_trace(&vcpu->arch.host_debug_state.trfcr_el1);
+ __debug_save_trace(host_data_ptr(host_debug_state.trfcr_el1));
}
void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
@@ -97,9 +97,9 @@ void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
{
if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE))
- __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
+ __debug_restore_spe(*host_data_ptr(host_debug_state.pmscr_el1));
if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE))
- __debug_restore_trace(vcpu->arch.host_debug_state.trfcr_el1);
+ __debug_restore_trace(*host_data_ptr(host_debug_state.trfcr_el1));
}
void __debug_switch_to_host(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
index 320f2eaa14a9..02746f9d0980 100644
--- a/arch/arm64/kvm/hyp/nvhe/ffa.c
+++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
@@ -600,7 +600,6 @@ static bool ffa_call_supported(u64 func_id)
case FFA_MSG_POLL:
case FFA_MSG_WAIT:
/* 32-bit variants of 64-bit calls */
- case FFA_MSG_SEND_DIRECT_REQ:
case FFA_MSG_SEND_DIRECT_RESP:
case FFA_RXTX_MAP:
case FFA_MEM_DONATE:
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 2385fd03ed87..d5c48dc98f67 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -39,10 +39,8 @@ static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
hyp_vcpu->vcpu.arch.cptr_el2 = host_vcpu->arch.cptr_el2;
hyp_vcpu->vcpu.arch.iflags = host_vcpu->arch.iflags;
- hyp_vcpu->vcpu.arch.fp_state = host_vcpu->arch.fp_state;
hyp_vcpu->vcpu.arch.debug_ptr = kern_hyp_va(host_vcpu->arch.debug_ptr);
- hyp_vcpu->vcpu.arch.host_fpsimd_state = host_vcpu->arch.host_fpsimd_state;
hyp_vcpu->vcpu.arch.vsesr_el2 = host_vcpu->arch.vsesr_el2;
@@ -64,7 +62,6 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
host_vcpu->arch.fault = hyp_vcpu->vcpu.arch.fault;
host_vcpu->arch.iflags = hyp_vcpu->vcpu.arch.iflags;
- host_vcpu->arch.fp_state = hyp_vcpu->vcpu.arch.fp_state;
host_cpu_if->vgic_hcr = hyp_cpu_if->vgic_hcr;
for (i = 0; i < hyp_cpu_if->used_lrs; ++i)
@@ -178,16 +175,6 @@ static void handle___vgic_v3_get_gic_config(struct kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 1) = __vgic_v3_get_gic_config();
}
-static void handle___vgic_v3_read_vmcr(struct kvm_cpu_context *host_ctxt)
-{
- cpu_reg(host_ctxt, 1) = __vgic_v3_read_vmcr();
-}
-
-static void handle___vgic_v3_write_vmcr(struct kvm_cpu_context *host_ctxt)
-{
- __vgic_v3_write_vmcr(cpu_reg(host_ctxt, 1));
-}
-
static void handle___vgic_v3_init_lrs(struct kvm_cpu_context *host_ctxt)
{
__vgic_v3_init_lrs();
@@ -198,18 +185,18 @@ static void handle___kvm_get_mdcr_el2(struct kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 1) = __kvm_get_mdcr_el2();
}
-static void handle___vgic_v3_save_aprs(struct kvm_cpu_context *host_ctxt)
+static void handle___vgic_v3_save_vmcr_aprs(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1);
- __vgic_v3_save_aprs(kern_hyp_va(cpu_if));
+ __vgic_v3_save_vmcr_aprs(kern_hyp_va(cpu_if));
}
-static void handle___vgic_v3_restore_aprs(struct kvm_cpu_context *host_ctxt)
+static void handle___vgic_v3_restore_vmcr_aprs(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1);
- __vgic_v3_restore_aprs(kern_hyp_va(cpu_if));
+ __vgic_v3_restore_vmcr_aprs(kern_hyp_va(cpu_if));
}
static void handle___pkvm_init(struct kvm_cpu_context *host_ctxt)
@@ -340,10 +327,8 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__kvm_tlb_flush_vmid_range),
HANDLE_FUNC(__kvm_flush_cpu_context),
HANDLE_FUNC(__kvm_timer_set_cntvoff),
- HANDLE_FUNC(__vgic_v3_read_vmcr),
- HANDLE_FUNC(__vgic_v3_write_vmcr),
- HANDLE_FUNC(__vgic_v3_save_aprs),
- HANDLE_FUNC(__vgic_v3_restore_aprs),
+ HANDLE_FUNC(__vgic_v3_save_vmcr_aprs),
+ HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs),
HANDLE_FUNC(__pkvm_vcpu_init_traps),
HANDLE_FUNC(__pkvm_init_vm),
HANDLE_FUNC(__pkvm_init_vcpu),
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 861c76021a25..caba3e4bd09e 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -533,7 +533,13 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
int ret = 0;
esr = read_sysreg_el2(SYS_ESR);
- BUG_ON(!__get_fault_info(esr, &fault));
+ if (!__get_fault_info(esr, &fault)) {
+ /*
+ * We've presumably raced with a page-table change which caused
+ * AT to fail; try again.
+ */
+ return;
+ }
addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;
ret = host_stage2_idmap(addr);
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 26dd9a20ad6e..16aa4875ddb8 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -200,7 +200,7 @@ static void pvm_init_trap_regs(struct kvm_vcpu *vcpu)
}
/*
- * Initialize trap register values for protected VMs.
+ * Initialize trap register values in protected mode.
*/
void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu)
{
@@ -247,6 +247,17 @@ void pkvm_hyp_vm_table_init(void *tbl)
vm_table = tbl;
}
+void pkvm_host_fpsimd_state_init(void)
+{
+ unsigned long i;
+
+ for (i = 0; i < hyp_nr_cpus; i++) {
+ struct kvm_host_data *host_data = per_cpu_ptr(&kvm_host_data, i);
+
+ host_data->fpsimd_state = &host_data->host_ctxt.fp_regs;
+ }
+}
+
/*
* Return the hyp vm structure corresponding to the handle.
*/
@@ -430,6 +441,7 @@ static void *map_donated_memory(unsigned long host_va, size_t size)
static void __unmap_donated_memory(void *va, size_t size)
{
+ kvm_flush_dcache_to_poc(va, size);
WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(va),
PAGE_ALIGN(size) >> PAGE_SHIFT));
}
diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
index d57bcb6ab94d..dfe8fe0f7eaf 100644
--- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c
+++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
@@ -205,7 +205,7 @@ asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on)
struct psci_boot_args *boot_args;
struct kvm_cpu_context *host_ctxt;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
if (is_cpu_on)
boot_args = this_cpu_ptr(&cpu_on_args);
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index bc58d1b515af..859f22f754d3 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -257,8 +257,7 @@ static int fix_hyp_pgtable_refcnt(void)
void __noreturn __pkvm_init_finalise(void)
{
- struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data);
- struct kvm_cpu_context *host_ctxt = &host_data->host_ctxt;
+ struct kvm_cpu_context *host_ctxt = host_data_ptr(host_ctxt);
unsigned long nr_pages, reserved_pages, pfn;
int ret;
@@ -301,6 +300,7 @@ void __noreturn __pkvm_init_finalise(void)
goto out;
pkvm_hyp_vm_table_init(vm_table_base);
+ pkvm_host_fpsimd_state_init();
out:
/*
* We tail-called to here from handle___pkvm_init() and will not return,
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index c50f8459e4fc..6758cd905570 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -40,7 +40,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
{
u64 val;
- ___activate_traps(vcpu);
+ ___activate_traps(vcpu, vcpu->arch.hcr_el2);
__activate_traps_common(vcpu);
val = vcpu->arch.cptr_el2;
@@ -53,7 +53,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
val |= CPTR_EL2_TSM;
}
- if (!guest_owns_fp_regs(vcpu)) {
+ if (!guest_owns_fp_regs()) {
if (has_hvhe())
val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |
CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN);
@@ -191,7 +191,6 @@ static const exit_handler_fn hyp_exit_handlers[] = {
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
- [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
[ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops,
};
@@ -203,13 +202,12 @@ static const exit_handler_fn pvm_exit_handlers[] = {
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
- [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
[ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops,
};
static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
{
- if (unlikely(kvm_vm_is_protected(kern_hyp_va(vcpu->kvm))))
+ if (unlikely(vcpu_is_protected(vcpu)))
return pvm_exit_handlers;
return hyp_exit_handlers;
@@ -228,9 +226,7 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
*/
static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
{
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
-
- if (kvm_vm_is_protected(kvm) && vcpu_mode_is_32bit(vcpu)) {
+ if (unlikely(vcpu_is_protected(vcpu) && vcpu_mode_is_32bit(vcpu))) {
/*
* As we have caught the guest red-handed, decide that it isn't
* fit for purpose anymore by making the vcpu invalid. The VMM
@@ -264,7 +260,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
pmr_sync();
}
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
host_ctxt->__hyp_running_vcpu = vcpu;
guest_ctxt = &vcpu->arch.ctxt;
@@ -337,7 +333,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
__sysreg_restore_state_nvhe(host_ctxt);
- if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)
+ if (guest_owns_fp_regs())
__fpsimd_save_fpexc32(vcpu);
__debug_switch_to_host(vcpu);
@@ -367,7 +363,7 @@ asmlinkage void __noreturn hyp_panic(void)
struct kvm_cpu_context *host_ctxt;
struct kvm_vcpu *vcpu;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
vcpu = host_ctxt->__hyp_running_vcpu;
if (vcpu) {
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index 2fc68da4036d..ca3c09df8d7c 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -11,13 +11,23 @@
#include <nvhe/mem_protect.h>
struct tlb_inv_context {
- u64 tcr;
+ struct kvm_s2_mmu *mmu;
+ u64 tcr;
+ u64 sctlr;
};
-static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
- struct tlb_inv_context *cxt,
- bool nsh)
+static void enter_vmid_context(struct kvm_s2_mmu *mmu,
+ struct tlb_inv_context *cxt,
+ bool nsh)
{
+ struct kvm_s2_mmu *host_s2_mmu = &host_mmu.arch.mmu;
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_vcpu *vcpu;
+
+ host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ vcpu = host_ctxt->__hyp_running_vcpu;
+ cxt->mmu = NULL;
+
/*
* We have two requirements:
*
@@ -40,20 +50,55 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
else
dsb(ish);
+ /*
+ * If we're already in the desired context, then there's nothing to do.
+ */
+ if (vcpu) {
+ /*
+ * We're in guest context. However, for this to work, this needs
+ * to be called from within __kvm_vcpu_run(), which ensures that
+ * __hyp_running_vcpu is set to the current guest vcpu.
+ */
+ if (mmu == vcpu->arch.hw_mmu || WARN_ON(mmu != host_s2_mmu))
+ return;
+
+ cxt->mmu = vcpu->arch.hw_mmu;
+ } else {
+ /* We're in host context. */
+ if (mmu == host_s2_mmu)
+ return;
+
+ cxt->mmu = host_s2_mmu;
+ }
+
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
u64 val;
/*
* For CPUs that are affected by ARM 1319367, we need to
- * avoid a host Stage-1 walk while we have the guest's
- * VMID set in the VTTBR in order to invalidate TLBs.
- * We're guaranteed that the S1 MMU is enabled, so we can
- * simply set the EPD bits to avoid any further TLB fill.
+ * avoid a Stage-1 walk with the old VMID while we have
+ * the new VMID set in the VTTBR in order to invalidate TLBs.
+ * We're guaranteed that the host S1 MMU is enabled, so
+ * we can simply set the EPD bits to avoid any further
+ * TLB fill. For guests, we ensure that the S1 MMU is
+ * temporarily enabled in the next context.
*/
val = cxt->tcr = read_sysreg_el1(SYS_TCR);
val |= TCR_EPD1_MASK | TCR_EPD0_MASK;
write_sysreg_el1(val, SYS_TCR);
isb();
+
+ if (vcpu) {
+ val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR);
+ if (!(val & SCTLR_ELx_M)) {
+ val |= SCTLR_ELx_M;
+ write_sysreg_el1(val, SYS_SCTLR);
+ isb();
+ }
+ } else {
+ /* The host S1 MMU is always enabled. */
+ cxt->sctlr = SCTLR_ELx_M;
+ }
}
/*
@@ -62,18 +107,40 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
* ensuring that we always have an ISB, but not two ISBs back
* to back.
*/
- __load_stage2(mmu, kern_hyp_va(mmu->arch));
+ if (vcpu)
+ __load_host_stage2();
+ else
+ __load_stage2(mmu, kern_hyp_va(mmu->arch));
+
asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));
}
-static void __tlb_switch_to_host(struct tlb_inv_context *cxt)
+static void exit_vmid_context(struct tlb_inv_context *cxt)
{
- __load_host_stage2();
+ struct kvm_s2_mmu *mmu = cxt->mmu;
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_vcpu *vcpu;
+
+ host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ vcpu = host_ctxt->__hyp_running_vcpu;
+
+ if (!mmu)
+ return;
+
+ if (vcpu)
+ __load_stage2(mmu, kern_hyp_va(mmu->arch));
+ else
+ __load_host_stage2();
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
- /* Ensure write of the host VMID */
+ /* Ensure write of the old VMID */
isb();
- /* Restore the host's TCR_EL1 */
+
+ if (!(cxt->sctlr & SCTLR_ELx_M)) {
+ write_sysreg_el1(cxt->sctlr, SYS_SCTLR);
+ isb();
+ }
+
write_sysreg_el1(cxt->tcr, SYS_TCR);
}
}
@@ -84,7 +151,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
struct tlb_inv_context cxt;
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt, false);
+ enter_vmid_context(mmu, &cxt, false);
/*
* We could do so much better if we had the VA as well.
@@ -105,7 +172,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
dsb(ish);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
@@ -114,7 +181,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
struct tlb_inv_context cxt;
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt, true);
+ enter_vmid_context(mmu, &cxt, true);
/*
* We could do so much better if we had the VA as well.
@@ -135,7 +202,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
dsb(nsh);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
@@ -152,7 +219,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
start = round_down(start, stride);
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt, false);
+ enter_vmid_context(mmu, &cxt, false);
__flush_s2_tlb_range_op(ipas2e1is, start, pages, stride,
TLBI_TTL_UNKNOWN);
@@ -162,7 +229,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
dsb(ish);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
@@ -170,13 +237,13 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
struct tlb_inv_context cxt;
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt, false);
+ enter_vmid_context(mmu, &cxt, false);
__tlbi(vmalls12e1is);
dsb(ish);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
@@ -184,19 +251,19 @@ void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
struct tlb_inv_context cxt;
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt, false);
+ enter_vmid_context(mmu, &cxt, false);
__tlbi(vmalle1);
asm volatile("ic iallu");
dsb(nsh);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_flush_vm_context(void)
{
- /* Same remark as in __tlb_switch_to_guest() */
+ /* Same remark as in enter_vmid_context() */
dsb(ish);
__tlbi(alle1is);
dsb(ish);
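
As an illustrative aside (not part of the patch), the decision enter_vmid_context() makes can be reduced to a small truth table: when a guest vcpu is running, only host-targeted operations need a VMID switch and the guest MMU is what gets restored afterwards; in host context it is the other way around. A minimal standalone C model, with made-up types and names:

/*
 * Standalone sketch (not kernel code) of the enter/exit decision:
 * given whether a guest vcpu is currently running and which stage-2
 * MMU the TLB operation targets, work out whether a VMID switch is
 * needed and which MMU must be restored on exit.
 */
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct s2_mmu { const char *name; };

static struct s2_mmu host_mmu  = { "host"  };
static struct s2_mmu guest_mmu = { "guest" };

/* Returns the MMU to restore on exit, or NULL if no switch is needed. */
static struct s2_mmu *vmid_to_restore(bool in_guest_ctxt, struct s2_mmu *target)
{
	if (in_guest_ctxt) {
		/* Guest VMID is live: only a host-targeted op needs a switch. */
		if (target == &guest_mmu)
			return NULL;
		return &guest_mmu;	/* switch to host, restore guest after */
	}

	/* Host VMID is live: only a guest-targeted op needs a switch. */
	if (target == &host_mmu)
		return NULL;
	return &host_mmu;		/* switch to guest, restore host after */
}

int main(void)
{
	assert(vmid_to_restore(true,  &guest_mmu) == NULL);
	assert(vmid_to_restore(true,  &host_mmu)  == &guest_mmu);
	assert(vmid_to_restore(false, &host_mmu)  == NULL);
	assert(vmid_to_restore(false, &guest_mmu) == &host_mmu);
	printf("all cases match the enter_vmid_context() behaviour\n");
	return 0;
}
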
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 5a59ef88b646..9e2bbee77491 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -914,12 +914,12 @@ static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx,
static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte)
{
u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
- return memattr == KVM_S2_MEMATTR(pgt, NORMAL);
+ return kvm_pte_valid(pte) && memattr == KVM_S2_MEMATTR(pgt, NORMAL);
}
static bool stage2_pte_executable(kvm_pte_t pte)
{
- return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
+ return kvm_pte_valid(pte) && !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
}
static u64 stage2_map_walker_phys_addr(const struct kvm_pgtable_visit_ctx *ctx,
@@ -979,6 +979,21 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
if (!stage2_pte_needs_update(ctx->old, new))
return -EAGAIN;
+ /* If we're only changing software bits, then store them and go! */
+ if (!kvm_pgtable_walk_shared(ctx) &&
+ !((ctx->old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW)) {
+ bool old_is_counted = stage2_pte_is_counted(ctx->old);
+
+ if (old_is_counted != stage2_pte_is_counted(new)) {
+ if (old_is_counted)
+ mm_ops->put_page(ctx->ptep);
+ else
+ mm_ops->get_page(ctx->ptep);
+ }
+ WARN_ON_ONCE(!stage2_try_set_pte(ctx, new));
+ return 0;
+ }
+
if (!stage2_try_break_pte(ctx, data->mmu))
return -EAGAIN;
@@ -1370,7 +1385,7 @@ static int stage2_flush_walker(const struct kvm_pgtable_visit_ctx *ctx,
struct kvm_pgtable *pgt = ctx->arg;
struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
- if (!kvm_pte_valid(ctx->old) || !stage2_pte_cacheable(pgt, ctx->old))
+ if (!stage2_pte_cacheable(pgt, ctx->old))
return 0;
if (mm_ops->dcache_clean_inval_poc)
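
An aside on the new fast path in stage2_map_walker_try_leaf(): a PTE update can skip break-before-make when the old and new descriptors differ only in the software-defined attribute bits. A minimal standalone sketch of that test, assuming the software field is bits [58:55] as in the kernel's KVM_PTE_LEAF_ATTR_HI_SW:

/*
 * Standalone sketch of the "software bits only" test: XOR exposes
 * the changed bits, then everything outside the software field is
 * masked off.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define GENMASK64(h, l)	(((~0ULL) << (l)) & (~0ULL >> (63 - (h))))
#define PTE_ATTR_HI_SW	GENMASK64(58, 55)	/* assumed software field */

static bool only_sw_bits_differ(uint64_t old, uint64_t new)
{
	return !((old ^ new) & ~PTE_ATTR_HI_SW);
}

int main(void)
{
	uint64_t pte = 0x40000000000747ULL;	/* arbitrary valid-looking PTE */

	/* A software bit flip (bit 55) takes the in-place store path. */
	assert(only_sw_bits_differ(pte, pte | (1ULL << 55)));
	/* A hardware attribute change (bit 54, XN) still needs break-before-make. */
	assert(!only_sw_bits_differ(pte, pte | (1ULL << 54)));
	return 0;
}
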
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index 6cb638b184b1..7b397fad26f2 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -330,7 +330,7 @@ void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if)
write_gicreg(0, ICH_HCR_EL2);
}
-void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
+static void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
{
u64 val;
u32 nr_pre_bits;
@@ -363,7 +363,7 @@ void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
}
}
-void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if)
+static void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if)
{
u64 val;
u32 nr_pre_bits;
@@ -455,16 +455,35 @@ u64 __vgic_v3_get_gic_config(void)
return val;
}
-u64 __vgic_v3_read_vmcr(void)
+static u64 __vgic_v3_read_vmcr(void)
{
return read_gicreg(ICH_VMCR_EL2);
}
-void __vgic_v3_write_vmcr(u32 vmcr)
+static void __vgic_v3_write_vmcr(u32 vmcr)
{
write_gicreg(vmcr, ICH_VMCR_EL2);
}
+void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if)
+{
+ __vgic_v3_save_aprs(cpu_if);
+ if (cpu_if->vgic_sre)
+ cpu_if->vgic_vmcr = __vgic_v3_read_vmcr();
+}
+
+void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if)
+{
+ /*
+ * If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen
+ * is dependent on ICC_SRE_EL1.SRE, and we have to perform the
+ * VMCR_EL2 save/restore in the world switch.
+ */
+ if (cpu_if->vgic_sre)
+ __vgic_v3_write_vmcr(cpu_if->vgic_vmcr);
+ __vgic_v3_restore_aprs(cpu_if);
+}
+
static int __vgic_v3_bpr_min(void)
{
/* See Pseudocode for VPriorityGroup */
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 1581df6aec87..d7af5f46f22a 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -33,11 +33,43 @@ DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data);
DEFINE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
DEFINE_PER_CPU(unsigned long, kvm_hyp_vector);
+/*
+ * HCR_EL2 bits that the NV guest can freely change (no RES0/RES1
+ * semantics, irrespective of the configuration), but that cannot be
+ * applied to the actual HW as things would otherwise break badly.
+ *
+ * - TGE: we want the guest to use EL1, which is incompatible with
+ * this bit being set
+ *
+ * - API/APK: they are already accounted for by vcpu_load(), and can
+ * only take effect across a load/put cycle (such as ERET)
+ */
+#define NV_HCR_GUEST_EXCLUDE (HCR_TGE | HCR_API | HCR_APK)
+
+static u64 __compute_hcr(struct kvm_vcpu *vcpu)
+{
+ u64 hcr = vcpu->arch.hcr_el2;
+
+ if (!vcpu_has_nv(vcpu))
+ return hcr;
+
+ if (is_hyp_ctxt(vcpu)) {
+ hcr |= HCR_NV | HCR_NV2 | HCR_AT | HCR_TTLB;
+
+ if (!vcpu_el2_e2h_is_set(vcpu))
+ hcr |= HCR_NV1;
+
+ write_sysreg_s(vcpu->arch.ctxt.vncr_array, SYS_VNCR_EL2);
+ }
+
+ return hcr | (__vcpu_sys_reg(vcpu, HCR_EL2) & ~NV_HCR_GUEST_EXCLUDE);
+}
+
static void __activate_traps(struct kvm_vcpu *vcpu)
{
u64 val;
- ___activate_traps(vcpu);
+ ___activate_traps(vcpu, __compute_hcr(vcpu));
if (has_cntpoff()) {
struct timer_map map;
@@ -75,7 +107,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
val |= CPTR_EL2_TAM;
- if (guest_owns_fp_regs(vcpu)) {
+ if (guest_owns_fp_regs()) {
if (vcpu_has_sve(vcpu))
val |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
} else {
@@ -162,6 +194,8 @@ static void __vcpu_put_deactivate_traps(struct kvm_vcpu *vcpu)
void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu)
{
+ host_data_ptr(host_ctxt)->__hyp_running_vcpu = vcpu;
+
__vcpu_load_switch_sysregs(vcpu);
__vcpu_load_activate_traps(vcpu);
__load_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch);
@@ -171,6 +205,61 @@ void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu)
{
__vcpu_put_deactivate_traps(vcpu);
__vcpu_put_switch_sysregs(vcpu);
+
+ host_data_ptr(host_ctxt)->__hyp_running_vcpu = NULL;
+}
+
+static bool kvm_hyp_handle_eret(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ u64 esr = kvm_vcpu_get_esr(vcpu);
+ u64 spsr, elr, mode;
+
+ /*
+ * Going through the whole put/load motions is a waste of time
+ * if this is a VHE guest hypervisor returning to its own
+ * userspace, or the hypervisor performing a local exception
+ * return. No need to save/restore registers, no need to
+ * switch S2 MMU. Just do the canonical ERET.
+ *
+ * Unless the trap has to be forwarded further down the line,
+ * of course...
+ */
+ if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_NV) ||
+ (__vcpu_sys_reg(vcpu, HFGITR_EL2) & HFGITR_EL2_ERET))
+ return false;
+
+ spsr = read_sysreg_el1(SYS_SPSR);
+ mode = spsr & (PSR_MODE_MASK | PSR_MODE32_BIT);
+
+ switch (mode) {
+ case PSR_MODE_EL0t:
+ if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)))
+ return false;
+ break;
+ case PSR_MODE_EL2t:
+ mode = PSR_MODE_EL1t;
+ break;
+ case PSR_MODE_EL2h:
+ mode = PSR_MODE_EL1h;
+ break;
+ default:
+ return false;
+ }
+
+ /* If ERETAx fails, take the slow path */
+ if (esr_iss_is_eretax(esr)) {
+ if (!(vcpu_has_ptrauth(vcpu) && kvm_auth_eretax(vcpu, &elr)))
+ return false;
+ } else {
+ elr = read_sysreg_el1(SYS_ELR);
+ }
+
+ spsr = (spsr & ~(PSR_MODE_MASK | PSR_MODE32_BIT)) | mode;
+
+ write_sysreg_el2(spsr, SYS_SPSR);
+ write_sysreg_el2(elr, SYS_ELR);
+
+ return true;
}
static const exit_handler_fn hyp_exit_handlers[] = {
@@ -182,7 +271,7 @@ static const exit_handler_fn hyp_exit_handlers[] = {
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
- [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
+ [ESR_ELx_EC_ERET] = kvm_hyp_handle_eret,
[ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops,
};
@@ -197,7 +286,7 @@ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
* If we were in HYP context on entry, adjust the PSTATE view
* so that the usual helpers work correctly.
*/
- if (unlikely(vcpu_get_flag(vcpu, VCPU_HYP_CONTEXT))) {
+ if (vcpu_has_nv(vcpu) && (read_sysreg(hcr_el2) & HCR_NV)) {
u64 mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT);
switch (mode) {
@@ -221,8 +310,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
struct kvm_cpu_context *guest_ctxt;
u64 exit_code;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
- host_ctxt->__hyp_running_vcpu = vcpu;
+ host_ctxt = host_data_ptr(host_ctxt);
guest_ctxt = &vcpu->arch.ctxt;
sysreg_save_host_state_vhe(host_ctxt);
@@ -240,11 +328,6 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
sysreg_restore_guest_state_vhe(guest_ctxt);
__debug_switch_to_guest(vcpu);
- if (is_hyp_ctxt(vcpu))
- vcpu_set_flag(vcpu, VCPU_HYP_CONTEXT);
- else
- vcpu_clear_flag(vcpu, VCPU_HYP_CONTEXT);
-
do {
/* Jump in the fire! */
exit_code = __guest_enter(vcpu);
@@ -258,7 +341,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
sysreg_restore_host_state_vhe(host_ctxt);
- if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)
+ if (guest_owns_fp_regs())
__fpsimd_save_fpexc32(vcpu);
__debug_switch_to_host(vcpu);
@@ -306,7 +389,7 @@ static void __hyp_call_panic(u64 spsr, u64 elr, u64 par)
struct kvm_cpu_context *host_ctxt;
struct kvm_vcpu *vcpu;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
vcpu = host_ctxt->__hyp_running_vcpu;
__deactivate_traps(vcpu);
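
For reference, the mode rewrite performed by kvm_hyp_handle_eret() boils down to demoting the vEL2 modes found in the guest's SPSR to their EL1 equivalents before issuing the real ERET. A standalone sketch using the architectural PSTATE.M encodings (AArch32 modes and the PSR_MODE32_BIT handling are deliberately ignored here):

/*
 * Standalone sketch of the ERET fast-path mode rewrite:
 * EL0t=0x0, EL1t=0x4, EL1h=0x5, EL2t=0x8, EL2h=0x9.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define PSR_MODE_EL0t	0x0ULL
#define PSR_MODE_EL1t	0x4ULL
#define PSR_MODE_EL1h	0x5ULL
#define PSR_MODE_EL2t	0x8ULL
#define PSR_MODE_EL2h	0x9ULL
#define PSR_MODE_MASK	0xfULL

/* Returns false if the slow path (full exit handling) must be taken. */
static bool demote_eret_mode(uint64_t *spsr, bool e2h_and_tge)
{
	uint64_t mode = *spsr & PSR_MODE_MASK;

	switch (mode) {
	case PSR_MODE_EL0t:
		/* Only a VHE guest hypervisor (E2H+TGE) ERETs straight to EL0. */
		if (!e2h_and_tge)
			return false;
		break;
	case PSR_MODE_EL2t:
		mode = PSR_MODE_EL1t;
		break;
	case PSR_MODE_EL2h:
		mode = PSR_MODE_EL1h;
		break;
	default:
		return false;
	}

	*spsr = (*spsr & ~PSR_MODE_MASK) | mode;
	return true;
}

int main(void)
{
	uint64_t spsr = 0x3c0 | PSR_MODE_EL2h;	/* DAIF set, vEL2h */

	assert(demote_eret_mode(&spsr, false));
	assert((spsr & PSR_MODE_MASK) == PSR_MODE_EL1h);
	return 0;
}
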
diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
index a8b9ea496706..e12bd7d6d2dc 100644
--- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
@@ -67,7 +67,7 @@ void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu)
struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
struct kvm_cpu_context *host_ctxt;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
__sysreg_save_user_state(host_ctxt);
/*
@@ -110,7 +110,7 @@ void __vcpu_put_switch_sysregs(struct kvm_vcpu *vcpu)
struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
struct kvm_cpu_context *host_ctxt;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
__sysreg_save_el1_state(guest_ctxt);
__sysreg_save_user_state(guest_ctxt);
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index 1a60b95381e8..5fa0359f3a87 100644
--- a/arch/arm64/kvm/hyp/vhe/tlb.c
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -17,8 +17,8 @@ struct tlb_inv_context {
u64 sctlr;
};
-static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
- struct tlb_inv_context *cxt)
+static void enter_vmid_context(struct kvm_s2_mmu *mmu,
+ struct tlb_inv_context *cxt)
{
struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
u64 val;
@@ -67,7 +67,7 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
isb();
}
-static void __tlb_switch_to_host(struct tlb_inv_context *cxt)
+static void exit_vmid_context(struct tlb_inv_context *cxt)
{
/*
* We're done with the TLB operation, let's restore the host's
@@ -97,7 +97,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
dsb(ishst);
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ enter_vmid_context(mmu, &cxt);
/*
* We could do so much better if we had the VA as well.
@@ -118,7 +118,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
dsb(ish);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
@@ -129,7 +129,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
dsb(nshst);
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ enter_vmid_context(mmu, &cxt);
/*
* We could do so much better if we had the VA as well.
@@ -150,7 +150,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
dsb(nsh);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
@@ -169,7 +169,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
dsb(ishst);
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ enter_vmid_context(mmu, &cxt);
__flush_s2_tlb_range_op(ipas2e1is, start, pages, stride,
TLBI_TTL_UNKNOWN);
@@ -179,7 +179,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
dsb(ish);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
@@ -189,13 +189,13 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
dsb(ishst);
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ enter_vmid_context(mmu, &cxt);
__tlbi(vmalls12e1is);
dsb(ish);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
@@ -203,14 +203,14 @@ void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
struct tlb_inv_context cxt;
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ enter_vmid_context(mmu, &cxt);
__tlbi(vmalle1);
asm volatile("ic iallu");
dsb(nsh);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_flush_vm_context(void)
diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c
index 200c8019a82a..cd6b7b83e2c3 100644
--- a/arch/arm64/kvm/mmio.c
+++ b/arch/arm64/kvm/mmio.c
@@ -86,7 +86,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu)
/* Detect an already handled MMIO return */
if (unlikely(!vcpu->mmio_needed))
- return 0;
+ return 1;
vcpu->mmio_needed = 0;
@@ -117,7 +117,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu)
*/
kvm_incr_pc(vcpu);
- return 0;
+ return 1;
}
int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
@@ -133,11 +133,19 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
/*
* No valid syndrome? Ask userspace for help if it has
* volunteered to do so, and bail out otherwise.
+ *
+ * In the protected VM case, there isn't much userspace can do
+ * though, so directly deliver an exception to the guest.
*/
if (!kvm_vcpu_dabt_isvalid(vcpu)) {
trace_kvm_mmio_nisv(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu),
kvm_vcpu_get_hfar(vcpu), fault_ipa);
+ if (vcpu_is_protected(vcpu)) {
+ kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+ return 1;
+ }
+
if (test_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER,
&vcpu->kvm->arch.flags)) {
run->exit_reason = KVM_EXIT_ARM_NISV;
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index dc04bc767865..8bcab0cc3fe9 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1522,8 +1522,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
read_lock(&kvm->mmu_lock);
pgt = vcpu->arch.hw_mmu->pgt;
- if (mmu_invalidate_retry(kvm, mmu_seq))
+ if (mmu_invalidate_retry(kvm, mmu_seq)) {
+ ret = -EAGAIN;
goto out_unlock;
+ }
/*
* If we are not forced to use page mapping, check if we are
@@ -1581,6 +1583,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
memcache,
KVM_PGTABLE_WALK_HANDLE_FAULT |
KVM_PGTABLE_WALK_SHARED);
+out_unlock:
+ read_unlock(&kvm->mmu_lock);
/* Mark the page dirty only if the fault is handled successfully */
if (writable && !ret) {
@@ -1588,8 +1592,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
mark_page_dirty_in_slot(kvm, memslot, gfn);
}
-out_unlock:
- read_unlock(&kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
return ret != -EAGAIN ? ret : 0;
}
@@ -1768,40 +1770,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
return false;
}
-bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
-{
- kvm_pfn_t pfn = pte_pfn(range->arg.pte);
-
- if (!kvm->arch.mmu.pgt)
- return false;
-
- WARN_ON(range->end - range->start != 1);
-
- /*
- * If the page isn't tagged, defer to user_mem_abort() for sanitising
- * the MTE tags. The S2 pte should have been unmapped by
- * mmu_notifier_invalidate_range_end().
- */
- if (kvm_has_mte(kvm) && !page_mte_tagged(pfn_to_page(pfn)))
- return false;
-
- /*
- * We've moved a page around, probably through CoW, so let's treat
- * it just like a translation fault and the map handler will clean
- * the cache to the PoC.
- *
- * The MMU notifiers will have unmapped a huge PMD before calling
- * ->change_pte() (which in turn calls kvm_set_spte_gfn()) and
- * therefore we never need to clear out a huge PMD through this
- * calling path and a memcache is not required.
- */
- kvm_pgtable_stage2_map(kvm->arch.mmu.pgt, range->start << PAGE_SHIFT,
- PAGE_SIZE, __pfn_to_phys(pfn),
- KVM_PGTABLE_PROT_R, NULL, 0);
-
- return false;
-}
-
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
u64 size = (range->end - range->start) << PAGE_SHIFT;
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index ced30c90521a..6813c7c7f00a 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -35,13 +35,9 @@ static u64 limit_nv_id_reg(u32 id, u64 val)
break;
case SYS_ID_AA64ISAR1_EL1:
- /* Support everything but PtrAuth and Spec Invalidation */
+ /* Support everything but Spec Invalidation */
val &= ~(GENMASK_ULL(63, 56) |
- NV_FTR(ISAR1, SPECRES) |
- NV_FTR(ISAR1, GPI) |
- NV_FTR(ISAR1, GPA) |
- NV_FTR(ISAR1, API) |
- NV_FTR(ISAR1, APA));
+ NV_FTR(ISAR1, SPECRES));
break;
case SYS_ID_AA64PFR0_EL1:
diff --git a/arch/arm64/kvm/pauth.c b/arch/arm64/kvm/pauth.c
new file mode 100644
index 000000000000..d5eb3ae876be
--- /dev/null
+++ b/arch/arm64/kvm/pauth.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2024 - Google LLC
+ * Author: Marc Zyngier <maz@kernel.org>
+ *
+ * Primitive PAuth emulation for ERETAA/ERETAB.
+ *
+ * This code assumes that it is run from EL2, and that it is part of
+ * the emulation of ERETAx for a guest hypervisor. That's a lot of
+ * baked-in assumptions and shortcuts.
+ *
+ * Do not reuse for anything else!
+ */
+
+#include <linux/kvm_host.h>
+
+#include <asm/gpr-num.h>
+#include <asm/kvm_emulate.h>
+#include <asm/pointer_auth.h>
+
+/* PACGA Xd, Xn, Xm */
+#define PACGA(d,n,m) \
+ asm volatile(__DEFINE_ASM_GPR_NUMS \
+ ".inst 0x9AC03000 |" \
+ "(.L__gpr_num_%[Rd] << 0) |" \
+ "(.L__gpr_num_%[Rn] << 5) |" \
+ "(.L__gpr_num_%[Rm] << 16)\n" \
+ : [Rd] "=r" ((d)) \
+ : [Rn] "r" ((n)), [Rm] "r" ((m)))
+
+static u64 compute_pac(struct kvm_vcpu *vcpu, u64 ptr,
+ struct ptrauth_key ikey)
+{
+ struct ptrauth_key gkey;
+ u64 mod, pac = 0;
+
+ preempt_disable();
+
+ if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU))
+ mod = __vcpu_sys_reg(vcpu, SP_EL2);
+ else
+ mod = read_sysreg(sp_el1);
+
+ gkey.lo = read_sysreg_s(SYS_APGAKEYLO_EL1);
+ gkey.hi = read_sysreg_s(SYS_APGAKEYHI_EL1);
+
+ __ptrauth_key_install_nosync(APGA, ikey);
+ isb();
+
+ PACGA(pac, ptr, mod);
+ isb();
+
+ __ptrauth_key_install_nosync(APGA, gkey);
+
+ preempt_enable();
+
+ /* PAC in the top 32bits */
+ return pac;
+}
+
+static bool effective_tbi(struct kvm_vcpu *vcpu, bool bit55)
+{
+ u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
+ bool tbi, tbid;
+
+ /*
+ * Since we are authenticating an instruction address, we have
+ * to take TBID into account. If E2H==0, ignore VA[55], as
+ * TCR_EL2 only has a single TBI/TBID. If VA[55] was set in
+ * this case, this is likely a guest bug...
+ */
+ if (!vcpu_el2_e2h_is_set(vcpu)) {
+ tbi = tcr & BIT(20);
+ tbid = tcr & BIT(29);
+ } else if (bit55) {
+ tbi = tcr & TCR_TBI1;
+ tbid = tcr & TCR_TBID1;
+ } else {
+ tbi = tcr & TCR_TBI0;
+ tbid = tcr & TCR_TBID0;
+ }
+
+ return tbi && !tbid;
+}
+
+static int compute_bottom_pac(struct kvm_vcpu *vcpu, bool bit55)
+{
+ static const int maxtxsz = 39; // Revisit these two values once
+ static const int mintxsz = 16; // (if) we support TTST/LVA/LVA2
+ u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
+ int txsz;
+
+ if (!vcpu_el2_e2h_is_set(vcpu) || !bit55)
+ txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
+ else
+ txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
+
+ return 64 - clamp(txsz, mintxsz, maxtxsz);
+}
+
+static u64 compute_pac_mask(struct kvm_vcpu *vcpu, bool bit55)
+{
+ int bottom_pac;
+ u64 mask;
+
+ bottom_pac = compute_bottom_pac(vcpu, bit55);
+
+ mask = GENMASK(54, bottom_pac);
+ if (!effective_tbi(vcpu, bit55))
+ mask |= GENMASK(63, 56);
+
+ return mask;
+}
+
+static u64 to_canonical_addr(struct kvm_vcpu *vcpu, u64 ptr, u64 mask)
+{
+ bool bit55 = !!(ptr & BIT(55));
+
+ if (bit55)
+ return ptr | mask;
+
+ return ptr & ~mask;
+}
+
+static u64 corrupt_addr(struct kvm_vcpu *vcpu, u64 ptr)
+{
+ bool bit55 = !!(ptr & BIT(55));
+ u64 mask, error_code;
+ int shift;
+
+ if (effective_tbi(vcpu, bit55)) {
+ mask = GENMASK(54, 53);
+ shift = 53;
+ } else {
+ mask = GENMASK(62, 61);
+ shift = 61;
+ }
+
+ if (esr_iss_is_eretab(kvm_vcpu_get_esr(vcpu)))
+ error_code = 2 << shift;
+ else
+ error_code = 1 << shift;
+
+ ptr &= ~mask;
+ ptr |= error_code;
+
+ return ptr;
+}
+
+/*
+ * Authenticate an ERETAA/ERETAB instruction, returning true if the
+ * authentication succeeded and false otherwise. In all cases, *elr
+ * contains the VA to ERET to. Potential exception injection is left
+ * to the caller.
+ */
+bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr)
+{
+ u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);
+ u64 esr = kvm_vcpu_get_esr(vcpu);
+ u64 ptr, cptr, pac, mask;
+ struct ptrauth_key ikey;
+
+ *elr = ptr = vcpu_read_sys_reg(vcpu, ELR_EL2);
+
+ /* We assume we're already in the context of an ERETAx */
+ if (esr_iss_is_eretab(esr)) {
+ if (!(sctlr & SCTLR_EL1_EnIB))
+ return true;
+
+ ikey.lo = __vcpu_sys_reg(vcpu, APIBKEYLO_EL1);
+ ikey.hi = __vcpu_sys_reg(vcpu, APIBKEYHI_EL1);
+ } else {
+ if (!(sctlr & SCTLR_EL1_EnIA))
+ return true;
+
+ ikey.lo = __vcpu_sys_reg(vcpu, APIAKEYLO_EL1);
+ ikey.hi = __vcpu_sys_reg(vcpu, APIAKEYHI_EL1);
+ }
+
+ mask = compute_pac_mask(vcpu, !!(ptr & BIT(55)));
+ cptr = to_canonical_addr(vcpu, ptr, mask);
+
+ pac = compute_pac(vcpu, cptr, ikey);
+
+ /*
+ * Slightly deviate from the pseudocode: if we have a PAC
+ * match with the signed pointer, then it must be good.
+ * Anything after this point is pure error handling.
+ */
+ if ((pac & mask) == (ptr & mask)) {
+ *elr = cptr;
+ return true;
+ }
+
+ /*
+ * Authentication failed: corrupt the canonical address if
+ * PAuth2 isn't implemented, or XOR the pointer with the
+ * computed PAC if it is.
+ */
+ if (!kvm_has_pauth(vcpu->kvm, PAuth2))
+ cptr = corrupt_addr(vcpu, cptr);
+ else
+ cptr = ptr ^ (pac & mask);
+
+ *elr = cptr;
+ return false;
+}
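
As an aside, the PAC-mask arithmetic in compute_bottom_pac()/compute_pac_mask()/to_canonical_addr() can be checked in isolation: the PAC occupies bits [54 : 64-TxSZ], plus bits [63:56] when TBI is not in effect, and bit 55 decides whether canonicalisation fills the field with ones or zeroes. A minimal standalone sketch (illustration only; the constants mirror the diff):

/* Standalone sketch of the PAC mask and canonical-address arithmetic. */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define GENMASK64(h, l)	(((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

static int bottom_pac(int txsz)
{
	/* Same 16..39 clamp as the diff (no TTST/LVA/LVA2 support assumed). */
	if (txsz < 16)
		txsz = 16;
	if (txsz > 39)
		txsz = 39;
	return 64 - txsz;
}

static uint64_t pac_mask(int txsz, bool tbi)
{
	uint64_t mask = GENMASK64(54, bottom_pac(txsz));

	if (!tbi)
		mask |= GENMASK64(63, 56);
	return mask;
}

static uint64_t canonical(uint64_t ptr, uint64_t mask)
{
	/* Bit 55 selects whether the PAC field becomes all-ones or all-zeroes. */
	return (ptr & (1ULL << 55)) ? (ptr | mask) : (ptr & ~mask);
}

int main(void)
{
	/* 48-bit VA space (TxSZ = 16), no TBI: PAC lives in [63:56] and [54:48]. */
	uint64_t mask = pac_mask(16, false);

	assert(mask == (GENMASK64(63, 56) | GENMASK64(54, 48)));
	printf("canonical(0xff00dead0000f000) = %#llx\n",
	       (unsigned long long)canonical(0xff00dead0000f000ULL, mask));
	return 0;
}
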
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index b7be96a53597..85117ea8f351 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -222,7 +222,6 @@ void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
int pkvm_init_host_vm(struct kvm *host_kvm)
{
- mutex_init(&host_kvm->lock);
return 0;
}
@@ -259,6 +258,7 @@ static int __init finalize_pkvm(void)
* at, which would end badly once inaccessible.
*/
kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
+ kmemleak_free_part(__hyp_rodata_start, __hyp_rodata_end - __hyp_rodata_start);
kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);
ret = pkvm_drop_host_privileges();
diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c
index a243934c5568..329819806096 100644
--- a/arch/arm64/kvm/pmu.c
+++ b/arch/arm64/kvm/pmu.c
@@ -232,7 +232,7 @@ bool kvm_set_pmuserenr(u64 val)
if (!vcpu || !vcpu_get_flag(vcpu, PMUSERENR_ON_CPU))
return false;
- hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ hctxt = host_data_ptr(host_ctxt);
ctxt_sys_reg(hctxt, PMUSERENR_EL0) = val;
return true;
}
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 68d1d05672bd..1b7b58cb121f 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -151,7 +151,6 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu)
{
void *sve_state = vcpu->arch.sve_state;
- kvm_vcpu_unshare_task_fp(vcpu);
kvm_unshare_hyp(vcpu, vcpu + 1);
if (sve_state)
kvm_unshare_hyp(sve_state, sve_state + vcpu_sve_state_size(vcpu));
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index c9f4f387155f..22b45a15d068 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1568,17 +1568,31 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *r
return IDREG(vcpu->kvm, reg_to_encoding(r));
}
+static bool is_feature_id_reg(u32 encoding)
+{
+ return (sys_reg_Op0(encoding) == 3 &&
+ (sys_reg_Op1(encoding) < 2 || sys_reg_Op1(encoding) == 3) &&
+ sys_reg_CRn(encoding) == 0 &&
+ sys_reg_CRm(encoding) <= 7);
+}
+
/*
* Return true if the register's (Op0, Op1, CRn, CRm, Op2) is
- * (3, 0, 0, crm, op2), where 1<=crm<8, 0<=op2<8.
+ * (3, 0, 0, crm, op2), where 1<=crm<8, 0<=op2<8, which is the range of ID
+ * registers KVM maintains on a per-VM basis.
*/
-static inline bool is_id_reg(u32 id)
+static inline bool is_vm_ftr_id_reg(u32 id)
{
return (sys_reg_Op0(id) == 3 && sys_reg_Op1(id) == 0 &&
sys_reg_CRn(id) == 0 && sys_reg_CRm(id) >= 1 &&
sys_reg_CRm(id) < 8);
}
+static inline bool is_vcpu_ftr_id_reg(u32 id)
+{
+ return is_feature_id_reg(id) && !is_vm_ftr_id_reg(id);
+}
+
static inline bool is_aa32_id_reg(u32 id)
{
return (sys_reg_Op0(id) == 3 && sys_reg_Op1(id) == 0 &&
@@ -2338,7 +2352,6 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_AA64MMFR0_EL1_TGRAN16_2)),
ID_WRITABLE(ID_AA64MMFR1_EL1, ~(ID_AA64MMFR1_EL1_RES0 |
ID_AA64MMFR1_EL1_HCX |
- ID_AA64MMFR1_EL1_XNX |
ID_AA64MMFR1_EL1_TWED |
ID_AA64MMFR1_EL1_XNX |
ID_AA64MMFR1_EL1_VH |
@@ -3069,12 +3082,14 @@ static bool check_sysreg_table(const struct sys_reg_desc *table, unsigned int n,
for (i = 0; i < n; i++) {
if (!is_32 && table[i].reg && !table[i].reset) {
- kvm_err("sys_reg table %pS entry %d lacks reset\n", &table[i], i);
+ kvm_err("sys_reg table %pS entry %d (%s) lacks reset\n",
+ &table[i], i, table[i].name);
return false;
}
if (i && cmp_sys_reg(&table[i-1], &table[i]) >= 0) {
- kvm_err("sys_reg table %pS entry %d out of order\n", &table[i - 1], i - 1);
+ kvm_err("sys_reg table %pS entry %d (%s -> %s) out of order\n",
+ &table[i], i, table[i - 1].name, table[i].name);
return false;
}
}
@@ -3509,26 +3524,25 @@ void kvm_sys_regs_create_debugfs(struct kvm *kvm)
&idregs_debug_fops);
}
-static void kvm_reset_id_regs(struct kvm_vcpu *vcpu)
+static void reset_vm_ftr_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *reg)
{
- const struct sys_reg_desc *idreg = first_idreg;
- u32 id = reg_to_encoding(idreg);
+ u32 id = reg_to_encoding(reg);
struct kvm *kvm = vcpu->kvm;
if (test_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags))
return;
lockdep_assert_held(&kvm->arch.config_lock);
+ IDREG(kvm, id) = reg->reset(vcpu, reg);
+}
- /* Initialize all idregs */
- while (is_id_reg(id)) {
- IDREG(kvm, id) = idreg->reset(vcpu, idreg);
-
- idreg++;
- id = reg_to_encoding(idreg);
- }
+static void reset_vcpu_ftr_id_reg(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *reg)
+{
+ if (kvm_vcpu_initialized(vcpu))
+ return;
- set_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags);
+ reg->reset(vcpu, reg);
}
/**
@@ -3540,19 +3554,24 @@ static void kvm_reset_id_regs(struct kvm_vcpu *vcpu)
*/
void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
{
+ struct kvm *kvm = vcpu->kvm;
unsigned long i;
- kvm_reset_id_regs(vcpu);
-
for (i = 0; i < ARRAY_SIZE(sys_reg_descs); i++) {
const struct sys_reg_desc *r = &sys_reg_descs[i];
- if (is_id_reg(reg_to_encoding(r)))
+ if (!r->reset)
continue;
- if (r->reset)
+ if (is_vm_ftr_id_reg(reg_to_encoding(r)))
+ reset_vm_ftr_id_reg(vcpu, r);
+ else if (is_vcpu_ftr_id_reg(reg_to_encoding(r)))
+ reset_vcpu_ftr_id_reg(vcpu, r);
+ else
r->reset(vcpu, r);
}
+
+ set_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags);
}
/**
@@ -3978,14 +3997,6 @@ int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
sys_reg_CRm(r), \
sys_reg_Op2(r))
-static bool is_feature_id_reg(u32 encoding)
-{
- return (sys_reg_Op0(encoding) == 3 &&
- (sys_reg_Op1(encoding) < 2 || sys_reg_Op1(encoding) == 3) &&
- sys_reg_CRn(encoding) == 0 &&
- sys_reg_CRm(encoding) <= 7);
-}
-
int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm, struct reg_mask_range *range)
{
const void *zero_page = page_to_virt(ZERO_PAGE(0));
@@ -4014,7 +4025,7 @@ int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm, struct reg_mask_range *
* compliant with a given revision of the architecture, but the
* RES0/RES1 definitions allow us to do that.
*/
- if (is_id_reg(encoding)) {
+ if (is_vm_ftr_id_reg(encoding)) {
if (!reg->val ||
(is_aa32_id_reg(encoding) && !kvm_supports_32bit_el0()))
continue;
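
An aside on the new predicates: is_vm_ftr_id_reg() covers the (3, 0, 0, 1..7, x) block that KVM tracks per VM, while is_vcpu_ftr_id_reg() is the remainder of the wider feature ID space matched by is_feature_id_reg(). A standalone sketch over the raw encoding fields (types and names here are illustrative, not the kernel's packed encoding):

/* Standalone sketch of the feature ID register classification. */
#include <assert.h>
#include <stdbool.h>

struct sysreg_enc { unsigned op0, op1, crn, crm; };

static bool is_feature_id_reg(struct sysreg_enc e)
{
	return e.op0 == 3 && (e.op1 < 2 || e.op1 == 3) && e.crn == 0 && e.crm <= 7;
}

static bool is_vm_ftr_id_reg(struct sysreg_enc e)
{
	return e.op0 == 3 && e.op1 == 0 && e.crn == 0 && e.crm >= 1 && e.crm < 8;
}

static bool is_vcpu_ftr_id_reg(struct sysreg_enc e)
{
	return is_feature_id_reg(e) && !is_vm_ftr_id_reg(e);
}

int main(void)
{
	struct sysreg_enc id_aa64pfr0 = { 3, 0, 0, 4 };	/* ID_AA64PFR0_EL1 */
	struct sysreg_enc crm0_reg    = { 3, 0, 0, 0 };	/* CRm==0 block (MIDR & co) */

	assert(is_vm_ftr_id_reg(id_aa64pfr0));		/* per-VM: reset once per VM */
	assert(!is_vm_ftr_id_reg(crm0_reg));		/* outside the per-VM block */
	assert(is_vcpu_ftr_id_reg(crm0_reg));		/* reset per vCPU instead */
	return 0;
}
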
diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c
index 389025ce7749..bcbc8c986b1d 100644
--- a/arch/arm64/kvm/vgic/vgic-debug.c
+++ b/arch/arm64/kvm/vgic/vgic-debug.c
@@ -28,27 +28,65 @@ struct vgic_state_iter {
int nr_lpis;
int dist_id;
int vcpu_id;
- int intid;
+ unsigned long intid;
int lpi_idx;
- u32 *lpi_array;
};
-static void iter_next(struct vgic_state_iter *iter)
+static void iter_next(struct kvm *kvm, struct vgic_state_iter *iter)
{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+
if (iter->dist_id == 0) {
iter->dist_id++;
return;
}
+ /*
+ * Let the xarray drive the iterator after the last SPI, as the iterator
+ * has exhausted the sequentially-allocated INTID space.
+ */
+ if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS - 1)) {
+ if (iter->lpi_idx < iter->nr_lpis)
+ xa_find_after(&dist->lpi_xa, &iter->intid,
+ VGIC_LPI_MAX_INTID,
+ LPI_XA_MARK_DEBUG_ITER);
+ iter->lpi_idx++;
+ return;
+ }
+
iter->intid++;
if (iter->intid == VGIC_NR_PRIVATE_IRQS &&
++iter->vcpu_id < iter->nr_cpus)
iter->intid = 0;
+}
- if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS)) {
- if (iter->lpi_idx < iter->nr_lpis)
- iter->intid = iter->lpi_array[iter->lpi_idx];
- iter->lpi_idx++;
+static int iter_mark_lpis(struct kvm *kvm)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct vgic_irq *irq;
+ unsigned long intid;
+ int nr_lpis = 0;
+
+ xa_for_each(&dist->lpi_xa, intid, irq) {
+ if (!vgic_try_get_irq_kref(irq))
+ continue;
+
+ xa_set_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER);
+ nr_lpis++;
+ }
+
+ return nr_lpis;
+}
+
+static void iter_unmark_lpis(struct kvm *kvm)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct vgic_irq *irq;
+ unsigned long intid;
+
+ xa_for_each(&dist->lpi_xa, intid, irq) {
+ xa_clear_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER);
+ vgic_put_irq(kvm, irq);
}
}
@@ -61,15 +99,12 @@ static void iter_init(struct kvm *kvm, struct vgic_state_iter *iter,
iter->nr_cpus = nr_cpus;
iter->nr_spis = kvm->arch.vgic.nr_spis;
- if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
- iter->nr_lpis = vgic_copy_lpi_list(kvm, NULL, &iter->lpi_array);
- if (iter->nr_lpis < 0)
- iter->nr_lpis = 0;
- }
+ if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
+ iter->nr_lpis = iter_mark_lpis(kvm);
/* Fast forward to the right position if needed */
while (pos--)
- iter_next(iter);
+ iter_next(kvm, iter);
}
static bool end_of_vgic(struct vgic_state_iter *iter)
@@ -114,7 +149,7 @@ static void *vgic_debug_next(struct seq_file *s, void *v, loff_t *pos)
struct vgic_state_iter *iter = kvm->arch.vgic.iter;
++*pos;
- iter_next(iter);
+ iter_next(kvm, iter);
if (end_of_vgic(iter))
iter = NULL;
return iter;
@@ -134,13 +169,14 @@ static void vgic_debug_stop(struct seq_file *s, void *v)
mutex_lock(&kvm->arch.config_lock);
iter = kvm->arch.vgic.iter;
- kfree(iter->lpi_array);
+ iter_unmark_lpis(kvm);
kfree(iter);
kvm->arch.vgic.iter = NULL;
mutex_unlock(&kvm->arch.config_lock);
}
-static void print_dist_state(struct seq_file *s, struct vgic_dist *dist)
+static void print_dist_state(struct seq_file *s, struct vgic_dist *dist,
+ struct vgic_state_iter *iter)
{
bool v3 = dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3;
@@ -149,7 +185,7 @@ static void print_dist_state(struct seq_file *s, struct vgic_dist *dist)
seq_printf(s, "vgic_model:\t%s\n", v3 ? "GICv3" : "GICv2");
seq_printf(s, "nr_spis:\t%d\n", dist->nr_spis);
if (v3)
- seq_printf(s, "nr_lpis:\t%d\n", atomic_read(&dist->lpi_count));
+ seq_printf(s, "nr_lpis:\t%d\n", iter->nr_lpis);
seq_printf(s, "enabled:\t%d\n", dist->enabled);
seq_printf(s, "\n");
@@ -236,7 +272,7 @@ static int vgic_debug_show(struct seq_file *s, void *v)
unsigned long flags;
if (iter->dist_id == 0) {
- print_dist_state(s, &kvm->arch.vgic);
+ print_dist_state(s, &kvm->arch.vgic, iter);
return 0;
}
@@ -246,11 +282,13 @@ static int vgic_debug_show(struct seq_file *s, void *v)
if (iter->vcpu_id < iter->nr_cpus)
vcpu = kvm_get_vcpu(kvm, iter->vcpu_id);
+ /*
+ * Expect this to succeed, as iter_mark_lpis() takes a reference on
+ * every LPI to be visited.
+ */
irq = vgic_get_irq(kvm, vcpu, iter->intid);
- if (!irq) {
- seq_printf(s, " LPI %4d freed\n", iter->intid);
- return 0;
- }
+ if (WARN_ON_ONCE(!irq))
+ return -EINVAL;
raw_spin_lock_irqsave(&irq->irq_lock, flags);
print_irq_state(s, irq, vcpu);
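
For illustration (not part of the patch), the iteration order the debugfs code now implements is: the distributor entry, then per-vCPU private IRQs, then SPIs, and finally LPIs in whatever order the marked xarray yields them. A standalone model with a plain, sorted array standing in for the xarray:

/* Standalone model of the debugfs iteration order; names are illustrative. */
#include <stdio.h>

#define NR_PRIVATE_IRQS	32

struct it {
	int nr_cpus, nr_spis, nr_lpis;
	int dist_id, vcpu_id, lpi_idx;
	unsigned long intid;
	const unsigned long *lpis;	/* stands in for the marked xarray */
};

static void it_next(struct it *it)
{
	if (it->dist_id == 0) {
		it->dist_id++;
		return;
	}

	/* Past the last SPI, the LPI list drives the INTID. */
	if (it->intid >= (unsigned long)(it->nr_spis + NR_PRIVATE_IRQS - 1)) {
		if (it->lpi_idx < it->nr_lpis)
			it->intid = it->lpis[it->lpi_idx];
		it->lpi_idx++;
		return;
	}

	it->intid++;
	if (it->intid == NR_PRIVATE_IRQS && ++it->vcpu_id < it->nr_cpus)
		it->intid = 0;
}

int main(void)
{
	static const unsigned long lpis[] = { 8192, 8200 };
	struct it it = { .nr_cpus = 1, .nr_spis = 2, .nr_lpis = 2, .lpis = lpis };
	int steps = 1 + it.nr_cpus * NR_PRIVATE_IRQS + it.nr_spis + it.nr_lpis;

	for (int i = 0; i < steps; i++)
		it_next(&it);
	printf("last visited intid: %lu\n", it.intid);	/* 8200 */
	return 0;
}
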
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index f20941f83a07..8f5b7a3e7009 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -53,8 +53,6 @@ void kvm_vgic_early_init(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
- INIT_LIST_HEAD(&dist->lpi_translation_cache);
- raw_spin_lock_init(&dist->lpi_list_lock);
xa_init_flags(&dist->lpi_xa, XA_FLAGS_LOCK_IRQ);
}
@@ -182,27 +180,22 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
return 0;
}
-/**
- * kvm_vgic_vcpu_init() - Initialize static VGIC VCPU data
- * structures and register VCPU-specific KVM iodevs
- *
- * @vcpu: pointer to the VCPU being created and initialized
- *
- * Only do initialization, but do not actually enable the
- * VGIC CPU interface
- */
-int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
+static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- int ret = 0;
int i;
- vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
+ lockdep_assert_held(&vcpu->kvm->arch.config_lock);
- INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
- raw_spin_lock_init(&vgic_cpu->ap_list_lock);
- atomic_set(&vgic_cpu->vgic_v3.its_vpe.vlpi_count, 0);
+ if (vgic_cpu->private_irqs)
+ return 0;
+
+ vgic_cpu->private_irqs = kcalloc(VGIC_NR_PRIVATE_IRQS,
+ sizeof(struct vgic_irq),
+ GFP_KERNEL_ACCOUNT);
+
+ if (!vgic_cpu->private_irqs)
+ return -ENOMEM;
/*
* Enable and configure all SGIs to be edge-triggered and
@@ -227,9 +220,48 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
}
}
+ return 0;
+}
+
+static int vgic_allocate_private_irqs(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ mutex_lock(&vcpu->kvm->arch.config_lock);
+ ret = vgic_allocate_private_irqs_locked(vcpu);
+ mutex_unlock(&vcpu->kvm->arch.config_lock);
+
+ return ret;
+}
+
+/**
+ * kvm_vgic_vcpu_init() - Initialize static VGIC VCPU data
+ * structures and register VCPU-specific KVM iodevs
+ *
+ * @vcpu: pointer to the VCPU being created and initialized
+ *
+ * Only do initialization, but do not actually enable the
+ * VGIC CPU interface
+ */
+int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ int ret = 0;
+
+ vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
+
+ INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
+ raw_spin_lock_init(&vgic_cpu->ap_list_lock);
+ atomic_set(&vgic_cpu->vgic_v3.its_vpe.vlpi_count, 0);
+
if (!irqchip_in_kernel(vcpu->kvm))
return 0;
+ ret = vgic_allocate_private_irqs(vcpu);
+ if (ret)
+ return ret;
+
/*
* If we are creating a VCPU with a GICv3 we must also register the
* KVM io device for the redistributor that belongs to this VCPU.
@@ -285,10 +317,13 @@ int vgic_init(struct kvm *kvm)
/* Initialize groups on CPUs created before the VGIC type was known */
kvm_for_each_vcpu(idx, vcpu, kvm) {
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ ret = vgic_allocate_private_irqs_locked(vcpu);
+ if (ret)
+ goto out;
for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
- struct vgic_irq *irq = &vgic_cpu->private_irqs[i];
+ struct vgic_irq *irq = vgic_get_irq(kvm, vcpu, i);
+
switch (dist->vgic_model) {
case KVM_DEV_TYPE_ARM_VGIC_V3:
irq->group = 1;
@@ -300,14 +335,15 @@ int vgic_init(struct kvm *kvm)
break;
default:
ret = -EINVAL;
- goto out;
}
+
+ vgic_put_irq(kvm, irq);
+
+ if (ret)
+ goto out;
}
}
- if (vgic_has_its(kvm))
- vgic_lpi_translation_cache_init(kvm);
-
/*
* If we have GICv4.1 enabled, unconditionally request enable the
* v4 support so that we get HW-accelerated vSGIs. Otherwise, only
@@ -361,9 +397,6 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
dist->vgic_cpu_base = VGIC_ADDR_UNDEF;
}
- if (vgic_has_its(kvm))
- vgic_lpi_translation_cache_destroy(kvm);
-
if (vgic_supports_direct_msis(kvm))
vgic_v4_teardown(kvm);
@@ -381,6 +414,9 @@ static void __kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
vgic_flush_pending_lpis(vcpu);
INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
+ kfree(vgic_cpu->private_irqs);
+ vgic_cpu->private_irqs = NULL;
+
if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
vgic_unregister_redist_iodev(vcpu);
vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index e85a495ada9c..40bb43f20bf3 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -23,6 +23,8 @@
#include "vgic.h"
#include "vgic-mmio.h"
+static struct kvm_device_ops kvm_arm_vgic_its_ops;
+
static int vgic_its_save_tables_v0(struct vgic_its *its);
static int vgic_its_restore_tables_v0(struct vgic_its *its);
static int vgic_its_commit_v0(struct vgic_its *its);
@@ -67,7 +69,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
irq->target_vcpu = vcpu;
irq->group = 1;
- raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
+ xa_lock_irqsave(&dist->lpi_xa, flags);
/*
* There could be a race with another vgic_add_lpi(), so we need to
@@ -82,17 +84,14 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
goto out_unlock;
}
- ret = xa_err(xa_store(&dist->lpi_xa, intid, irq, 0));
+ ret = xa_err(__xa_store(&dist->lpi_xa, intid, irq, 0));
if (ret) {
xa_release(&dist->lpi_xa, intid);
kfree(irq);
- goto out_unlock;
}
- atomic_inc(&dist->lpi_count);
-
out_unlock:
- raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
+ xa_unlock_irqrestore(&dist->lpi_xa, flags);
if (ret)
return ERR_PTR(ret);
@@ -150,14 +149,6 @@ struct its_ite {
u32 event_id;
};
-struct vgic_translation_cache_entry {
- struct list_head entry;
- phys_addr_t db;
- u32 devid;
- u32 eventid;
- struct vgic_irq *irq;
-};
-
/**
* struct vgic_its_abi - ITS abi ops and settings
* @cte_esz: collection table entry size
@@ -252,8 +243,10 @@ static struct its_ite *find_ite(struct vgic_its *its, u32 device_id,
#define GIC_LPI_OFFSET 8192
-#define VITS_TYPER_IDBITS 16
-#define VITS_TYPER_DEVBITS 16
+#define VITS_TYPER_IDBITS 16
+#define VITS_MAX_EVENTID (BIT(VITS_TYPER_IDBITS) - 1)
+#define VITS_TYPER_DEVBITS 16
+#define VITS_MAX_DEVID (BIT(VITS_TYPER_DEVBITS) - 1)
#define VITS_DTE_MAX_DEVID_OFFSET (BIT(14) - 1)
#define VITS_ITE_MAX_EVENTID_OFFSET (BIT(16) - 1)
@@ -316,53 +309,6 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
return 0;
}
-#define GIC_LPI_MAX_INTID ((1 << INTERRUPT_ID_BITS_ITS) - 1)
-
-/*
- * Create a snapshot of the current LPIs targeting @vcpu, so that we can
- * enumerate those LPIs without holding any lock.
- * Returns their number and puts the kmalloc'ed array into intid_ptr.
- */
-int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
-{
- struct vgic_dist *dist = &kvm->arch.vgic;
- XA_STATE(xas, &dist->lpi_xa, GIC_LPI_OFFSET);
- struct vgic_irq *irq;
- unsigned long flags;
- u32 *intids;
- int irq_count, i = 0;
-
- /*
- * There is an obvious race between allocating the array and LPIs
- * being mapped/unmapped. If we ended up here as a result of a
- * command, we're safe (locks are held, preventing another
- * command). If coming from another path (such as enabling LPIs),
- * we must be careful not to overrun the array.
- */
- irq_count = atomic_read(&dist->lpi_count);
- intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL_ACCOUNT);
- if (!intids)
- return -ENOMEM;
-
- raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
- rcu_read_lock();
-
- xas_for_each(&xas, irq, GIC_LPI_MAX_INTID) {
- if (i == irq_count)
- break;
- /* We don't need to "get" the IRQ, as we hold the list lock. */
- if (vcpu && irq->target_vcpu != vcpu)
- continue;
- intids[i++] = irq->intid;
- }
-
- rcu_read_unlock();
- raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
-
- *intid_ptr = intids;
- return i;
-}
-
static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu)
{
int ret = 0;
@@ -446,23 +392,18 @@ static u32 max_lpis_propbaser(u64 propbaser)
static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
{
gpa_t pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser);
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ unsigned long intid, flags;
struct vgic_irq *irq;
int last_byte_offset = -1;
int ret = 0;
- u32 *intids;
- int nr_irqs, i;
- unsigned long flags;
u8 pendmask;
- nr_irqs = vgic_copy_lpi_list(vcpu->kvm, vcpu, &intids);
- if (nr_irqs < 0)
- return nr_irqs;
-
- for (i = 0; i < nr_irqs; i++) {
+ xa_for_each(&dist->lpi_xa, intid, irq) {
int byte_offset, bit_nr;
- byte_offset = intids[i] / BITS_PER_BYTE;
- bit_nr = intids[i] % BITS_PER_BYTE;
+ byte_offset = intid / BITS_PER_BYTE;
+ bit_nr = intid % BITS_PER_BYTE;
/*
* For contiguously allocated LPIs chances are we just read
@@ -472,25 +413,23 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
ret = kvm_read_guest_lock(vcpu->kvm,
pendbase + byte_offset,
&pendmask, 1);
- if (ret) {
- kfree(intids);
+ if (ret)
return ret;
- }
+
last_byte_offset = byte_offset;
}
- irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]);
+ irq = vgic_get_irq(vcpu->kvm, NULL, intid);
if (!irq)
continue;
raw_spin_lock_irqsave(&irq->irq_lock, flags);
- irq->pending_latch = pendmask & (1U << bit_nr);
+ if (irq->target_vcpu == vcpu)
+ irq->pending_latch = pendmask & (1U << bit_nr);
vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
vgic_put_irq(vcpu->kvm, irq);
}
- kfree(intids);
-
return ret;
}
@@ -566,51 +505,52 @@ static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm,
return 0;
}
-static struct vgic_irq *__vgic_its_check_cache(struct vgic_dist *dist,
- phys_addr_t db,
- u32 devid, u32 eventid)
+static struct vgic_its *__vgic_doorbell_to_its(struct kvm *kvm, gpa_t db)
{
- struct vgic_translation_cache_entry *cte;
+ struct kvm_io_device *kvm_io_dev;
+ struct vgic_io_device *iodev;
- list_for_each_entry(cte, &dist->lpi_translation_cache, entry) {
- /*
- * If we hit a NULL entry, there is nothing after this
- * point.
- */
- if (!cte->irq)
- break;
+ kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, db);
+ if (!kvm_io_dev)
+ return ERR_PTR(-EINVAL);
- if (cte->db != db || cte->devid != devid ||
- cte->eventid != eventid)
- continue;
+ if (kvm_io_dev->ops != &kvm_io_gic_ops)
+ return ERR_PTR(-EINVAL);
- /*
- * Move this entry to the head, as it is the most
- * recently used.
- */
- if (!list_is_first(&cte->entry, &dist->lpi_translation_cache))
- list_move(&cte->entry, &dist->lpi_translation_cache);
+ iodev = container_of(kvm_io_dev, struct vgic_io_device, dev);
+ if (iodev->iodev_type != IODEV_ITS)
+ return ERR_PTR(-EINVAL);
- return cte->irq;
- }
+ return iodev->its;
+}
+
+static unsigned long vgic_its_cache_key(u32 devid, u32 eventid)
+{
+ return (((unsigned long)devid) << VITS_TYPER_IDBITS) | eventid;
- return NULL;
}
static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t db,
u32 devid, u32 eventid)
{
- struct vgic_dist *dist = &kvm->arch.vgic;
+ unsigned long cache_key = vgic_its_cache_key(devid, eventid);
+ struct vgic_its *its;
struct vgic_irq *irq;
- unsigned long flags;
- raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
+ if (devid > VITS_MAX_DEVID || eventid > VITS_MAX_EVENTID)
+ return NULL;
- irq = __vgic_its_check_cache(dist, db, devid, eventid);
+ its = __vgic_doorbell_to_its(kvm, db);
+ if (IS_ERR(its))
+ return NULL;
+
+ rcu_read_lock();
+
+ irq = xa_load(&its->translation_cache, cache_key);
if (!vgic_try_get_irq_kref(irq))
irq = NULL;
- raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
+ rcu_read_unlock();
return irq;
}
@@ -619,41 +559,13 @@ static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its,
u32 devid, u32 eventid,
struct vgic_irq *irq)
{
- struct vgic_dist *dist = &kvm->arch.vgic;
- struct vgic_translation_cache_entry *cte;
- unsigned long flags;
- phys_addr_t db;
+ unsigned long cache_key = vgic_its_cache_key(devid, eventid);
+ struct vgic_irq *old;
/* Do not cache a directly injected interrupt */
if (irq->hw)
return;
- raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
-
- if (unlikely(list_empty(&dist->lpi_translation_cache)))
- goto out;
-
- /*
- * We could have raced with another CPU caching the same
- * translation behind our back, so let's check it is not in
- * already
- */
- db = its->vgic_its_base + GITS_TRANSLATER;
- if (__vgic_its_check_cache(dist, db, devid, eventid))
- goto out;
-
- /* Always reuse the last entry (LRU policy) */
- cte = list_last_entry(&dist->lpi_translation_cache,
- typeof(*cte), entry);
-
- /*
- * Caching the translation implies having an extra reference
- * to the interrupt, so drop the potential reference on what
- * was in the cache, and increment it on the new interrupt.
- */
- if (cte->irq)
- vgic_put_irq(kvm, cte->irq);
-
/*
* The irq refcount is guaranteed to be nonzero while holding the
* its_lock, as the ITE (and the reference it holds) cannot be freed.
@@ -661,39 +573,44 @@ static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its,
lockdep_assert_held(&its->its_lock);
vgic_get_irq_kref(irq);
- cte->db = db;
- cte->devid = devid;
- cte->eventid = eventid;
- cte->irq = irq;
+ /*
+ * We could have raced with another CPU caching the same
+ * translation behind our back; ensure we don't leak a
+ * reference if that is the case.
+ */
+ old = xa_store(&its->translation_cache, cache_key, irq, GFP_KERNEL_ACCOUNT);
+ if (old)
+ vgic_put_irq(kvm, old);
+}
- /* Move the new translation to the head of the list */
- list_move(&cte->entry, &dist->lpi_translation_cache);
+static void vgic_its_invalidate_cache(struct vgic_its *its)
+{
+ struct kvm *kvm = its->dev->kvm;
+ struct vgic_irq *irq;
+ unsigned long idx;
-out:
- raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
+ xa_for_each(&its->translation_cache, idx, irq) {
+ xa_erase(&its->translation_cache, idx);
+ vgic_put_irq(kvm, irq);
+ }
}
-void vgic_its_invalidate_cache(struct kvm *kvm)
+void vgic_its_invalidate_all_caches(struct kvm *kvm)
{
- struct vgic_dist *dist = &kvm->arch.vgic;
- struct vgic_translation_cache_entry *cte;
- unsigned long flags;
+ struct kvm_device *dev;
+ struct vgic_its *its;
- raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
+ rcu_read_lock();
- list_for_each_entry(cte, &dist->lpi_translation_cache, entry) {
- /*
- * If we hit a NULL entry, there is nothing after this
- * point.
- */
- if (!cte->irq)
- break;
+ list_for_each_entry_rcu(dev, &kvm->devices, vm_node) {
+ if (dev->ops != &kvm_arm_vgic_its_ops)
+ continue;
- vgic_put_irq(kvm, cte->irq);
- cte->irq = NULL;
+ its = dev->private;
+ vgic_its_invalidate_cache(its);
}
- raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
+ rcu_read_unlock();
}
int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
@@ -725,8 +642,6 @@ int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi)
{
u64 address;
- struct kvm_io_device *kvm_io_dev;
- struct vgic_io_device *iodev;
if (!vgic_has_its(kvm))
return ERR_PTR(-ENODEV);
@@ -736,18 +651,7 @@ struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi)
address = (u64)msi->address_hi << 32 | msi->address_lo;
- kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address);
- if (!kvm_io_dev)
- return ERR_PTR(-EINVAL);
-
- if (kvm_io_dev->ops != &kvm_io_gic_ops)
- return ERR_PTR(-EINVAL);
-
- iodev = container_of(kvm_io_dev, struct vgic_io_device, dev);
- if (iodev->iodev_type != IODEV_ITS)
- return ERR_PTR(-EINVAL);
-
- return iodev->its;
+ return __vgic_doorbell_to_its(kvm, address);
}
/*
@@ -883,7 +787,7 @@ static int vgic_its_cmd_handle_discard(struct kvm *kvm, struct vgic_its *its,
* don't bother here since we clear the ITTE anyway and the
* pending state is a property of the ITTE struct.
*/
- vgic_its_invalidate_cache(kvm);
+ vgic_its_invalidate_cache(its);
its_free_ite(kvm, ite);
return 0;
@@ -920,7 +824,7 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its,
ite->collection = collection;
vcpu = collection_to_vcpu(kvm, collection);
- vgic_its_invalidate_cache(kvm);
+ vgic_its_invalidate_cache(its);
return update_affinity(ite->irq, vcpu);
}
@@ -955,7 +859,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
switch (type) {
case GITS_BASER_TYPE_DEVICE:
- if (id >= BIT_ULL(VITS_TYPER_DEVBITS))
+ if (id > VITS_MAX_DEVID)
return false;
break;
case GITS_BASER_TYPE_COLLECTION:
@@ -1167,7 +1071,8 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
}
/* Requires the its_lock to be held. */
-static void vgic_its_free_device(struct kvm *kvm, struct its_device *device)
+static void vgic_its_free_device(struct kvm *kvm, struct vgic_its *its,
+ struct its_device *device)
{
struct its_ite *ite, *temp;
@@ -1179,7 +1084,7 @@ static void vgic_its_free_device(struct kvm *kvm, struct its_device *device)
list_for_each_entry_safe(ite, temp, &device->itt_head, ite_list)
its_free_ite(kvm, ite);
- vgic_its_invalidate_cache(kvm);
+ vgic_its_invalidate_cache(its);
list_del(&device->dev_list);
kfree(device);
@@ -1191,7 +1096,7 @@ static void vgic_its_free_device_list(struct kvm *kvm, struct vgic_its *its)
struct its_device *cur, *temp;
list_for_each_entry_safe(cur, temp, &its->device_list, dev_list)
- vgic_its_free_device(kvm, cur);
+ vgic_its_free_device(kvm, its, cur);
}
/* its lock must be held */
@@ -1250,7 +1155,7 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its,
* by removing the mapping and re-establishing it.
*/
if (device)
- vgic_its_free_device(kvm, device);
+ vgic_its_free_device(kvm, its, device);
/*
* The spec does not say whether unmapping a not-mapped device
@@ -1281,7 +1186,7 @@ static int vgic_its_cmd_handle_mapc(struct kvm *kvm, struct vgic_its *its,
if (!valid) {
vgic_its_free_collection(its, coll_id);
- vgic_its_invalidate_cache(kvm);
+ vgic_its_invalidate_cache(its);
} else {
struct kvm_vcpu *vcpu;
@@ -1372,23 +1277,19 @@ static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its,
int vgic_its_invall(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
- int irq_count, i = 0;
- u32 *intids;
-
- irq_count = vgic_copy_lpi_list(kvm, vcpu, &intids);
- if (irq_count < 0)
- return irq_count;
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct vgic_irq *irq;
+ unsigned long intid;
- for (i = 0; i < irq_count; i++) {
- struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intids[i]);
+ xa_for_each(&dist->lpi_xa, intid, irq) {
+ irq = vgic_get_irq(kvm, NULL, intid);
if (!irq)
continue;
+
update_lpi_config(kvm, irq, vcpu, false);
vgic_put_irq(kvm, irq);
}
- kfree(intids);
-
if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.its_vm)
its_invall_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe);
@@ -1431,10 +1332,10 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,
static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
u64 *its_cmd)
{
+ struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu1, *vcpu2;
struct vgic_irq *irq;
- u32 *intids;
- int irq_count, i;
+ unsigned long intid;
/* We advertise GITS_TYPER.PTA==0, making the address the vcpu ID */
vcpu1 = kvm_get_vcpu_by_id(kvm, its_cmd_get_target_addr(its_cmd));
@@ -1446,12 +1347,8 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
if (vcpu1 == vcpu2)
return 0;
- irq_count = vgic_copy_lpi_list(kvm, vcpu1, &intids);
- if (irq_count < 0)
- return irq_count;
-
- for (i = 0; i < irq_count; i++) {
- irq = vgic_get_irq(kvm, NULL, intids[i]);
+ xa_for_each(&dist->lpi_xa, intid, irq) {
+ irq = vgic_get_irq(kvm, NULL, intid);
if (!irq)
continue;
@@ -1460,9 +1357,8 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
vgic_put_irq(kvm, irq);
}
- vgic_its_invalidate_cache(kvm);
+ vgic_its_invalidate_cache(its);
- kfree(intids);
return 0;
}
@@ -1813,7 +1709,7 @@ static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
its->enabled = !!(val & GITS_CTLR_ENABLE);
if (!its->enabled)
- vgic_its_invalidate_cache(kvm);
+ vgic_its_invalidate_cache(its);
/*
* Try to process any pending commands. This function bails out early
@@ -1914,47 +1810,6 @@ out:
return ret;
}
-/* Default is 16 cached LPIs per vcpu */
-#define LPI_DEFAULT_PCPU_CACHE_SIZE 16
-
-void vgic_lpi_translation_cache_init(struct kvm *kvm)
-{
- struct vgic_dist *dist = &kvm->arch.vgic;
- unsigned int sz;
- int i;
-
- if (!list_empty(&dist->lpi_translation_cache))
- return;
-
- sz = atomic_read(&kvm->online_vcpus) * LPI_DEFAULT_PCPU_CACHE_SIZE;
-
- for (i = 0; i < sz; i++) {
- struct vgic_translation_cache_entry *cte;
-
- /* An allocation failure is not fatal */
- cte = kzalloc(sizeof(*cte), GFP_KERNEL_ACCOUNT);
- if (WARN_ON(!cte))
- break;
-
- INIT_LIST_HEAD(&cte->entry);
- list_add(&cte->entry, &dist->lpi_translation_cache);
- }
-}
-
-void vgic_lpi_translation_cache_destroy(struct kvm *kvm)
-{
- struct vgic_dist *dist = &kvm->arch.vgic;
- struct vgic_translation_cache_entry *cte, *tmp;
-
- vgic_its_invalidate_cache(kvm);
-
- list_for_each_entry_safe(cte, tmp,
- &dist->lpi_translation_cache, entry) {
- list_del(&cte->entry);
- kfree(cte);
- }
-}
-
#define INITIAL_BASER_VALUE \
(GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb) | \
GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, SameAsInner) | \
@@ -1987,8 +1842,6 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
kfree(its);
return ret;
}
-
- vgic_lpi_translation_cache_init(dev->kvm);
}
mutex_init(&its->its_lock);
@@ -2006,6 +1859,7 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
INIT_LIST_HEAD(&its->device_list);
INIT_LIST_HEAD(&its->collection_list);
+ xa_init(&its->translation_cache);
dev->kvm->arch.vgic.msis_require_devid = true;
dev->kvm->arch.vgic.has_its = true;
@@ -2036,6 +1890,8 @@ static void vgic_its_destroy(struct kvm_device *kvm_dev)
vgic_its_free_device_list(kvm, its);
vgic_its_free_collection_list(kvm, its);
+ vgic_its_invalidate_cache(its);
+ xa_destroy(&its->translation_cache);
mutex_unlock(&its->its_lock);
kfree(its);
@@ -2438,7 +2294,7 @@ static int vgic_its_restore_dte(struct vgic_its *its, u32 id,
ret = vgic_its_restore_itt(its, dev);
if (ret) {
- vgic_its_free_device(its->dev->kvm, dev);
+ vgic_its_free_device(its->dev->kvm, its, dev);
return ret;
}
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index c15ee1df036a..a3983a631b5a 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -277,7 +277,7 @@ static void vgic_mmio_write_v3r_ctlr(struct kvm_vcpu *vcpu,
return;
vgic_flush_pending_lpis(vcpu);
- vgic_its_invalidate_cache(vcpu->kvm);
+ vgic_its_invalidate_all_caches(vcpu->kvm);
atomic_set_release(&vgic_cpu->ctlr, 0);
} else {
ctlr = atomic_cmpxchg_acquire(&vgic_cpu->ctlr, 0,
diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c
index 7e9cdb78f7ce..ae5a44d5702d 100644
--- a/arch/arm64/kvm/vgic/vgic-v2.c
+++ b/arch/arm64/kvm/vgic/vgic-v2.c
@@ -464,17 +464,10 @@ void vgic_v2_load(struct kvm_vcpu *vcpu)
kvm_vgic_global_state.vctrl_base + GICH_APR);
}
-void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu)
-{
- struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
-
- cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR);
-}
-
void vgic_v2_put(struct kvm_vcpu *vcpu)
{
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
- vgic_v2_vmcr_sync(vcpu);
+ cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR);
cpu_if->vgic_apr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_APR);
}
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index 4ea3340786b9..ed6e412cd74b 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -722,15 +722,7 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
- /*
- * If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen
- * is dependent on ICC_SRE_EL1.SRE, and we have to perform the
- * VMCR_EL2 save/restore in the world switch.
- */
- if (likely(cpu_if->vgic_sre))
- kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr);
-
- kvm_call_hyp(__vgic_v3_restore_aprs, cpu_if);
+ kvm_call_hyp(__vgic_v3_restore_vmcr_aprs, cpu_if);
if (has_vhe())
__vgic_v3_activate_traps(cpu_if);
@@ -738,24 +730,13 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
WARN_ON(vgic_v4_load(vcpu));
}
-void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu)
-{
- struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
-
- if (likely(cpu_if->vgic_sre))
- cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr);
-}
-
void vgic_v3_put(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+ kvm_call_hyp(__vgic_v3_save_vmcr_aprs, cpu_if);
WARN_ON(vgic_v4_put(vcpu));
- vgic_v3_vmcr_sync(vcpu);
-
- kvm_call_hyp(__vgic_v3_save_aprs, cpu_if);
-
if (has_vhe())
__vgic_v3_deactivate_traps(cpu_if);
}
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index 4ec93587c8cd..f07b3ddff7d4 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -29,9 +29,8 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = {
* its->cmd_lock (mutex)
* its->its_lock (mutex)
* vgic_cpu->ap_list_lock must be taken with IRQs disabled
- * kvm->lpi_list_lock must be taken with IRQs disabled
- * vgic_dist->lpi_xa.xa_lock must be taken with IRQs disabled
- * vgic_irq->irq_lock must be taken with IRQs disabled
+ * vgic_dist->lpi_xa.xa_lock must be taken with IRQs disabled
+ * vgic_irq->irq_lock must be taken with IRQs disabled
*
* As the ap_list_lock might be taken from the timer interrupt handler,
* we have to disable IRQs before taking this lock and everything lower
@@ -126,7 +125,6 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
__xa_erase(&dist->lpi_xa, irq->intid);
xa_unlock_irqrestore(&dist->lpi_xa, flags);
- atomic_dec(&dist->lpi_count);
kfree_rcu(irq, rcu);
}
@@ -939,17 +937,6 @@ void kvm_vgic_put(struct kvm_vcpu *vcpu)
vgic_v3_put(vcpu);
}
-void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu)
-{
- if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
- return;
-
- if (kvm_vgic_global_state.type == VGIC_V2)
- vgic_v2_vmcr_sync(vcpu);
- else
- vgic_v3_vmcr_sync(vcpu);
-}
-
int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index 0c2b82de8fa3..6106ebd5ba42 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -16,6 +16,7 @@
#define INTERRUPT_ID_BITS_SPIS 10
#define INTERRUPT_ID_BITS_ITS 16
+#define VGIC_LPI_MAX_INTID ((1 << INTERRUPT_ID_BITS_ITS) - 1)
#define VGIC_PRI_BITS 5
#define vgic_irq_is_sgi(intid) ((intid) < VGIC_NR_SGIS)
@@ -214,7 +215,6 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
void vgic_v2_init_lrs(void);
void vgic_v2_load(struct kvm_vcpu *vcpu);
void vgic_v2_put(struct kvm_vcpu *vcpu);
-void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu);
void vgic_v2_save_state(struct kvm_vcpu *vcpu);
void vgic_v2_restore_state(struct kvm_vcpu *vcpu);
@@ -253,7 +253,6 @@ bool vgic_v3_check_base(struct kvm *kvm);
void vgic_v3_load(struct kvm_vcpu *vcpu);
void vgic_v3_put(struct kvm_vcpu *vcpu);
-void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu);
bool vgic_has_its(struct kvm *kvm);
int kvm_vgic_register_its_device(void);
@@ -330,14 +329,11 @@ static inline bool vgic_dist_overlap(struct kvm *kvm, gpa_t base, size_t size)
}
bool vgic_lpis_enabled(struct kvm_vcpu *vcpu);
-int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr);
int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
u32 devid, u32 eventid, struct vgic_irq **irq);
struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi);
int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi);
-void vgic_lpi_translation_cache_init(struct kvm *kvm);
-void vgic_lpi_translation_cache_destroy(struct kvm *kvm);
-void vgic_its_invalidate_cache(struct kvm *kvm);
+void vgic_its_invalidate_all_caches(struct kvm *kvm);
/* GICv4.1 MMIO interface */
int vgic_its_inv_lpi(struct kvm *kvm, struct vgic_irq *irq);
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 03efd86dce0a..9b5ab6818f7f 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -32,6 +32,7 @@
#include <linux/hugetlb.h>
#include <linux/acpi_iort.h>
#include <linux/kmemleak.h>
+#include <linux/execmem.h>
#include <asm/boot.h>
#include <asm/fixmap.h>
@@ -432,3 +433,142 @@ void dump_mem_limit(void)
pr_emerg("Memory Limit: none\n");
}
}
+
+#ifdef CONFIG_EXECMEM
+static u64 module_direct_base __ro_after_init = 0;
+static u64 module_plt_base __ro_after_init = 0;
+
+/*
+ * Choose a random page-aligned base address for a window of 'size' bytes which
+ * entirely contains the interval [start, end - 1].
+ */
+static u64 __init random_bounding_box(u64 size, u64 start, u64 end)
+{
+ u64 max_pgoff, pgoff;
+
+ if ((end - start) >= size)
+ return 0;
+
+ max_pgoff = (size - (end - start)) / PAGE_SIZE;
+ pgoff = get_random_u32_inclusive(0, max_pgoff);
+
+ return start - pgoff * PAGE_SIZE;
+}
+
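As a quick sanity check of the helper above, here is a small stand-alone sketch of the same arithmetic; the 40 MiB image size and the fixed page offset are illustrative assumptions, not values taken from this patch:

/* Mirrors random_bounding_box() with the random pgoff made explicit. */
#include <assert.h>
#include <stdint.h>

#define EX_PAGE_SIZE	4096ULL
#define EX_SZ_128M	(128ULL << 20)

static uint64_t ex_bounding_box(uint64_t size, uint64_t start, uint64_t end,
				uint64_t pgoff)
{
	if ((end - start) >= size)
		return 0;
	/* pgoff must lie in [0, (size - (end - start)) / PAGE_SIZE] */
	return start - pgoff * EX_PAGE_SIZE;
}

int main(void)
{
	uint64_t start = 0xffff800008000000ULL;		/* assumed kernel_start */
	uint64_t end = start + (40ULL << 20);		/* assumed 40 MiB image */
	uint64_t max_pgoff = (EX_SZ_128M - (end - start)) / EX_PAGE_SIZE;
	uint64_t base = ex_bounding_box(EX_SZ_128M, start, end, max_pgoff / 2);

	/* The whole image stays inside the chosen 128 MiB window. */
	assert(base <= start && end <= base + EX_SZ_128M);
	return 0;
}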
+/*
+ * Modules may directly reference data and text anywhere within the kernel
+ * image and other modules. References using PREL32 relocations have a +/-2G
+ * range, and so we need to ensure that the entire kernel image and all modules
+ * fall within a 2G window such that these are always within range.
+ *
+ * Modules may directly branch to functions and code within the kernel text,
+ * and to functions and code within other modules. These branches will use
+ * CALL26/JUMP26 relocations with a +/-128M range. Without PLTs, we must ensure
+ * that the entire kernel text and all module text falls within a 128M window
+ * such that these are always within range. With PLTs, we can expand this to a
+ * 2G window.
+ *
+ * We chose the 128M region to surround the entire kernel image (rather than
+ * just the text) as using the same bounds for the 128M and 2G regions ensures
+ * by construction that we never select a 128M region that is not a subset of
+ * the 2G region. For very large and unusual kernel configurations this means
+ * we may fall back to PLTs where they could have been avoided, but this keeps
+ * the logic significantly simpler.
+ */
+static int __init module_init_limits(void)
+{
+ u64 kernel_end = (u64)_end;
+ u64 kernel_start = (u64)_text;
+ u64 kernel_size = kernel_end - kernel_start;
+
+ /*
+ * The default modules region is placed immediately below the kernel
+ * image, and is large enough to use the full 2G relocation range.
+ */
+ BUILD_BUG_ON(KIMAGE_VADDR != MODULES_END);
+ BUILD_BUG_ON(MODULES_VSIZE < SZ_2G);
+
+ if (!kaslr_enabled()) {
+ if (kernel_size < SZ_128M)
+ module_direct_base = kernel_end - SZ_128M;
+ if (kernel_size < SZ_2G)
+ module_plt_base = kernel_end - SZ_2G;
+ } else {
+ u64 min = kernel_start;
+ u64 max = kernel_end;
+
+ if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
+ pr_info("2G module region forced by RANDOMIZE_MODULE_REGION_FULL\n");
+ } else {
+ module_direct_base = random_bounding_box(SZ_128M, min, max);
+ if (module_direct_base) {
+ min = module_direct_base;
+ max = module_direct_base + SZ_128M;
+ }
+ }
+
+ module_plt_base = random_bounding_box(SZ_2G, min, max);
+ }
+
+ pr_info("%llu pages in range for non-PLT usage",
+ module_direct_base ? (SZ_128M - kernel_size) / PAGE_SIZE : 0);
+ pr_info("%llu pages in range for PLT usage",
+ module_plt_base ? (SZ_2G - kernel_size) / PAGE_SIZE : 0);
+
+ return 0;
+}
+
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
+{
+ unsigned long fallback_start = 0, fallback_end = 0;
+ unsigned long start = 0, end = 0;
+
+ module_init_limits();
+
+ /*
+ * Where possible, prefer to allocate within direct branch range of the
+ * kernel such that no PLTs are necessary.
+ */
+ if (module_direct_base) {
+ start = module_direct_base;
+ end = module_direct_base + SZ_128M;
+
+ if (module_plt_base) {
+ fallback_start = module_plt_base;
+ fallback_end = module_plt_base + SZ_2G;
+ }
+ } else if (module_plt_base) {
+ start = module_plt_base;
+ end = module_plt_base + SZ_2G;
+ }
+
+ execmem_info = (struct execmem_info){
+ .ranges = {
+ [EXECMEM_DEFAULT] = {
+ .start = start,
+ .end = end,
+ .pgprot = PAGE_KERNEL,
+ .alignment = 1,
+ .fallback_start = fallback_start,
+ .fallback_end = fallback_end,
+ },
+ [EXECMEM_KPROBES] = {
+ .start = VMALLOC_START,
+ .end = VMALLOC_END,
+ .pgprot = PAGE_KERNEL_ROX,
+ .alignment = 1,
+ },
+ [EXECMEM_BPF] = {
+ .start = VMALLOC_START,
+ .end = VMALLOC_END,
+ .pgprot = PAGE_KERNEL,
+ .alignment = 1,
+ },
+ },
+ };
+
+ return &execmem_info;
+}
+#endif /* CONFIG_EXECMEM */
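For context, a hedged sketch of how a consumer would use the ranges registered above, assuming the execmem_alloc()/execmem_free() helpers provided by the generic execmem code this series converts to; load_some_text() is an invented name for illustration only:

/* Hypothetical caller of the execmem ranges set up by execmem_arch_setup(). */
#include <linux/execmem.h>

static void *load_some_text(size_t size)
{
	/*
	 * EXECMEM_DEFAULT is served from the 128M module_direct_base window
	 * when one was found, with the 2G PLT-reachable window as fallback.
	 */
	void *p = execmem_alloc(EXECMEM_DEFAULT, size);

	if (!p)
		return NULL;

	/* ... copy code in and fix up permissions before use ... */
	return p;
}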
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 1d88711467bb..720336d28856 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -1897,17 +1897,6 @@ u64 bpf_jit_alloc_exec_limit(void)
return VMALLOC_END - VMALLOC_START;
}
-void *bpf_jit_alloc_exec(unsigned long size)
-{
- /* Memory is intended to be executable, reset the pointer tag. */
- return kasan_reset_tag(vmalloc(size));
-}
-
-void bpf_jit_free_exec(void *addr)
-{
- return vfree(addr);
-}
-
/* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
bool bpf_jit_supports_subprog_tailcalls(void)
{
diff --git a/arch/csky/kernel/probes/ftrace.c b/arch/csky/kernel/probes/ftrace.c
index 834cffcfbce3..7ba4b98076de 100644
--- a/arch/csky/kernel/probes/ftrace.c
+++ b/arch/csky/kernel/probes/ftrace.c
@@ -12,6 +12,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct kprobe_ctlblk *kcb;
struct pt_regs *regs;
+ if (unlikely(kprobe_ftrace_disabled))
+ return;
+
bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0)
return;
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 54ad04dacdee..42331d9a8dd7 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -632,6 +632,15 @@ config RANDOMIZE_BASE_MAX_OFFSET
source "kernel/livepatch/Kconfig"
+config PARAVIRT
+ bool "Enable paravirtualization code"
+ depends on AS_HAS_LVZ_EXTENSION
+ help
+ This changes the kernel so it can modify itself when it is run
+ under a hypervisor, potentially improving performance significantly
+ over full virtualization. However, when run without a hypervisor
+ the kernel is theoretically slower and slightly larger.
+
endmenu
config ARCH_SELECT_MEMORY_MODEL
diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild
index 2dbec7853ae8..c862672ed953 100644
--- a/arch/loongarch/include/asm/Kbuild
+++ b/arch/loongarch/include/asm/Kbuild
@@ -26,4 +26,3 @@ generic-y += poll.h
generic-y += param.h
generic-y += posix_types.h
generic-y += resource.h
-generic-y += kvm_para.h
diff --git a/arch/loongarch/include/asm/hardirq.h b/arch/loongarch/include/asm/hardirq.h
index 0ef3b18f8980..d41138abcf26 100644
--- a/arch/loongarch/include/asm/hardirq.h
+++ b/arch/loongarch/include/asm/hardirq.h
@@ -14,9 +14,15 @@ extern void ack_bad_irq(unsigned int irq);
#define NR_IPI 2
+enum ipi_msg_type {
+ IPI_RESCHEDULE,
+ IPI_CALL_FUNCTION,
+};
+
typedef struct {
unsigned int ipi_irqs[NR_IPI];
unsigned int __softirq_pending;
+ atomic_t message ____cacheline_aligned_in_smp;
} ____cacheline_aligned irq_cpustat_t;
DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
index d8f637f9e400..c3993fd88aba 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -12,6 +12,7 @@
#define INSN_NOP 0x03400000
#define INSN_BREAK 0x002a0000
+#define INSN_HVCL 0x002b8000
#define ADDR_IMMMASK_LU52ID 0xFFF0000000000000
#define ADDR_IMMMASK_LU32ID 0x000FFFFF00000000
@@ -67,6 +68,7 @@ enum reg2_op {
revhd_op = 0x11,
extwh_op = 0x16,
extwb_op = 0x17,
+ cpucfg_op = 0x1b,
iocsrrdb_op = 0x19200,
iocsrrdh_op = 0x19201,
iocsrrdw_op = 0x19202,
diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h
index 218b4da0ea90..480418bc5071 100644
--- a/arch/loongarch/include/asm/irq.h
+++ b/arch/loongarch/include/asm/irq.h
@@ -117,7 +117,16 @@ extern struct fwnode_handle *liointc_handle;
extern struct fwnode_handle *pch_lpc_handle;
extern struct fwnode_handle *pch_pic_handle[MAX_IO_PICS];
-extern irqreturn_t loongson_ipi_interrupt(int irq, void *dev);
+static inline int get_percpu_irq(int vector)
+{
+ struct irq_domain *d;
+
+ d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
+ if (d)
+ return irq_create_mapping(d, vector);
+
+ return -EINVAL;
+}
#include <asm-generic/irq.h>
diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
index 2d62f7b0d377..c87b6ea0ec47 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -31,6 +31,11 @@
#define KVM_HALT_POLL_NS_DEFAULT 500000
+#define KVM_GUESTDBG_SW_BP_MASK \
+ (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)
+#define KVM_GUESTDBG_VALID_MASK \
+ (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP | KVM_GUESTDBG_SINGLESTEP)
+
struct kvm_vm_stat {
struct kvm_vm_stat_generic generic;
u64 pages;
@@ -43,6 +48,7 @@ struct kvm_vcpu_stat {
u64 idle_exits;
u64 cpucfg_exits;
u64 signal_exits;
+ u64 hypercall_exits;
};
#define KVM_MEM_HUGEPAGE_CAPABLE (1UL << 0)
@@ -64,6 +70,31 @@ struct kvm_world_switch {
#define MAX_PGTABLE_LEVELS 4
+/*
+ * The physical CPUID is used for interrupt routing, and different hardware
+ * units define it differently.
+ *
+ * For the LOONGARCH_CSR_CPUID register, the maximum CPUID size is 512.
+ * For the IPI hardware, the maximum destination CPUID size is 1024.
+ * For the extioi interrupt controller, the maximum destination CPUID size is 256.
+ * For the msgint interrupt controller, the maximum supported CPUID size is 65536.
+ *
+ * Currently the maximum CPUID is defined as 256 for the KVM hypervisor; in the
+ * future it will be expanded to 4096, covering at most 16 packages with up to
+ * 256 vcpus per package.

+ */
+#define KVM_MAX_PHYID 256
+
+struct kvm_phyid_info {
+ struct kvm_vcpu *vcpu;
+ bool enabled;
+};
+
+struct kvm_phyid_map {
+ int max_phyid;
+ struct kvm_phyid_info phys_map[KVM_MAX_PHYID];
+};
+
struct kvm_arch {
/* Guest physical mm */
kvm_pte_t *pgd;
@@ -71,6 +102,8 @@ struct kvm_arch {
unsigned long invalid_ptes[MAX_PGTABLE_LEVELS];
unsigned int pte_shifts[MAX_PGTABLE_LEVELS];
unsigned int root_level;
+ spinlock_t phyid_map_lock;
+ struct kvm_phyid_map *phyid_map;
s64 time_offset;
struct kvm_context __percpu *vmcs;
@@ -203,7 +236,6 @@ void kvm_flush_tlb_all(void);
void kvm_flush_tlb_gpa(struct kvm_vcpu *vcpu, unsigned long gpa);
int kvm_handle_mm_fault(struct kvm_vcpu *vcpu, unsigned long badv, bool write);
-void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, bool blockable);
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
new file mode 100644
index 000000000000..4ba2312e5f8c
--- /dev/null
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_LOONGARCH_KVM_PARA_H
+#define _ASM_LOONGARCH_KVM_PARA_H
+
+/*
+ * Hypercall code field
+ */
+#define HYPERVISOR_KVM 1
+#define HYPERVISOR_VENDOR_SHIFT 8
+#define HYPERCALL_ENCODE(vendor, code) ((vendor << HYPERVISOR_VENDOR_SHIFT) + code)
+
+#define KVM_HCALL_CODE_SERVICE 0
+#define KVM_HCALL_CODE_SWDBG 1
+
+#define KVM_HCALL_SERVICE HYPERCALL_ENCODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SERVICE)
+#define KVM_HCALL_FUNC_IPI 1
+
+#define KVM_HCALL_SWDBG HYPERCALL_ENCODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SWDBG)
+
+/*
+ * LoongArch hypercall return code
+ */
+#define KVM_HCALL_SUCCESS 0
+#define KVM_HCALL_INVALID_CODE -1UL
+#define KVM_HCALL_INVALID_PARAMETER -2UL
+
+/*
+ * Hypercall interface for KVM hypervisor
+ *
+ * a0: function identifier
+ * a1-a6: args
+ * Return value will be placed in a0.
+ * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6.
+ */
+static __always_inline long kvm_hypercall0(u64 fid)
+{
+ register long ret asm("a0");
+ register unsigned long fun asm("a0") = fid;
+
+ __asm__ __volatile__(
+ "hvcl "__stringify(KVM_HCALL_SERVICE)
+ : "=r" (ret)
+ : "r" (fun)
+ : "memory"
+ );
+
+ return ret;
+}
+
+static __always_inline long kvm_hypercall1(u64 fid, unsigned long arg0)
+{
+ register long ret asm("a0");
+ register unsigned long fun asm("a0") = fid;
+ register unsigned long a1 asm("a1") = arg0;
+
+ __asm__ __volatile__(
+ "hvcl "__stringify(KVM_HCALL_SERVICE)
+ : "=r" (ret)
+ : "r" (fun), "r" (a1)
+ : "memory"
+ );
+
+ return ret;
+}
+
+static __always_inline long kvm_hypercall2(u64 fid,
+ unsigned long arg0, unsigned long arg1)
+{
+ register long ret asm("a0");
+ register unsigned long fun asm("a0") = fid;
+ register unsigned long a1 asm("a1") = arg0;
+ register unsigned long a2 asm("a2") = arg1;
+
+ __asm__ __volatile__(
+ "hvcl "__stringify(KVM_HCALL_SERVICE)
+ : "=r" (ret)
+ : "r" (fun), "r" (a1), "r" (a2)
+ : "memory"
+ );
+
+ return ret;
+}
+
+static __always_inline long kvm_hypercall3(u64 fid,
+ unsigned long arg0, unsigned long arg1, unsigned long arg2)
+{
+ register long ret asm("a0");
+ register unsigned long fun asm("a0") = fid;
+ register unsigned long a1 asm("a1") = arg0;
+ register unsigned long a2 asm("a2") = arg1;
+ register unsigned long a3 asm("a3") = arg2;
+
+ __asm__ __volatile__(
+ "hvcl "__stringify(KVM_HCALL_SERVICE)
+ : "=r" (ret)
+ : "r" (fun), "r" (a1), "r" (a2), "r" (a3)
+ : "memory"
+ );
+
+ return ret;
+}
+
+static __always_inline long kvm_hypercall4(u64 fid,
+ unsigned long arg0, unsigned long arg1,
+ unsigned long arg2, unsigned long arg3)
+{
+ register long ret asm("a0");
+ register unsigned long fun asm("a0") = fid;
+ register unsigned long a1 asm("a1") = arg0;
+ register unsigned long a2 asm("a2") = arg1;
+ register unsigned long a3 asm("a3") = arg2;
+ register unsigned long a4 asm("a4") = arg3;
+
+ __asm__ __volatile__(
+ "hvcl "__stringify(KVM_HCALL_SERVICE)
+ : "=r" (ret)
+ : "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4)
+ : "memory"
+ );
+
+ return ret;
+}
+
+static __always_inline long kvm_hypercall5(u64 fid,
+ unsigned long arg0, unsigned long arg1,
+ unsigned long arg2, unsigned long arg3, unsigned long arg4)
+{
+ register long ret asm("a0");
+ register unsigned long fun asm("a0") = fid;
+ register unsigned long a1 asm("a1") = arg0;
+ register unsigned long a2 asm("a2") = arg1;
+ register unsigned long a3 asm("a3") = arg2;
+ register unsigned long a4 asm("a4") = arg3;
+ register unsigned long a5 asm("a5") = arg4;
+
+ __asm__ __volatile__(
+ "hvcl "__stringify(KVM_HCALL_SERVICE)
+ : "=r" (ret)
+ : "r"(fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4), "r" (a5)
+ : "memory"
+ );
+
+ return ret;
+}
+
+static inline unsigned int kvm_arch_para_features(void)
+{
+ return 0;
+}
+
+static inline unsigned int kvm_arch_para_hints(void)
+{
+ return 0;
+}
+
+static inline bool kvm_check_and_clear_guest_paused(void)
+{
+ return false;
+}
+
+#endif /* _ASM_LOONGARCH_KVM_PARA_H */
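The guest IPI path later in this series passes KVM_HCALL_FUNC_IPI plus a two-word destination bitmap through kvm_hypercall3(). A small stand-alone sketch below recomputes the encodings defined above, and the software-breakpoint instruction word they produce together with INSN_HVCL from asm/inst.h in this same series; all constants come from the patch, only the program itself is illustrative:

/* Recompute KVM_HCALL_SERVICE, KVM_HCALL_SWDBG and the SWDBG instruction. */
#include <stdio.h>

#define EX_HYPERVISOR_KVM		1
#define EX_HYPERVISOR_VENDOR_SHIFT	8
#define EX_HYPERCALL_ENCODE(v, c)	(((v) << EX_HYPERVISOR_VENDOR_SHIFT) + (c))
#define EX_INSN_HVCL			0x002b8000	/* from asm/inst.h */

int main(void)
{
	unsigned int service = EX_HYPERCALL_ENCODE(EX_HYPERVISOR_KVM, 0);	/* 0x100 */
	unsigned int swdbg = EX_HYPERCALL_ENCODE(EX_HYPERVISOR_KVM, 1);		/* 0x101 */

	/*
	 * KVM_REG_LOONGARCH_DEBUG_INST later reports INSN_HVCL | KVM_HCALL_SWDBG,
	 * i.e. the "hvcl 0x101" instruction the VMM plants as a breakpoint.
	 */
	printf("service=%#x swdbg=%#x debug_inst=%#x\n",
	       service, swdbg, EX_INSN_HVCL | swdbg);
	return 0;
}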
diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h
index 0cb4fdb8a9b5..590a92cb5416 100644
--- a/arch/loongarch/include/asm/kvm_vcpu.h
+++ b/arch/loongarch/include/asm/kvm_vcpu.h
@@ -81,6 +81,7 @@ void kvm_save_timer(struct kvm_vcpu *vcpu);
void kvm_restore_timer(struct kvm_vcpu *vcpu);
int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
+struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid);
/*
* Loongarch KVM guest interrupt handling
@@ -109,4 +110,14 @@ static inline int kvm_queue_exception(struct kvm_vcpu *vcpu,
return -1;
}
+static inline unsigned long kvm_read_reg(struct kvm_vcpu *vcpu, int num)
+{
+ return vcpu->arch.gprs[num];
+}
+
+static inline void kvm_write_reg(struct kvm_vcpu *vcpu, int num, unsigned long val)
+{
+ vcpu->arch.gprs[num] = val;
+}
+
#endif /* __ASM_LOONGARCH_KVM_VCPU_H__ */
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index 46366e783c84..eb09adda54b7 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -158,6 +158,18 @@
#define CPUCFG48_VFPU_CG BIT(2)
#define CPUCFG48_RAM_CG BIT(3)
+/*
+ * CPUCFG index area: 0x40000000 -- 0x400000ff
+ * SW emulation for KVM hypervisor
+ */
+#define CPUCFG_KVM_BASE 0x40000000
+#define CPUCFG_KVM_SIZE 0x100
+
+#define CPUCFG_KVM_SIG (CPUCFG_KVM_BASE + 0)
+#define KVM_SIGNATURE "KVM\0"
+#define CPUCFG_KVM_FEATURE (CPUCFG_KVM_BASE + 4)
+#define KVM_FEATURE_IPI BIT(1)
+
#ifndef __ASSEMBLY__
/* CSR */
diff --git a/arch/loongarch/include/asm/paravirt.h b/arch/loongarch/include/asm/paravirt.h
new file mode 100644
index 000000000000..0965710f47f2
--- /dev/null
+++ b/arch/loongarch/include/asm/paravirt.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_LOONGARCH_PARAVIRT_H
+#define _ASM_LOONGARCH_PARAVIRT_H
+
+#ifdef CONFIG_PARAVIRT
+
+#include <linux/static_call_types.h>
+struct static_key;
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
+
+u64 dummy_steal_clock(int cpu);
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
+
+static inline u64 paravirt_steal_clock(int cpu)
+{
+ return static_call(pv_steal_clock)(cpu);
+}
+
+int __init pv_ipi_init(void);
+
+#else
+
+static inline int pv_ipi_init(void)
+{
+ return 0;
+}
+
+#endif // CONFIG_PARAVIRT
+#endif
diff --git a/arch/loongarch/include/asm/paravirt_api_clock.h b/arch/loongarch/include/asm/paravirt_api_clock.h
new file mode 100644
index 000000000000..65ac7cee0dad
--- /dev/null
+++ b/arch/loongarch/include/asm/paravirt_api_clock.h
@@ -0,0 +1 @@
+#include <asm/paravirt.h>
diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h
index f81e5f01d619..1c51bdf3516a 100644
--- a/arch/loongarch/include/asm/smp.h
+++ b/arch/loongarch/include/asm/smp.h
@@ -12,6 +12,13 @@
#include <linux/threads.h>
#include <linux/cpumask.h>
+struct smp_ops {
+ void (*init_ipi)(void);
+ void (*send_ipi_single)(int cpu, unsigned int action);
+ void (*send_ipi_mask)(const struct cpumask *mask, unsigned int action);
+};
+extern struct smp_ops mp_ops;
+
extern int smp_num_siblings;
extern int num_processors;
extern int disabled_cpus;
@@ -24,8 +31,6 @@ void loongson_prepare_cpus(unsigned int max_cpus);
void loongson_boot_secondary(int cpu, struct task_struct *idle);
void loongson_init_secondary(void);
void loongson_smp_finish(void);
-void loongson_send_ipi_single(int cpu, unsigned int action);
-void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action);
#ifdef CONFIG_HOTPLUG_CPU
int loongson_cpu_disable(void);
void loongson_cpu_die(unsigned int cpu);
@@ -59,9 +64,12 @@ extern int __cpu_logical_map[NR_CPUS];
#define cpu_physical_id(cpu) cpu_logical_map(cpu)
-#define SMP_BOOT_CPU 0x1
-#define SMP_RESCHEDULE 0x2
-#define SMP_CALL_FUNCTION 0x4
+#define ACTION_BOOT_CPU 0
+#define ACTION_RESCHEDULE 1
+#define ACTION_CALL_FUNCTION 2
+#define SMP_BOOT_CPU BIT(ACTION_BOOT_CPU)
+#define SMP_RESCHEDULE BIT(ACTION_RESCHEDULE)
+#define SMP_CALL_FUNCTION BIT(ACTION_CALL_FUNCTION)
struct secondary_data {
unsigned long stack;
@@ -81,12 +89,12 @@ extern void show_ipi_list(struct seq_file *p, int prec);
static inline void arch_send_call_function_single_ipi(int cpu)
{
- loongson_send_ipi_single(cpu, SMP_CALL_FUNCTION);
+ mp_ops.send_ipi_single(cpu, ACTION_CALL_FUNCTION);
}
static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
- loongson_send_ipi_mask(mask, SMP_CALL_FUNCTION);
+ mp_ops.send_ipi_mask(mask, ACTION_CALL_FUNCTION);
}
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h
index 109785922cf9..f9abef382317 100644
--- a/arch/loongarch/include/uapi/asm/kvm.h
+++ b/arch/loongarch/include/uapi/asm/kvm.h
@@ -17,6 +17,8 @@
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#define KVM_DIRTY_LOG_PAGE_OFFSET 64
+#define KVM_GUESTDBG_USE_SW_BP 0x00010000
+
/*
* for KVM_GET_REGS and KVM_SET_REGS
*/
@@ -72,6 +74,8 @@ struct kvm_fpu {
#define KVM_REG_LOONGARCH_COUNTER (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 1)
#define KVM_REG_LOONGARCH_VCPU_RESET (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 2)
+/* Debugging: Special instruction for software breakpoint */
+#define KVM_REG_LOONGARCH_DEBUG_INST (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 3)
#define LOONGARCH_REG_SHIFT 3
#define LOONGARCH_REG_64(TYPE, REG) (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT))
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 3a7620b66bc6..c9bfeda89e40 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -51,6 +51,7 @@ obj-$(CONFIG_MODULES) += module.o module-sections.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_PROC_FS) += proc.o
+obj-$(CONFIG_PARAVIRT) += paravirt.o
obj-$(CONFIG_SMP) += smp.o
diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c
index 73858c9029cc..bff058317062 100644
--- a/arch/loongarch/kernel/ftrace_dyn.c
+++ b/arch/loongarch/kernel/ftrace_dyn.c
@@ -287,6 +287,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct kprobe *p;
struct kprobe_ctlblk *kcb;
+ if (unlikely(kprobe_ftrace_disabled))
+ return;
+
bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0)
return;
diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
index 883e5066ae44..f4991c03514f 100644
--- a/arch/loongarch/kernel/irq.c
+++ b/arch/loongarch/kernel/irq.c
@@ -87,23 +87,9 @@ static void __init init_vec_parent_group(void)
acpi_table_parse(ACPI_SIG_MCFG, early_pci_mcfg_parse);
}
-static int __init get_ipi_irq(void)
-{
- struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
-
- if (d)
- return irq_create_mapping(d, INT_IPI);
-
- return -EINVAL;
-}
-
void __init init_IRQ(void)
{
int i;
-#ifdef CONFIG_SMP
- int r, ipi_irq;
- static int ipi_dummy_dev;
-#endif
unsigned int order = get_order(IRQ_STACK_SIZE);
struct page *page;
@@ -113,13 +99,7 @@ void __init init_IRQ(void)
init_vec_parent_group();
irqchip_init();
#ifdef CONFIG_SMP
- ipi_irq = get_ipi_irq();
- if (ipi_irq < 0)
- panic("IPI IRQ mapping failed\n");
- irq_set_percpu_devid(ipi_irq);
- r = request_percpu_irq(ipi_irq, loongson_ipi_interrupt, "IPI", &ipi_dummy_dev);
- if (r < 0)
- panic("IPI IRQ request failed\n");
+ mp_ops.init_ipi();
#endif
for (i = 0; i < NR_IRQS; i++)
@@ -133,5 +113,5 @@ void __init init_IRQ(void)
per_cpu(irq_stack, i), per_cpu(irq_stack, i) + IRQ_STACK_SIZE);
}
- set_csr_ecfg(ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
+ set_csr_ecfg(ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
}
diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c
index c7d0338d12c1..36d6d9eeb7c7 100644
--- a/arch/loongarch/kernel/module.c
+++ b/arch/loongarch/kernel/module.c
@@ -490,12 +490,6 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
return 0;
}
-void *module_alloc(unsigned long size)
-{
- return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
- GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0));
-}
-
static void module_init_ftrace_plt(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs, struct module *mod)
{
diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
new file mode 100644
index 000000000000..1633ed4f692f
--- /dev/null
+++ b/arch/loongarch/kernel/paravirt.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/jump_label.h>
+#include <linux/kvm_para.h>
+#include <linux/static_call.h>
+#include <asm/paravirt.h>
+
+struct static_key paravirt_steal_enabled;
+struct static_key paravirt_steal_rq_enabled;
+
+static u64 native_steal_clock(int cpu)
+{
+ return 0;
+}
+
+DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
+
+#ifdef CONFIG_SMP
+static void pv_send_ipi_single(int cpu, unsigned int action)
+{
+ int min, old;
+ irq_cpustat_t *info = &per_cpu(irq_stat, cpu);
+
+ old = atomic_fetch_or(BIT(action), &info->message);
+ if (old)
+ return;
+
+ min = cpu_logical_map(cpu);
+ kvm_hypercall3(KVM_HCALL_FUNC_IPI, 1, 0, min);
+}
+
+#define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG)
+
+static void pv_send_ipi_mask(const struct cpumask *mask, unsigned int action)
+{
+ int i, cpu, min = 0, max = 0, old;
+ __uint128_t bitmap = 0;
+ irq_cpustat_t *info;
+
+ if (cpumask_empty(mask))
+ return;
+
+ action = BIT(action);
+ for_each_cpu(i, mask) {
+ info = &per_cpu(irq_stat, i);
+ old = atomic_fetch_or(action, &info->message);
+ if (old)
+ continue;
+
+ cpu = cpu_logical_map(i);
+ if (!bitmap) {
+ min = max = cpu;
+ } else if (cpu < min && cpu > (max - KVM_IPI_CLUSTER_SIZE)) {
+ /* cpu < min, and the bitmap still has room */
+ bitmap <<= min - cpu;
+ min = cpu;
+ } else if (cpu > min && cpu < (min + KVM_IPI_CLUSTER_SIZE)) {
+ /* cpu > min, and the bitmap still has room */
+ max = cpu > max ? cpu : max;
+ } else {
+ /*
+ * Adding this cpu would make the bitmap exceed
+ * KVM_IPI_CLUSTER_SIZE, so flush the accumulated bitmap with a
+ * hypercall now and start a new window at this cpu.
+ */
+ kvm_hypercall3(KVM_HCALL_FUNC_IPI, (unsigned long)bitmap,
+ (unsigned long)(bitmap >> BITS_PER_LONG), min);
+ min = max = cpu;
+ bitmap = 0;
+ }
+ __set_bit(cpu - min, (unsigned long *)&bitmap);
+ }
+
+ if (bitmap)
+ kvm_hypercall3(KVM_HCALL_FUNC_IPI, (unsigned long)bitmap,
+ (unsigned long)(bitmap >> BITS_PER_LONG), min);
+}
+
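To make the windowing in pv_send_ipi_mask() above concrete, a simplified stand-alone sketch follows; the CPU list is made up, the logical map is assumed to be the identity, ids are assumed to arrive in ascending order, and the irq_stat coalescing is omitted:

/* Each printed line stands for one KVM_HCALL_FUNC_IPI hypercall. */
#include <stdio.h>

#define EX_CLUSTER	(2 * 64)	/* KVM_IPI_CLUSTER_SIZE with 64-bit longs */

int main(void)
{
	int cpus[] = { 2, 5, 130 };		/* made-up physical ids */
	unsigned long long lo = 0, hi = 0;	/* stands in for the __uint128_t bitmap */
	int min = 0, max = 0, started = 0;

	for (unsigned int i = 0; i < sizeof(cpus) / sizeof(cpus[0]); i++) {
		int cpu = cpus[i];

		if (!started) {
			min = max = cpu;
			started = 1;
		} else if (cpu > min && cpu < min + EX_CLUSTER) {
			max = cpu > max ? cpu : max;
		} else {
			/* Window exhausted: flush one hypercall and start over. */
			printf("hvcl: bitmap=%#llx/%#llx min=%d\n", lo, hi, min);
			min = max = cpu;
			lo = hi = 0;
		}
		if (cpu - min < 64)
			lo |= 1ULL << (cpu - min);
		else
			hi |= 1ULL << (cpu - min - 64);
	}
	if (started)
		printf("hvcl: bitmap=%#llx/%#llx min=%d\n", lo, hi, min);
	return 0;
}

Running it shows the three CPUs split into two hypercalls: bits 0 and 3 relative to min=2, then bit 0 relative to min=130.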
+static irqreturn_t pv_ipi_interrupt(int irq, void *dev)
+{
+ u32 action;
+ irq_cpustat_t *info;
+
+ /* Clear SWI interrupt */
+ clear_csr_estat(1 << INT_SWI0);
+ info = this_cpu_ptr(&irq_stat);
+ action = atomic_xchg(&info->message, 0);
+
+ if (action & SMP_RESCHEDULE) {
+ scheduler_ipi();
+ info->ipi_irqs[IPI_RESCHEDULE]++;
+ }
+
+ if (action & SMP_CALL_FUNCTION) {
+ generic_smp_call_function_interrupt();
+ info->ipi_irqs[IPI_CALL_FUNCTION]++;
+ }
+
+ return IRQ_HANDLED;
+}
+
+static void pv_init_ipi(void)
+{
+ int r, swi;
+
+ swi = get_percpu_irq(INT_SWI0);
+ if (swi < 0)
+ panic("SWI0 IRQ mapping failed\n");
+ irq_set_percpu_devid(swi);
+ r = request_percpu_irq(swi, pv_ipi_interrupt, "SWI0-IPI", &irq_stat);
+ if (r < 0)
+ panic("SWI0 IRQ request failed\n");
+}
+#endif
+
+static bool kvm_para_available(void)
+{
+ int config;
+ static int hypervisor_type;
+
+ if (!hypervisor_type) {
+ config = read_cpucfg(CPUCFG_KVM_SIG);
+ if (!memcmp(&config, KVM_SIGNATURE, 4))
+ hypervisor_type = HYPERVISOR_KVM;
+ }
+
+ return hypervisor_type == HYPERVISOR_KVM;
+}
+
+int __init pv_ipi_init(void)
+{
+ int feature;
+
+ if (!cpu_has_hypervisor)
+ return 0;
+ if (!kvm_para_available())
+ return 0;
+
+ feature = read_cpucfg(CPUCFG_KVM_FEATURE);
+ if (!(feature & KVM_FEATURE_IPI))
+ return 0;
+
+#ifdef CONFIG_SMP
+ mp_ops.init_ipi = pv_init_ipi;
+ mp_ops.send_ipi_single = pv_send_ipi_single;
+ mp_ops.send_ipi_mask = pv_send_ipi_mask;
+#endif
+
+ return 0;
+}
diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c
index cac7cba81b65..f86a4b838dd7 100644
--- a/arch/loongarch/kernel/perf_event.c
+++ b/arch/loongarch/kernel/perf_event.c
@@ -456,16 +456,6 @@ static void loongarch_pmu_disable(struct pmu *pmu)
static DEFINE_MUTEX(pmu_reserve_mutex);
static atomic_t active_events = ATOMIC_INIT(0);
-static int get_pmc_irq(void)
-{
- struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
-
- if (d)
- return irq_create_mapping(d, INT_PCOV);
-
- return -EINVAL;
-}
-
static void reset_counters(void *arg);
static int __hw_perf_event_init(struct perf_event *event);
@@ -473,7 +463,7 @@ static void hw_perf_event_destroy(struct perf_event *event)
{
if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) {
on_each_cpu(reset_counters, NULL, 1);
- free_irq(get_pmc_irq(), &loongarch_pmu);
+ free_irq(get_percpu_irq(INT_PCOV), &loongarch_pmu);
mutex_unlock(&pmu_reserve_mutex);
}
}
@@ -562,7 +552,7 @@ static int loongarch_pmu_event_init(struct perf_event *event)
if (event->cpu >= 0 && !cpu_online(event->cpu))
return -ENODEV;
- irq = get_pmc_irq();
+ irq = get_percpu_irq(INT_PCOV);
flags = IRQF_PERCPU | IRQF_NOBALANCING | IRQF_NO_THREAD | IRQF_NO_SUSPEND | IRQF_SHARED;
if (!atomic_inc_not_zero(&active_events)) {
mutex_lock(&pmu_reserve_mutex);
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index aabee0b280fe..0dfe2388ef41 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -29,6 +29,7 @@
#include <asm/loongson.h>
#include <asm/mmu_context.h>
#include <asm/numa.h>
+#include <asm/paravirt.h>
#include <asm/processor.h>
#include <asm/setup.h>
#include <asm/time.h>
@@ -66,11 +67,6 @@ static cpumask_t cpu_core_setup_map;
struct secondary_data cpuboot_data;
static DEFINE_PER_CPU(int, cpu_state);
-enum ipi_msg_type {
- IPI_RESCHEDULE,
- IPI_CALL_FUNCTION,
-};
-
static const char *ipi_types[NR_IPI] __tracepoint_string = {
[IPI_RESCHEDULE] = "Rescheduling interrupts",
[IPI_CALL_FUNCTION] = "Function call interrupts",
@@ -190,24 +186,19 @@ static u32 ipi_read_clear(int cpu)
static void ipi_write_action(int cpu, u32 action)
{
- unsigned int irq = 0;
-
- while ((irq = ffs(action))) {
- uint32_t val = IOCSR_IPI_SEND_BLOCKING;
+ uint32_t val;
- val |= (irq - 1);
- val |= (cpu << IOCSR_IPI_SEND_CPU_SHIFT);
- iocsr_write32(val, LOONGARCH_IOCSR_IPI_SEND);
- action &= ~BIT(irq - 1);
- }
+ val = IOCSR_IPI_SEND_BLOCKING | action;
+ val |= (cpu << IOCSR_IPI_SEND_CPU_SHIFT);
+ iocsr_write32(val, LOONGARCH_IOCSR_IPI_SEND);
}
-void loongson_send_ipi_single(int cpu, unsigned int action)
+static void loongson_send_ipi_single(int cpu, unsigned int action)
{
ipi_write_action(cpu_logical_map(cpu), (u32)action);
}
-void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action)
+static void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action)
{
unsigned int i;
@@ -222,11 +213,11 @@ void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action)
*/
void arch_smp_send_reschedule(int cpu)
{
- loongson_send_ipi_single(cpu, SMP_RESCHEDULE);
+ mp_ops.send_ipi_single(cpu, ACTION_RESCHEDULE);
}
EXPORT_SYMBOL_GPL(arch_smp_send_reschedule);
-irqreturn_t loongson_ipi_interrupt(int irq, void *dev)
+static irqreturn_t loongson_ipi_interrupt(int irq, void *dev)
{
unsigned int action;
unsigned int cpu = smp_processor_id();
@@ -246,6 +237,26 @@ irqreturn_t loongson_ipi_interrupt(int irq, void *dev)
return IRQ_HANDLED;
}
+static void loongson_init_ipi(void)
+{
+ int r, ipi_irq;
+
+ ipi_irq = get_percpu_irq(INT_IPI);
+ if (ipi_irq < 0)
+ panic("IPI IRQ mapping failed\n");
+
+ irq_set_percpu_devid(ipi_irq);
+ r = request_percpu_irq(ipi_irq, loongson_ipi_interrupt, "IPI", &irq_stat);
+ if (r < 0)
+ panic("IPI IRQ request failed\n");
+}
+
+struct smp_ops mp_ops = {
+ .init_ipi = loongson_init_ipi,
+ .send_ipi_single = loongson_send_ipi_single,
+ .send_ipi_mask = loongson_send_ipi_mask,
+};
+
static void __init fdt_smp_setup(void)
{
#ifdef CONFIG_OF
@@ -289,6 +300,7 @@ void __init loongson_smp_setup(void)
cpu_data[0].core = cpu_logical_map(0) % loongson_sysconf.cores_per_package;
cpu_data[0].package = cpu_logical_map(0) / loongson_sysconf.cores_per_package;
+ pv_ipi_init();
iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_EN);
pr_info("Detected %i available CPU(s)\n", loongson_sysconf.nr_cpus);
}
@@ -323,7 +335,7 @@ void loongson_boot_secondary(int cpu, struct task_struct *idle)
csr_mail_send(entry, cpu_logical_map(cpu), 0);
- loongson_send_ipi_single(cpu, SMP_BOOT_CPU);
+ loongson_send_ipi_single(cpu, ACTION_BOOT_CPU);
}
/*
@@ -333,7 +345,7 @@ void loongson_init_secondary(void)
{
unsigned int cpu = smp_processor_id();
unsigned int imask = ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
- ECFGF_IPI | ECFGF_PMC | ECFGF_TIMER;
+ ECFGF_IPI | ECFGF_PMC | ECFGF_TIMER | ECFGF_SIP0;
change_csr_ecfg(ECFG0_IM, imask);
diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c
index e7015f7b70e3..fd5354f9be7c 100644
--- a/arch/loongarch/kernel/time.c
+++ b/arch/loongarch/kernel/time.c
@@ -123,16 +123,6 @@ void sync_counter(void)
csr_write64(init_offset, LOONGARCH_CSR_CNTC);
}
-static int get_timer_irq(void)
-{
- struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
-
- if (d)
- return irq_create_mapping(d, INT_TI);
-
- return -EINVAL;
-}
-
int constant_clockevent_init(void)
{
unsigned int cpu = smp_processor_id();
@@ -142,7 +132,7 @@ int constant_clockevent_init(void)
static int irq = 0, timer_irq_installed = 0;
if (!timer_irq_installed) {
- irq = get_timer_irq();
+ irq = get_percpu_irq(INT_TI);
if (irq < 0)
pr_err("Failed to map irq %d (timer)\n", irq);
}
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index ed1d89d53e2e..c86e099af5ca 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -9,6 +9,7 @@
#include <linux/module.h>
#include <linux/preempt.h>
#include <linux/vmalloc.h>
+#include <trace/events/kvm.h>
#include <asm/fpu.h>
#include <asm/inst.h>
#include <asm/loongarch.h>
@@ -20,6 +21,46 @@
#include <asm/kvm_vcpu.h>
#include "trace.h"
+static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
+{
+ int rd, rj;
+ unsigned int index;
+
+ if (inst.reg2_format.opcode != cpucfg_op)
+ return EMULATE_FAIL;
+
+ rd = inst.reg2_format.rd;
+ rj = inst.reg2_format.rj;
+ ++vcpu->stat.cpucfg_exits;
+ index = vcpu->arch.gprs[rj];
+
+ /*
+ * Per the LoongArch Reference Manual, section 2.2.10.5, the return
+ * value is 0 for an undefined CPUCFG index.
+ *
+ * Disable preemption since hw gcsr is accessed
+ */
+ preempt_disable();
+ switch (index) {
+ case 0 ... (KVM_MAX_CPUCFG_REGS - 1):
+ vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index];
+ break;
+ case CPUCFG_KVM_SIG:
+ /* CPUCFG emulation between 0x40000000 -- 0x400000ff */
+ vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
+ break;
+ case CPUCFG_KVM_FEATURE:
+ vcpu->arch.gprs[rd] = KVM_FEATURE_IPI;
+ break;
+ default:
+ vcpu->arch.gprs[rd] = 0;
+ break;
+ }
+ preempt_enable();
+
+ return EMULATE_DONE;
+}
+
static unsigned long kvm_emu_read_csr(struct kvm_vcpu *vcpu, int csrid)
{
unsigned long val = 0;
@@ -208,8 +249,6 @@ int kvm_emu_idle(struct kvm_vcpu *vcpu)
static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
{
- int rd, rj;
- unsigned int index;
unsigned long curr_pc;
larch_inst inst;
enum emulation_result er = EMULATE_DONE;
@@ -224,21 +263,7 @@ static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
er = EMULATE_FAIL;
switch (((inst.word >> 24) & 0xff)) {
case 0x0: /* CPUCFG GSPR */
- if (inst.reg2_format.opcode == 0x1B) {
- rd = inst.reg2_format.rd;
- rj = inst.reg2_format.rj;
- ++vcpu->stat.cpucfg_exits;
- index = vcpu->arch.gprs[rj];
- er = EMULATE_DONE;
- /*
- * By LoongArch Reference Manual 2.2.10.5
- * return value is 0 for undefined cpucfg index
- */
- if (index < KVM_MAX_CPUCFG_REGS)
- vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index];
- else
- vcpu->arch.gprs[rd] = 0;
- }
+ er = kvm_emu_cpucfg(vcpu, inst);
break;
case 0x4: /* CSR{RD,WR,XCHG} GSPR */
er = kvm_handle_csr(vcpu, inst);
@@ -417,6 +442,8 @@ int kvm_emu_mmio_read(struct kvm_vcpu *vcpu, larch_inst inst)
vcpu->arch.io_gpr = rd;
run->mmio.is_write = 0;
vcpu->mmio_is_write = 0;
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, run->mmio.len,
+ run->mmio.phys_addr, NULL);
} else {
kvm_err("Read not supported Inst=0x%08x @%lx BadVaddr:%#lx\n",
inst.word, vcpu->arch.pc, vcpu->arch.badv);
@@ -463,6 +490,9 @@ int kvm_complete_mmio_read(struct kvm_vcpu *vcpu, struct kvm_run *run)
break;
}
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ, run->mmio.len,
+ run->mmio.phys_addr, run->mmio.data);
+
return er;
}
@@ -564,6 +594,8 @@ int kvm_emu_mmio_write(struct kvm_vcpu *vcpu, larch_inst inst)
run->mmio.is_write = 1;
vcpu->mmio_needed = 1;
vcpu->mmio_is_write = 1;
+ trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, run->mmio.len,
+ run->mmio.phys_addr, data);
} else {
vcpu->arch.pc = curr_pc;
kvm_err("Write not supported Inst=0x%08x @%lx BadVaddr:%#lx\n",
@@ -685,6 +717,90 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu)
return RESUME_GUEST;
}
+static int kvm_send_pv_ipi(struct kvm_vcpu *vcpu)
+{
+ unsigned int min, cpu, i;
+ unsigned long ipi_bitmap;
+ struct kvm_vcpu *dest;
+
+ min = kvm_read_reg(vcpu, LOONGARCH_GPR_A3);
+ for (i = 0; i < 2; i++, min += BITS_PER_LONG) {
+ ipi_bitmap = kvm_read_reg(vcpu, LOONGARCH_GPR_A1 + i);
+ if (!ipi_bitmap)
+ continue;
+
+ cpu = find_first_bit((void *)&ipi_bitmap, BITS_PER_LONG);
+ while (cpu < BITS_PER_LONG) {
+ dest = kvm_get_vcpu_by_cpuid(vcpu->kvm, cpu + min);
+ cpu = find_next_bit((void *)&ipi_bitmap, BITS_PER_LONG, cpu + 1);
+ if (!dest)
+ continue;
+
+ /* Send SWI0 to dest vcpu to emulate IPI interrupt */
+ kvm_queue_irq(dest, INT_SWI0);
+ kvm_vcpu_kick(dest);
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Hypercall emulation always returns to the guest; the caller should check retval.
+ */
+static void kvm_handle_service(struct kvm_vcpu *vcpu)
+{
+ unsigned long func = kvm_read_reg(vcpu, LOONGARCH_GPR_A0);
+ long ret;
+
+ switch (func) {
+ case KVM_HCALL_FUNC_IPI:
+ kvm_send_pv_ipi(vcpu);
+ ret = KVM_HCALL_SUCCESS;
+ break;
+ default:
+ ret = KVM_HCALL_INVALID_CODE;
+ break;
+ };
+
+ kvm_write_reg(vcpu, LOONGARCH_GPR_A0, ret);
+}
+
+static int kvm_handle_hypercall(struct kvm_vcpu *vcpu)
+{
+ int ret;
+ larch_inst inst;
+ unsigned int code;
+
+ inst.word = vcpu->arch.badi;
+ code = inst.reg0i15_format.immediate;
+ ret = RESUME_GUEST;
+
+ switch (code) {
+ case KVM_HCALL_SERVICE:
+ vcpu->stat.hypercall_exits++;
+ kvm_handle_service(vcpu);
+ break;
+ case KVM_HCALL_SWDBG:
+ /* KVM_HCALL_SWDBG is only effective when SW_BP is enabled */
+ if (vcpu->guest_debug & KVM_GUESTDBG_SW_BP_MASK) {
+ vcpu->run->exit_reason = KVM_EXIT_DEBUG;
+ ret = RESUME_HOST;
+ break;
+ }
+ fallthrough;
+ default:
+ /* Treat it as a noop instruction; only set the return value */
+ kvm_write_reg(vcpu, LOONGARCH_GPR_A0, KVM_HCALL_INVALID_CODE);
+ break;
+ }
+
+ if (ret == RESUME_GUEST)
+ update_pc(&vcpu->arch);
+
+ return ret;
+}
+
/*
* LoongArch KVM callback handling for unimplemented guest exiting
*/
@@ -716,6 +832,7 @@ static exit_handle_fn kvm_fault_tables[EXCCODE_INT_START] = {
[EXCCODE_LSXDIS] = kvm_handle_lsx_disabled,
[EXCCODE_LASXDIS] = kvm_handle_lasx_disabled,
[EXCCODE_GSPR] = kvm_handle_gspr,
+ [EXCCODE_HVC] = kvm_handle_hypercall,
};
int kvm_handle_fault(struct kvm_vcpu *vcpu, int fault)
diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c
index a556cff35740..98883aa23ab8 100644
--- a/arch/loongarch/kvm/mmu.c
+++ b/arch/loongarch/kvm/mmu.c
@@ -494,38 +494,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
range->end << PAGE_SHIFT, &ctx);
}
-bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
-{
- unsigned long prot_bits;
- kvm_pte_t *ptep;
- kvm_pfn_t pfn = pte_pfn(range->arg.pte);
- gpa_t gpa = range->start << PAGE_SHIFT;
-
- ptep = kvm_populate_gpa(kvm, NULL, gpa, 0);
- if (!ptep)
- return false;
-
- /* Replacing an absent or old page doesn't need flushes */
- if (!kvm_pte_present(NULL, ptep) || !kvm_pte_young(*ptep)) {
- kvm_set_pte(ptep, 0);
- return false;
- }
-
- /* Fill new pte if write protected or page migrated */
- prot_bits = _PAGE_PRESENT | __READABLE;
- prot_bits |= _CACHE_MASK & pte_val(range->arg.pte);
-
- /*
- * Set _PAGE_WRITE or _PAGE_DIRTY iff old and new pte both support
- * _PAGE_WRITE for map_page_fast if next page write fault
- * _PAGE_DIRTY since gpa has already recorded as dirty page
- */
- prot_bits |= __WRITEABLE & *ptep & pte_val(range->arg.pte);
- kvm_set_pte(ptep, kvm_pfn_pte(pfn, __pgprot(prot_bits)));
-
- return true;
-}
-
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
kvm_ptw_ctx ctx;
diff --git a/arch/loongarch/kvm/trace.h b/arch/loongarch/kvm/trace.h
index c2484ad4cffa..1783397b1bc8 100644
--- a/arch/loongarch/kvm/trace.h
+++ b/arch/loongarch/kvm/trace.h
@@ -19,14 +19,16 @@ DECLARE_EVENT_CLASS(kvm_transition,
TP_PROTO(struct kvm_vcpu *vcpu),
TP_ARGS(vcpu),
TP_STRUCT__entry(
+ __field(unsigned int, vcpu_id)
__field(unsigned long, pc)
),
TP_fast_assign(
+ __entry->vcpu_id = vcpu->vcpu_id;
__entry->pc = vcpu->arch.pc;
),
- TP_printk("PC: 0x%08lx", __entry->pc)
+ TP_printk("vcpu %u PC: 0x%08lx", __entry->vcpu_id, __entry->pc)
);
DEFINE_EVENT(kvm_transition, kvm_enter,
@@ -54,19 +56,22 @@ DECLARE_EVENT_CLASS(kvm_exit,
TP_PROTO(struct kvm_vcpu *vcpu, unsigned int reason),
TP_ARGS(vcpu, reason),
TP_STRUCT__entry(
+ __field(unsigned int, vcpu_id)
__field(unsigned long, pc)
__field(unsigned int, reason)
),
TP_fast_assign(
+ __entry->vcpu_id = vcpu->vcpu_id;
__entry->pc = vcpu->arch.pc;
__entry->reason = reason;
),
- TP_printk("[%s]PC: 0x%08lx",
- __print_symbolic(__entry->reason,
- kvm_trace_symbol_exit_types),
- __entry->pc)
+ TP_printk("vcpu %u [%s] PC: 0x%08lx",
+ __entry->vcpu_id,
+ __print_symbolic(__entry->reason,
+ kvm_trace_symbol_exit_types),
+ __entry->pc)
);
DEFINE_EVENT(kvm_exit, kvm_exit_idle,
@@ -85,14 +90,17 @@ TRACE_EVENT(kvm_exit_gspr,
TP_PROTO(struct kvm_vcpu *vcpu, unsigned int inst_word),
TP_ARGS(vcpu, inst_word),
TP_STRUCT__entry(
+ __field(unsigned int, vcpu_id)
__field(unsigned int, inst_word)
),
TP_fast_assign(
+ __entry->vcpu_id = vcpu->vcpu_id;
__entry->inst_word = inst_word;
),
- TP_printk("Inst word: 0x%08x", __entry->inst_word)
+ TP_printk("vcpu %u Inst word: 0x%08x", __entry->vcpu_id,
+ __entry->inst_word)
);
#define KVM_TRACE_AUX_SAVE 0
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index 3a8779065f73..9e8030d45129 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -19,6 +19,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
STATS_DESC_COUNTER(VCPU, idle_exits),
STATS_DESC_COUNTER(VCPU, cpucfg_exits),
STATS_DESC_COUNTER(VCPU, signal_exits),
+ STATS_DESC_COUNTER(VCPU, hypercall_exits)
};
const struct kvm_stats_header kvm_vcpu_stats_header = {
@@ -247,7 +248,101 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
- return -EINVAL;
+ if (dbg->control & ~KVM_GUESTDBG_VALID_MASK)
+ return -EINVAL;
+
+ if (dbg->control & KVM_GUESTDBG_ENABLE)
+ vcpu->guest_debug = dbg->control;
+ else
+ vcpu->guest_debug = 0;
+
+ return 0;
+}
+
+static inline int kvm_set_cpuid(struct kvm_vcpu *vcpu, u64 val)
+{
+ int cpuid;
+ struct kvm_phyid_map *map;
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+
+ if (val >= KVM_MAX_PHYID)
+ return -EINVAL;
+
+ map = vcpu->kvm->arch.phyid_map;
+ cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_CPUID);
+
+ spin_lock(&vcpu->kvm->arch.phyid_map_lock);
+ if ((cpuid < KVM_MAX_PHYID) && map->phys_map[cpuid].enabled) {
+ /* Discard duplicated CPUID set operation */
+ if (cpuid == val) {
+ spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
+ return 0;
+ }
+
+ /*
+ * The CPUID has already been set; forbid changing it to a
+ * different value at runtime.
+ */
+ spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
+ return -EINVAL;
+ }
+
+ if (map->phys_map[val].enabled) {
+ /* Discard duplicated CPUID set operation */
+ if (vcpu == map->phys_map[val].vcpu) {
+ spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
+ return 0;
+ }
+
+ /*
+ * The new CPUID is already owned by another vcpu; forbid
+ * sharing the same CPUID between different vcpus.
+ */
+ spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
+ return -EINVAL;
+ }
+
+ kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, val);
+ map->phys_map[val].enabled = true;
+ map->phys_map[val].vcpu = vcpu;
+ spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
+
+ return 0;
+}
+
+static inline void kvm_drop_cpuid(struct kvm_vcpu *vcpu)
+{
+ int cpuid;
+ struct kvm_phyid_map *map;
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+
+ map = vcpu->kvm->arch.phyid_map;
+ cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_CPUID);
+
+ if (cpuid >= KVM_MAX_PHYID)
+ return;
+
+ spin_lock(&vcpu->kvm->arch.phyid_map_lock);
+ if (map->phys_map[cpuid].enabled) {
+ map->phys_map[cpuid].vcpu = NULL;
+ map->phys_map[cpuid].enabled = false;
+ kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, KVM_MAX_PHYID);
+ }
+ spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
+}
+
+struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid)
+{
+ struct kvm_phyid_map *map;
+
+ if (cpuid >= KVM_MAX_PHYID)
+ return NULL;
+
+ map = kvm->arch.phyid_map;
+ if (!map->phys_map[cpuid].enabled)
+ return NULL;
+
+ return map->phys_map[cpuid].vcpu;
}
static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val)
@@ -282,6 +377,9 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
if (get_gcsr_flag(id) & INVALID_GCSR)
return -EINVAL;
+ if (id == LOONGARCH_CSR_CPUID)
+ return kvm_set_cpuid(vcpu, val);
+
if (id == LOONGARCH_CSR_ESTAT) {
/* ESTAT IP0~IP7 inject through GINTC */
gintc = (val >> 2) & 0xff;
@@ -409,6 +507,9 @@ static int kvm_get_one_reg(struct kvm_vcpu *vcpu,
case KVM_REG_LOONGARCH_COUNTER:
*v = drdtime() + vcpu->kvm->arch.time_offset;
break;
+ case KVM_REG_LOONGARCH_DEBUG_INST:
+ *v = INSN_HVCL | KVM_HCALL_SWDBG;
+ break;
default:
ret = -EINVAL;
break;
@@ -924,6 +1025,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
/* Set cpuid */
kvm_write_sw_gcsr(csr, LOONGARCH_CSR_TMID, vcpu->vcpu_id);
+ kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, KVM_MAX_PHYID);
/* Start with no pending virtual guest interrupts */
csr->csrs[LOONGARCH_CSR_GINTC] = 0;
@@ -942,6 +1044,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
hrtimer_cancel(&vcpu->arch.swtimer);
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
+ kvm_drop_cpuid(vcpu);
kfree(vcpu->arch.csr);
/*
diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c
index 0a37f6fa8f2d..6b2e4f66ad26 100644
--- a/arch/loongarch/kvm/vm.c
+++ b/arch/loongarch/kvm/vm.c
@@ -30,6 +30,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (!kvm->arch.pgd)
return -ENOMEM;
+ kvm->arch.phyid_map = kvzalloc(sizeof(struct kvm_phyid_map), GFP_KERNEL_ACCOUNT);
+ if (!kvm->arch.phyid_map) {
+ free_page((unsigned long)kvm->arch.pgd);
+ kvm->arch.pgd = NULL;
+ return -ENOMEM;
+ }
+ spin_lock_init(&kvm->arch.phyid_map_lock);
+
kvm_init_vmcs(kvm);
kvm->arch.gpa_size = BIT(cpu_vabits - 1);
kvm->arch.root_level = CONFIG_PGTABLE_LEVELS - 1;
@@ -52,6 +60,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_destroy_vcpus(kvm);
free_page((unsigned long)kvm->arch.pgd);
kvm->arch.pgd = NULL;
+ kvfree(kvm->arch.phyid_map);
+ kvm->arch.phyid_map = NULL;
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@@ -66,6 +76,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_IMMEDIATE_EXIT:
case KVM_CAP_IOEVENTFD:
case KVM_CAP_MP_STATE:
+ case KVM_CAP_SET_GUEST_DEBUG:
r = 1;
break;
case KVM_CAP_NR_VCPUS:
diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c
index 4dd53427f657..bf789d114c2d 100644
--- a/arch/loongarch/mm/init.c
+++ b/arch/loongarch/mm/init.c
@@ -24,6 +24,7 @@
#include <linux/gfp.h>
#include <linux/hugetlb.h>
#include <linux/mmzone.h>
+#include <linux/execmem.h>
#include <asm/asm-offsets.h>
#include <asm/bootinfo.h>
@@ -248,3 +249,23 @@ EXPORT_SYMBOL(invalid_pmd_table);
#endif
pte_t invalid_pte_table[PTRS_PER_PTE] __page_aligned_bss;
EXPORT_SYMBOL(invalid_pte_table);
+
+#ifdef CONFIG_EXECMEM
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
+{
+ execmem_info = (struct execmem_info){
+ .ranges = {
+ [EXECMEM_DEFAULT] = {
+ .start = MODULES_VADDR,
+ .end = MODULES_END,
+ .pgprot = PAGE_KERNEL,
+ .alignment = 1,
+ },
+ },
+ };
+
+ return &execmem_info;
+}
+#endif /* CONFIG_EXECMEM */
diff --git a/arch/m68k/include/asm/pgtable.h b/arch/m68k/include/asm/pgtable.h
index 27525c6a12fd..49fcfd734860 100644
--- a/arch/m68k/include/asm/pgtable.h
+++ b/arch/m68k/include/asm/pgtable.h
@@ -2,6 +2,8 @@
#ifndef __M68K_PGTABLE_H
#define __M68K_PGTABLE_H
+#include <asm/page.h>
+
#ifdef __uClinux__
#include <asm/pgtable_no.h>
#else
diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig
index 4da7bc4ac4a3..176314f3c9aa 100644
--- a/arch/microblaze/configs/mmu_defconfig
+++ b/arch/microblaze/configs/mmu_defconfig
@@ -4,7 +4,7 @@ CONFIG_AUDIT=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_EXPERT=y
-# CONFIG_BASE_FULL is not set
+CONFIG_BASE_SMALL=y
CONFIG_KALLSYMS_ALL=y
CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR=1
CONFIG_XILINX_MICROBLAZE0_USE_PCMP_INSTR=1
diff --git a/arch/mips/configs/rs90_defconfig b/arch/mips/configs/rs90_defconfig
index 4b9e36d6400e..a53dd66e9b86 100644
--- a/arch/mips/configs/rs90_defconfig
+++ b/arch/mips/configs/rs90_defconfig
@@ -9,7 +9,7 @@ CONFIG_LD_DEAD_CODE_DATA_ELIMINATION=y
# CONFIG_SGETMASK_SYSCALL is not set
# CONFIG_SYSFS_SYSCALL is not set
# CONFIG_ELF_CORE is not set
-# CONFIG_BASE_FULL is not set
+CONFIG_BASE_SMALL=y
# CONFIG_TIMERFD is not set
# CONFIG_AIO is not set
# CONFIG_IO_URING is not set
diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h
index 20ca48c1b606..c0109aff223b 100644
--- a/arch/mips/include/asm/pgtable-64.h
+++ b/arch/mips/include/asm/pgtable-64.h
@@ -147,8 +147,8 @@
#if defined(CONFIG_MODULES) && defined(KBUILD_64BIT_SYM32) && \
VMALLOC_START != CKSSEG
/* Load modules into 32bit-compatible segment. */
-#define MODULE_START CKSSEG
-#define MODULE_END (FIXADDR_START-2*PAGE_SIZE)
+#define MODULES_VADDR CKSSEG
+#define MODULES_END (FIXADDR_START-2*PAGE_SIZE)
#endif
#define pte_ERROR(e) \
diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c
index 7b2fbaa9cac5..ba0f62d8eff5 100644
--- a/arch/mips/kernel/module.c
+++ b/arch/mips/kernel/module.c
@@ -13,7 +13,6 @@
#include <linux/elf.h>
#include <linux/mm.h>
#include <linux/numa.h>
-#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/string.h>
@@ -31,15 +30,6 @@ struct mips_hi16 {
static LIST_HEAD(dbe_list);
static DEFINE_SPINLOCK(dbe_lock);
-#ifdef MODULE_START
-void *module_alloc(unsigned long size)
-{
- return __vmalloc_node_range(size, 1, MODULE_START, MODULE_END,
- GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
- __builtin_return_address(0));
-}
-#endif
-
static void apply_r_mips_32(u32 *location, u32 base, Elf_Addr v)
{
*location = base + v;
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
index 467ee6b95ae1..c17157e700c0 100644
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -444,36 +444,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
return true;
}
-bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
-{
- gpa_t gpa = range->start << PAGE_SHIFT;
- pte_t hva_pte = range->arg.pte;
- pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
- pte_t old_pte;
-
- if (!gpa_pte)
- return false;
-
- /* Mapping may need adjusting depending on memslot flags */
- old_pte = *gpa_pte;
- if (range->slot->flags & KVM_MEM_LOG_DIRTY_PAGES && !pte_dirty(old_pte))
- hva_pte = pte_mkclean(hva_pte);
- else if (range->slot->flags & KVM_MEM_READONLY)
- hva_pte = pte_wrprotect(hva_pte);
-
- set_pte(gpa_pte, hva_pte);
-
- /* Replacing an absent or old page doesn't need flushes */
- if (!pte_present(old_pte) || !pte_young(old_pte))
- return false;
-
- /* Pages swapped, aged, moved, or cleaned require flushes */
- return !pte_present(hva_pte) ||
- !pte_young(hva_pte) ||
- pte_pfn(old_pte) != pte_pfn(hva_pte) ||
- (pte_dirty(old_pte) && !pte_dirty(hva_pte));
-}
-
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
return kvm_mips_mkold_gpa_pt(kvm, range->start, range->end);
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index aaa9a242ebba..37fedeaca2e9 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -83,8 +83,8 @@ static void __do_page_fault(struct pt_regs *regs, unsigned long write,
if (unlikely(address >= VMALLOC_START && address <= VMALLOC_END))
goto VMALLOC_FAULT_TARGET;
-#ifdef MODULE_START
- if (unlikely(address >= MODULE_START && address < MODULE_END))
+#ifdef MODULES_VADDR
+ if (unlikely(address >= MODULES_VADDR && address < MODULES_END))
goto VMALLOC_FAULT_TARGET;
#endif
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 39f129205b0c..4583d1a2a73e 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -31,6 +31,7 @@
#include <linux/gfp.h>
#include <linux/kcore.h>
#include <linux/initrd.h>
+#include <linux/execmem.h>
#include <asm/bootinfo.h>
#include <asm/cachectl.h>
@@ -576,3 +577,25 @@ EXPORT_SYMBOL_GPL(invalid_pmd_table);
#endif
pte_t invalid_pte_table[PTRS_PER_PTE] __page_aligned_bss;
EXPORT_SYMBOL(invalid_pte_table);
+
+#ifdef CONFIG_EXECMEM
+#ifdef MODULES_VADDR
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
+{
+ execmem_info = (struct execmem_info){
+ .ranges = {
+ [EXECMEM_DEFAULT] = {
+ .start = MODULES_VADDR,
+ .end = MODULES_END,
+ .pgprot = PAGE_KERNEL,
+ .alignment = 1,
+ },
+ },
+ };
+
+ return &execmem_info;
+}
+#endif
+#endif /* CONFIG_EXECMEM */
diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h
index d052dfcbe8d3..eab87c6beacb 100644
--- a/arch/nios2/include/asm/pgtable.h
+++ b/arch/nios2/include/asm/pgtable.h
@@ -25,7 +25,10 @@
#include <asm-generic/pgtable-nopmd.h>
#define VMALLOC_START CONFIG_NIOS2_KERNEL_MMU_REGION_BASE
-#define VMALLOC_END (CONFIG_NIOS2_KERNEL_REGION_BASE - 1)
+#define VMALLOC_END (CONFIG_NIOS2_KERNEL_REGION_BASE - SZ_32M - 1)
+
+#define MODULES_VADDR (CONFIG_NIOS2_KERNEL_REGION_BASE - SZ_32M)
+#define MODULES_END (CONFIG_NIOS2_KERNEL_REGION_BASE - 1)
struct mm_struct;
diff --git a/arch/nios2/kernel/module.c b/arch/nios2/kernel/module.c
index 76e0a42d6e36..f4483243578d 100644
--- a/arch/nios2/kernel/module.c
+++ b/arch/nios2/kernel/module.c
@@ -13,7 +13,6 @@
#include <linux/moduleloader.h>
#include <linux/elf.h>
#include <linux/mm.h>
-#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/string.h>
@@ -21,25 +20,6 @@
#include <asm/cacheflush.h>
-/*
- * Modules should NOT be allocated with kmalloc for (obvious) reasons.
- * But we do it for now to avoid relocation issues. CALL26/PCREL26 cannot reach
- * from 0x80000000 (vmalloc area) to 0xc00000000 (kernel) (kmalloc returns
- * addresses in 0xc0000000)
- */
-void *module_alloc(unsigned long size)
-{
- if (size == 0)
- return NULL;
- return kmalloc(size, GFP_KERNEL);
-}
-
-/* Free memory returned from module_alloc */
-void module_memfree(void *module_region)
-{
- kfree(module_region);
-}
-
int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab,
unsigned int symindex, unsigned int relsec,
struct module *mod)
diff --git a/arch/nios2/mm/init.c b/arch/nios2/mm/init.c
index 7bc82ee889c9..3459df28afee 100644
--- a/arch/nios2/mm/init.c
+++ b/arch/nios2/mm/init.c
@@ -26,6 +26,7 @@
#include <linux/memblock.h>
#include <linux/slab.h>
#include <linux/binfmts.h>
+#include <linux/execmem.h>
#include <asm/setup.h>
#include <asm/page.h>
@@ -143,3 +144,23 @@ static const pgprot_t protection_map[16] = {
[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = MKP(1, 1, 1)
};
DECLARE_VM_GET_PAGE_PROT
+
+#ifdef CONFIG_EXECMEM
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
+{
+ execmem_info = (struct execmem_info){
+ .ranges = {
+ [EXECMEM_DEFAULT] = {
+ .start = MODULES_VADDR,
+ .end = MODULES_END,
+ .pgprot = PAGE_KERNEL_EXEC,
+ .alignment = 1,
+ },
+ },
+ };
+
+ return &execmem_info;
+}
+#endif /* CONFIG_EXECMEM */
diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig
index ee4febb30386..5ce258f3fffa 100644
--- a/arch/parisc/configs/generic-32bit_defconfig
+++ b/arch/parisc/configs/generic-32bit_defconfig
@@ -131,7 +131,7 @@ CONFIG_PPDEV=m
CONFIG_I2C=y
CONFIG_HWMON=m
CONFIG_DRM=m
-CONFIG_DRM_DP_CEC=y
+CONFIG_DRM_DISPLAY_DP_AUX_CEC=y
# CONFIG_DRM_I2C_CH7006 is not set
# CONFIG_DRM_I2C_SIL164 is not set
CONFIG_DRM_RADEON=m
diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h
index ad4e15d12ed1..4bea2e95798f 100644
--- a/arch/parisc/include/asm/page.h
+++ b/arch/parisc/include/asm/page.h
@@ -8,6 +8,7 @@
#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
+#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
#ifndef __ASSEMBLY__
diff --git a/arch/parisc/include/asm/signal.h b/arch/parisc/include/asm/signal.h
index 715c96ba2ec8..e84883c6b4c7 100644
--- a/arch/parisc/include/asm/signal.h
+++ b/arch/parisc/include/asm/signal.h
@@ -4,23 +4,11 @@
#include <uapi/asm/signal.h>
-#define _NSIG 64
-/* bits-per-word, where word apparently means 'long' not 'int' */
-#define _NSIG_BPW BITS_PER_LONG
-#define _NSIG_WORDS (_NSIG / _NSIG_BPW)
-
# ifndef __ASSEMBLY__
/* Most things should be clean enough to redefine this at will, if care
is taken to make libc match. */
-typedef unsigned long old_sigset_t; /* at least 32 bits */
-
-typedef struct {
- /* next_signal() assumes this is a long - no choice */
- unsigned long sig[_NSIG_WORDS];
-} sigset_t;
-
#include <asm/sigcontext.h>
#endif /* !__ASSEMBLY */
diff --git a/arch/parisc/include/uapi/asm/signal.h b/arch/parisc/include/uapi/asm/signal.h
index 8e4895c5ea5d..40d7a574c5dd 100644
--- a/arch/parisc/include/uapi/asm/signal.h
+++ b/arch/parisc/include/uapi/asm/signal.h
@@ -57,10 +57,20 @@
#include <asm-generic/signal-defs.h>
+#define _NSIG 64
+#define _NSIG_BPW (sizeof(unsigned long) * 8)
+#define _NSIG_WORDS (_NSIG / _NSIG_BPW)
+
# ifndef __ASSEMBLY__
# include <linux/types.h>
+typedef unsigned long old_sigset_t; /* at least 32 bits */
+
+typedef struct {
+ unsigned long sig[_NSIG_WORDS];
+} sigset_t;
+
/* Avoid too many header ordering problems. */
struct siginfo;
diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
index 621a4b386ae4..c91f9c2e61ed 100644
--- a/arch/parisc/kernel/ftrace.c
+++ b/arch/parisc/kernel/ftrace.c
@@ -206,6 +206,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct kprobe *p;
int bit;
+ if (unlikely(kprobe_ftrace_disabled))
+ return;
+
bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0)
return;
diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c
index d214bbe3c2af..4e5d991b2b65 100644
--- a/arch/parisc/kernel/module.c
+++ b/arch/parisc/kernel/module.c
@@ -41,7 +41,6 @@
#include <linux/moduleloader.h>
#include <linux/elf.h>
-#include <linux/vmalloc.h>
#include <linux/fs.h>
#include <linux/ftrace.h>
#include <linux/string.h>
@@ -173,17 +172,6 @@ static inline int reassemble_22(int as22)
((as22 & 0x0003ff) << 3));
}
-void *module_alloc(unsigned long size)
-{
- /* using RWX means less protection for modules, but it's
- * easier than trying to map the text, data, init_text and
- * init_data correctly */
- return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
- GFP_KERNEL,
- PAGE_KERNEL_RWX, 0, NUMA_NO_NODE,
- __builtin_return_address(0));
-}
-
#ifndef CONFIG_64BIT
static inline unsigned long count_gots(const Elf_Rela *rela, unsigned long n)
{
diff --git a/arch/parisc/math-emu/driver.c b/arch/parisc/math-emu/driver.c
index 6ce427b58836..34495446e051 100644
--- a/arch/parisc/math-emu/driver.c
+++ b/arch/parisc/math-emu/driver.c
@@ -26,12 +26,6 @@
#define FPUDEBUG 0
-/* Format of the floating-point exception registers. */
-struct exc_reg {
- unsigned int exception : 6;
- unsigned int ei : 26;
-};
-
/* Macros for grabbing bits of the instruction format from the 'ei'
field above. */
/* Major opcode 0c and 0e */
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index f876af56e13f..34d91cb8b259 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -24,6 +24,7 @@
#include <linux/nodemask.h> /* for node_online_map */
#include <linux/pagemap.h> /* for release_pages */
#include <linux/compat.h>
+#include <linux/execmem.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
@@ -481,7 +482,7 @@ void free_initmem(void)
/* finally dump all the instructions which were cached, since the
* pages are no-longer executable */
flush_icache_range(init_begin, init_end);
-
+
free_initmem_default(POISON_FREE_INITMEM);
/* set up a new led state on systems shipped LED State panel */
@@ -992,3 +993,23 @@ static const pgprot_t protection_map[16] = {
[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_RWX
};
DECLARE_VM_GET_PAGE_PROT
+
+#ifdef CONFIG_EXECMEM
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
+{
+ execmem_info = (struct execmem_info){
+ .ranges = {
+ [EXECMEM_DEFAULT] = {
+ .start = VMALLOC_START,
+ .end = VMALLOC_END,
+ .pgprot = PAGE_KERNEL_RWX,
+ .alignment = 1,
+ },
+ },
+ };
+
+ return &execmem_info;
+}
+#endif /* CONFIG_EXECMEM */
diff --git a/arch/powerpc/Kbuild b/arch/powerpc/Kbuild
index 22cd0d55a892..571f260b0842 100644
--- a/arch/powerpc/Kbuild
+++ b/arch/powerpc/Kbuild
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror -Wa,--fatal-warnings
+subdir-asflags-$(CONFIG_PPC_WERROR) := -Wa,--fatal-warnings
obj-y += kernel/
obj-y += mm/
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index c45fa9d7fb76..70e118f140eb 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -286,7 +286,7 @@ config PPC
select IOMMU_HELPER if PPC64
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
- select KASAN_VMALLOC if KASAN && MODULES
+ select KASAN_VMALLOC if KASAN && EXECMEM
select LOCK_MM_AND_FIND_VMA
select MMU_GATHER_PAGE_SIZE
select MMU_GATHER_RCU_TABLE_FREE
@@ -687,6 +687,10 @@ config ARCH_SELECTS_CRASH_DUMP
depends on CRASH_DUMP
select RELOCATABLE if PPC64 || 44x || PPC_85xx
+config ARCH_SUPPORTS_CRASH_HOTPLUG
+ def_bool y
+ depends on PPC64
+
config FA_DUMP
bool "Firmware-assisted dump"
depends on CRASH_DUMP && PPC64 && (PPC_RTAS || PPC_POWERNV)
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 65261cbe5bfd..0a0c57aee1ae 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -114,7 +114,6 @@ LDFLAGS_vmlinux := $(LDFLAGS_vmlinux-y)
ifdef CONFIG_PPC64
ifndef CONFIG_PPC_KERNEL_PCREL
-ifeq ($(call cc-option-yn,-mcmodel=medium),y)
# -mcmodel=medium breaks modules because it uses 32bit offsets from
# the TOC pointer to create pointers where possible. Pointers into the
# percpu data area are created by this method.
@@ -124,9 +123,6 @@ ifeq ($(call cc-option-yn,-mcmodel=medium),y)
# kernel percpu data space (starting with 0xc...). We need a full
# 64bit relocation for this to work, hence -mcmodel=large.
KBUILD_CFLAGS_MODULE += -mcmodel=large
-else
- export NO_MINIMAL_TOC := -mno-minimal-toc
-endif
endif
endif
@@ -139,7 +135,7 @@ CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1)
CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcall-aixdesc)
endif
endif
-CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc))
+CFLAGS-$(CONFIG_PPC64) += -mcmodel=medium
CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions)
CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mlong-double-128)
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 968aee2025b8..9c2b6e527ed1 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -108,8 +108,8 @@ DTC_FLAGS ?= -p 1024
# these files into the build dir, fix up any includes and ensure that dependent
# files are copied in the right order.
-# these need to be seperate variables because they are copied out of different
-# directories in the kernel tree. Sure you COULd merge them, but it's a
+# these need to be separate variables because they are copied out of different
+# directories in the kernel tree. Sure you COULD merge them, but it's a
# cure-is-worse-than-disease situation.
zlib-decomp-$(CONFIG_KERNEL_GZIP) := decompress_inflate.c
zlib-$(CONFIG_KERNEL_GZIP) := inffast.c inflate.c inftrees.c
diff --git a/arch/powerpc/boot/decompress.c b/arch/powerpc/boot/decompress.c
index 977eb15a6d17..6835cb53f034 100644
--- a/arch/powerpc/boot/decompress.c
+++ b/arch/powerpc/boot/decompress.c
@@ -101,7 +101,7 @@ static void print_err(char *s)
* @input_size: length of the input buffer
* @outbuf: output buffer
* @output_size: length of the output buffer
- * @skip number of output bytes to ignore
+ * @_skip: number of output bytes to ignore
*
* This function takes compressed data from inbuf, decompresses and write it to
* outbuf. Once output_size bytes are written to the output buffer, or the
diff --git a/arch/powerpc/boot/dts/acadia.dts b/arch/powerpc/boot/dts/acadia.dts
index deb52e41ab84..5fedda811378 100644
--- a/arch/powerpc/boot/dts/acadia.dts
+++ b/arch/powerpc/boot/dts/acadia.dts
@@ -172,7 +172,7 @@
reg = <0xef602800 0x60>;
interrupt-parent = <&UIC0>;
interrupts = <0x4 0x4>;
- /* This thing is a bit weird. It has it's own UIC
+ /* This thing is a bit weird. It has its own UIC
* that it uses to generate snapshot triggers. We
* don't really support this device yet, and it needs
* work to figure this out.
diff --git a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi
index 4f044b41a776..fb3200b006ad 100644
--- a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi
@@ -50,7 +50,7 @@
&ifc {
#address-cells = <2>;
#size-cells = <1>;
- compatible = "fsl,ifc", "simple-bus";
+ compatible = "fsl,ifc";
interrupts = <25 2 0 0>;
};
diff --git a/arch/powerpc/boot/dts/fsl/bsc9131rdb.dts b/arch/powerpc/boot/dts/fsl/bsc9131rdb.dts
index 8da984251abc..0ba86a6dce1b 100644
--- a/arch/powerpc/boot/dts/fsl/bsc9131rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/bsc9131rdb.dts
@@ -15,7 +15,7 @@
device_type = "memory";
};
- board_ifc: ifc: ifc@ff71e000 {
+ board_ifc: ifc: memory-controller@ff71e000 {
/* NAND Flash on board */
ranges = <0x0 0x0 0x0 0xff800000 0x00004000>;
reg = <0x0 0xff71e000 0x0 0x2000>;
diff --git a/arch/powerpc/boot/dts/fsl/bsc9131si-post.dtsi b/arch/powerpc/boot/dts/fsl/bsc9131si-post.dtsi
index 2a677fd323eb..5c53cee8755f 100644
--- a/arch/powerpc/boot/dts/fsl/bsc9131si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/bsc9131si-post.dtsi
@@ -35,7 +35,7 @@
&ifc {
#address-cells = <2>;
#size-cells = <1>;
- compatible = "fsl,ifc", "simple-bus";
+ compatible = "fsl,ifc";
interrupts = <16 2 0 0 20 2 0 0>;
};
diff --git a/arch/powerpc/boot/dts/fsl/bsc9132qds.dts b/arch/powerpc/boot/dts/fsl/bsc9132qds.dts
index 7cb2158dfe58..ce642e879a1b 100644
--- a/arch/powerpc/boot/dts/fsl/bsc9132qds.dts
+++ b/arch/powerpc/boot/dts/fsl/bsc9132qds.dts
@@ -15,7 +15,7 @@
device_type = "memory";
};
- ifc: ifc@ff71e000 {
+ ifc: memory-controller@ff71e000 {
/* NOR, NAND Flash on board */
ranges = <0x0 0x0 0x0 0x88000000 0x08000000
0x1 0x0 0x0 0xff800000 0x00010000>;
diff --git a/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi b/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi
index b8e0edd1ac69..4da451e000d9 100644
--- a/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi
@@ -35,7 +35,7 @@
&ifc {
#address-cells = <2>;
#size-cells = <1>;
- compatible = "fsl,ifc", "simple-bus";
+ compatible = "fsl,ifc";
/* FIXME: Test whether interrupts are split */
interrupts = <16 2 0 0 20 2 0 0>;
};
diff --git a/arch/powerpc/boot/dts/fsl/c293pcie.dts b/arch/powerpc/boot/dts/fsl/c293pcie.dts
index 5e905e0857cf..e2fdac2ed420 100644
--- a/arch/powerpc/boot/dts/fsl/c293pcie.dts
+++ b/arch/powerpc/boot/dts/fsl/c293pcie.dts
@@ -42,7 +42,7 @@
device_type = "memory";
};
- ifc: ifc@fffe1e000 {
+ ifc: memory-controller@fffe1e000 {
reg = <0xf 0xffe1e000 0 0x2000>;
ranges = <0x0 0x0 0xf 0xec000000 0x04000000
0x1 0x0 0xf 0xff800000 0x00010000
diff --git a/arch/powerpc/boot/dts/fsl/c293si-post.dtsi b/arch/powerpc/boot/dts/fsl/c293si-post.dtsi
index f208fb8f64b3..2d443d519274 100644
--- a/arch/powerpc/boot/dts/fsl/c293si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/c293si-post.dtsi
@@ -35,7 +35,7 @@
&ifc {
#address-cells = <2>;
#size-cells = <1>;
- compatible = "fsl,ifc", "simple-bus";
+ compatible = "fsl,ifc";
interrupts = <19 2 0 0>;
};
diff --git a/arch/powerpc/boot/dts/fsl/mpc8536si-post.dtsi b/arch/powerpc/boot/dts/fsl/mpc8536si-post.dtsi
index 41935709ebe8..fba40a1bccc0 100644
--- a/arch/powerpc/boot/dts/fsl/mpc8536si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/mpc8536si-post.dtsi
@@ -199,6 +199,10 @@
/include/ "pq3-dma-0.dtsi"
/include/ "pq3-etsec1-0.dtsi"
+ enet0: ethernet@24000 {
+ fsl,wake-on-filer;
+ fsl,pmc-handle = <&etsec1_clk>;
+ };
/include/ "pq3-etsec1-timer-0.dtsi"
usb@22000 {
@@ -222,9 +226,10 @@
};
/include/ "pq3-etsec1-2.dtsi"
-
- ethernet@26000 {
+ enet2: ethernet@26000 {
cell-index = <1>;
+ fsl,wake-on-filer;
+ fsl,pmc-handle = <&etsec3_clk>;
};
usb@2b000 {
@@ -249,4 +254,9 @@
reg = <0xe0000 0x1000>;
fsl,has-rstcr;
};
+
+/include/ "pq3-power.dtsi"
+ power@e0070 {
+ compatible = "fsl,mpc8536-pmc", "fsl,mpc8548-pmc";
+ };
};
diff --git a/arch/powerpc/boot/dts/fsl/mpc8544si-post.dtsi b/arch/powerpc/boot/dts/fsl/mpc8544si-post.dtsi
index b68eb119faef..ea7416af7ee3 100644
--- a/arch/powerpc/boot/dts/fsl/mpc8544si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/mpc8544si-post.dtsi
@@ -188,4 +188,6 @@
reg = <0xe0000 0x1000>;
fsl,has-rstcr;
};
+
+/include/ "pq3-power.dtsi"
};
diff --git a/arch/powerpc/boot/dts/fsl/mpc8548si-post.dtsi b/arch/powerpc/boot/dts/fsl/mpc8548si-post.dtsi
index 579d76cb8e32..dddb7374508d 100644
--- a/arch/powerpc/boot/dts/fsl/mpc8548si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/mpc8548si-post.dtsi
@@ -156,4 +156,6 @@
reg = <0xe0000 0x1000>;
fsl,has-rstcr;
};
+
+/include/ "pq3-power.dtsi"
};
diff --git a/arch/powerpc/boot/dts/fsl/mpc8572si-post.dtsi b/arch/powerpc/boot/dts/fsl/mpc8572si-post.dtsi
index 49294cf36b4e..40a6cff77032 100644
--- a/arch/powerpc/boot/dts/fsl/mpc8572si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/mpc8572si-post.dtsi
@@ -193,4 +193,6 @@
reg = <0xe0000 0x1000>;
fsl,has-rstcr;
};
+
+/include/ "pq3-power.dtsi"
};
diff --git a/arch/powerpc/boot/dts/fsl/p1010rdb-pb.dts b/arch/powerpc/boot/dts/fsl/p1010rdb-pb.dts
index 3a94acbb3c03..ce3346d77858 100644
--- a/arch/powerpc/boot/dts/fsl/p1010rdb-pb.dts
+++ b/arch/powerpc/boot/dts/fsl/p1010rdb-pb.dts
@@ -29,3 +29,19 @@
};
/include/ "p1010si-post.dtsi"
+
+&pci0 {
+ pcie@0 {
+ interrupt-map = <
+ /* IDSEL 0x0 */
+ /*
+ *irq[4:5] are active-high
+ *irq[6:7] are active-low
+ */
+ 0000 0x0 0x0 0x1 &mpic 0x4 0x2 0x0 0x0
+ 0000 0x0 0x0 0x2 &mpic 0x5 0x2 0x0 0x0
+ 0000 0x0 0x0 0x3 &mpic 0x6 0x1 0x0 0x0
+ 0000 0x0 0x0 0x4 &mpic 0x7 0x1 0x0 0x0
+ >;
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/p1010rdb-pb_36b.dts b/arch/powerpc/boot/dts/fsl/p1010rdb-pb_36b.dts
index 4cf255fedc96..83590354f9a0 100644
--- a/arch/powerpc/boot/dts/fsl/p1010rdb-pb_36b.dts
+++ b/arch/powerpc/boot/dts/fsl/p1010rdb-pb_36b.dts
@@ -56,3 +56,19 @@
};
/include/ "p1010si-post.dtsi"
+
+&pci0 {
+ pcie@0 {
+ interrupt-map = <
+ /* IDSEL 0x0 */
+ /*
+ *irq[4:5] are active-high
+ *irq[6:7] are active-low
+ */
+ 0000 0x0 0x0 0x1 &mpic 0x4 0x2 0x0 0x0
+ 0000 0x0 0x0 0x2 &mpic 0x5 0x2 0x0 0x0
+ 0000 0x0 0x0 0x3 &mpic 0x6 0x1 0x0 0x0
+ 0000 0x0 0x0 0x4 &mpic 0x7 0x1 0x0 0x0
+ >;
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi b/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi
index 2ca9cee2ddeb..ef49a7d6c69d 100644
--- a/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi
@@ -215,19 +215,3 @@
phy-connection-type = "sgmii";
};
};
-
-&pci0 {
- pcie@0 {
- interrupt-map = <
- /* IDSEL 0x0 */
- /*
- *irq[4:5] are active-high
- *irq[6:7] are active-low
- */
- 0000 0x0 0x0 0x1 &mpic 0x4 0x2 0x0 0x0
- 0000 0x0 0x0 0x2 &mpic 0x5 0x2 0x0 0x0
- 0000 0x0 0x0 0x3 &mpic 0x6 0x1 0x0 0x0
- 0000 0x0 0x0 0x4 &mpic 0x7 0x1 0x0 0x0
- >;
- };
-};
diff --git a/arch/powerpc/boot/dts/fsl/p1010rdb_32b.dtsi b/arch/powerpc/boot/dts/fsl/p1010rdb_32b.dtsi
index fdc19aab2f70..583a6cd05079 100644
--- a/arch/powerpc/boot/dts/fsl/p1010rdb_32b.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1010rdb_32b.dtsi
@@ -36,7 +36,7 @@ memory {
device_type = "memory";
};
-board_ifc: ifc: ifc@ffe1e000 {
+board_ifc: ifc: memory-controller@ffe1e000 {
/* NOR, NAND Flashes and CPLD on board */
ranges = <0x0 0x0 0x0 0xee000000 0x02000000
0x1 0x0 0x0 0xff800000 0x00010000
diff --git a/arch/powerpc/boot/dts/fsl/p1010rdb_36b.dtsi b/arch/powerpc/boot/dts/fsl/p1010rdb_36b.dtsi
index de2fceed4f79..4d41efe0038f 100644
--- a/arch/powerpc/boot/dts/fsl/p1010rdb_36b.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1010rdb_36b.dtsi
@@ -36,7 +36,7 @@ memory {
device_type = "memory";
};
-board_ifc: ifc: ifc@fffe1e000 {
+board_ifc: ifc: memory-controller@fffe1e000 {
/* NOR, NAND Flashes and CPLD on board */
ranges = <0x0 0x0 0xf 0xee000000 0x02000000
0x1 0x0 0xf 0xff800000 0x00010000
diff --git a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi
index ccda0a91abf0..2d2550729dcc 100644
--- a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi
@@ -35,7 +35,7 @@
&ifc {
#address-cells = <2>;
#size-cells = <1>;
- compatible = "fsl,ifc", "simple-bus";
+ compatible = "fsl,ifc";
interrupts = <16 2 0 0 19 2 0 0>;
};
@@ -183,9 +183,23 @@
/include/ "pq3-etsec2-1.dtsi"
/include/ "pq3-etsec2-2.dtsi"
+ enet0: ethernet@b0000 {
+ fsl,pmc-handle = <&etsec1_clk>;
+ };
+
+ enet1: ethernet@b1000 {
+ fsl,pmc-handle = <&etsec2_clk>;
+ };
+
+ enet2: ethernet@b2000 {
+ fsl,pmc-handle = <&etsec3_clk>;
+ };
+
global-utilities@e0000 {
compatible = "fsl,p1010-guts";
reg = <0xe0000 0x1000>;
fsl,has-rstcr;
};
+
+/include/ "pq3-power.dtsi"
};
diff --git a/arch/powerpc/boot/dts/fsl/p1020si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1020si-post.dtsi
index 642dc3a83d0e..cc4c7461003b 100644
--- a/arch/powerpc/boot/dts/fsl/p1020si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1020si-post.dtsi
@@ -163,14 +163,17 @@
/include/ "pq3-etsec2-0.dtsi"
enet0: enet0_grp2: ethernet@b0000 {
+ fsl,pmc-handle = <&etsec1_clk>;
};
/include/ "pq3-etsec2-1.dtsi"
enet1: enet1_grp2: ethernet@b1000 {
+ fsl,pmc-handle = <&etsec2_clk>;
};
/include/ "pq3-etsec2-2.dtsi"
enet2: enet2_grp2: ethernet@b2000 {
+ fsl,pmc-handle = <&etsec3_clk>;
};
global-utilities@e0000 {
@@ -178,6 +181,8 @@
reg = <0xe0000 0x1000>;
fsl,has-rstcr;
};
+
+/include/ "pq3-power.dtsi"
};
/include/ "pq3-etsec2-grp2-0.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1021si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1021si-post.dtsi
index 407cb5fd0f5b..378195db9fca 100644
--- a/arch/powerpc/boot/dts/fsl/p1021si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1021si-post.dtsi
@@ -159,14 +159,17 @@
/include/ "pq3-etsec2-0.dtsi"
enet0: enet0_grp2: ethernet@b0000 {
+ fsl,pmc-handle = <&etsec1_clk>;
};
/include/ "pq3-etsec2-1.dtsi"
enet1: enet1_grp2: ethernet@b1000 {
+ fsl,pmc-handle = <&etsec2_clk>;
};
/include/ "pq3-etsec2-2.dtsi"
enet2: enet2_grp2: ethernet@b2000 {
+ fsl,pmc-handle = <&etsec3_clk>;
};
global-utilities@e0000 {
@@ -174,6 +177,8 @@
reg = <0xe0000 0x1000>;
fsl,has-rstcr;
};
+
+/include/ "pq3-power.dtsi"
};
&qe {
diff --git a/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi
index 093e4e3ed368..6ac21e81344a 100644
--- a/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi
@@ -225,11 +225,13 @@
/include/ "pq3-etsec2-0.dtsi"
enet0: enet0_grp2: ethernet@b0000 {
fsl,wake-on-filer;
+ fsl,pmc-handle = <&etsec1_clk>;
};
/include/ "pq3-etsec2-1.dtsi"
enet1: enet1_grp2: ethernet@b1000 {
fsl,wake-on-filer;
+ fsl,pmc-handle = <&etsec2_clk>;
};
global-utilities@e0000 {
@@ -238,9 +240,10 @@
fsl,has-rstcr;
};
+/include/ "pq3-power.dtsi"
power@e0070 {
- compatible = "fsl,mpc8536-pmc", "fsl,mpc8548-pmc";
- reg = <0xe0070 0x20>;
+ compatible = "fsl,p1022-pmc", "fsl,mpc8536-pmc",
+ "fsl,mpc8548-pmc";
};
};
diff --git a/arch/powerpc/boot/dts/fsl/p2020si-post.dtsi b/arch/powerpc/boot/dts/fsl/p2020si-post.dtsi
index 81b9ab2119be..d410082d21c0 100644
--- a/arch/powerpc/boot/dts/fsl/p2020si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p2020si-post.dtsi
@@ -178,6 +178,10 @@
compatible = "fsl-usb2-dr-v1.6", "fsl-usb2-dr";
};
/include/ "pq3-etsec1-0.dtsi"
+ enet0: ethernet@24000 {
+ fsl,pmc-handle = <&etsec1_clk>;
+
+ };
/include/ "pq3-etsec1-timer-0.dtsi"
ptp_clock@24e00 {
@@ -186,7 +190,15 @@
/include/ "pq3-etsec1-1.dtsi"
+ enet1: ethernet@25000 {
+ fsl,pmc-handle = <&etsec2_clk>;
+ };
+
/include/ "pq3-etsec1-2.dtsi"
+ enet2: ethernet@26000 {
+ fsl,pmc-handle = <&etsec3_clk>;
+ };
+
/include/ "pq3-esdhc-0.dtsi"
sdhc@2e000 {
compatible = "fsl,p2020-esdhc", "fsl,esdhc";
@@ -202,8 +214,5 @@
fsl,has-rstcr;
};
- pmc: power@e0070 {
- compatible = "fsl,mpc8548-pmc";
- reg = <0xe0070 0x20>;
- };
+/include/ "pq3-power.dtsi"
};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-power.dtsi b/arch/powerpc/boot/dts/fsl/pq3-power.dtsi
new file mode 100644
index 000000000000..6af12401004d
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-power.dtsi
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: (GPL-2.0+)
+/*
+ * Copyright 2024 NXP
+ */
+
+power@e0070 {
+ compatible = "fsl,mpc8548-pmc";
+ reg = <0xe0070 0x20>;
+
+ etsec1_clk: soc-clk@24 {
+ fsl,pmcdr-mask = <0x00000080>;
+ };
+ etsec2_clk: soc-clk@25 {
+ fsl,pmcdr-mask = <0x00000040>;
+ };
+ etsec3_clk: soc-clk@26 {
+ fsl,pmcdr-mask = <0x00000020>;
+ };
+};
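The shared pq3-power.dtsi above is what the per-SoC /include/ lines added earlier pull in; each eTSEC node now references one of these soc-clk children through fsl,pmc-handle, and the fsl,pmcdr-mask value names the bit that gates that block's clock in the PMC. Purely as an illustration of how a consumer could resolve such a phandle with the generic OF API (the helper name and surrounding logic are assumptions, not the actual Freescale driver):

    #include <linux/of.h>

    /* Illustrative: find the soc-clk node an ethernet controller references. */
    static struct device_node *get_pmc_clk_node(struct device_node *enet_np)
    {
            /* Returns the referenced soc-clk node, or NULL if the property is absent. */
            return of_parse_phandle(enet_np, "fsl,pmc-handle", 0);
    }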
diff --git a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi
index aa5152ca8120..8ef0c020206b 100644
--- a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi
@@ -52,7 +52,7 @@
&ifc {
#address-cells = <2>;
#size-cells = <1>;
- compatible = "fsl,ifc", "simple-bus";
+ compatible = "fsl,ifc";
interrupts = <25 2 0 0>;
};
diff --git a/arch/powerpc/boot/dts/fsl/t1024rdb.dts b/arch/powerpc/boot/dts/fsl/t1024rdb.dts
index 270aaf631f2a..7d003e07a9fb 100644
--- a/arch/powerpc/boot/dts/fsl/t1024rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/t1024rdb.dts
@@ -91,7 +91,7 @@
board-control@2,0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "fsl,t1024-cpld";
+ compatible = "fsl,t1024-cpld", "fsl,deepsleep-cpld";
reg = <3 0 0x300>;
ranges = <0 3 0 0x300>;
bank-width = <1>;
diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb.dts b/arch/powerpc/boot/dts/fsl/t1040rdb.dts
index dd3aab81e9de..4347924e9aa7 100644
--- a/arch/powerpc/boot/dts/fsl/t1040rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/t1040rdb.dts
@@ -104,7 +104,7 @@
ifc: localbus@ffe124000 {
cpld@3,0 {
- compatible = "fsl,t1040rdb-cpld";
+ compatible = "fsl,t104xrdb-cpld", "fsl,deepsleep-cpld";
};
};
};
diff --git a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
index 776788623204..c9542b73bd7f 100644
--- a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
@@ -52,7 +52,7 @@
&ifc {
#address-cells = <2>;
#size-cells = <1>;
- compatible = "fsl,ifc", "simple-bus";
+ compatible = "fsl,ifc";
interrupts = <25 2 0 0>;
};
diff --git a/arch/powerpc/boot/dts/fsl/t1042rdb.dts b/arch/powerpc/boot/dts/fsl/t1042rdb.dts
index 3ebb712224cb..099764322b33 100644
--- a/arch/powerpc/boot/dts/fsl/t1042rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/t1042rdb.dts
@@ -68,7 +68,7 @@
ifc: localbus@ffe124000 {
cpld@3,0 {
- compatible = "fsl,t1042rdb-cpld";
+ compatible = "fsl,t104xrdb-cpld", "fsl,deepsleep-cpld";
};
};
};
diff --git a/arch/powerpc/boot/dts/fsl/t1042rdb_pi.dts b/arch/powerpc/boot/dts/fsl/t1042rdb_pi.dts
index 8ec3ff45e6fc..b10cab1a347b 100644
--- a/arch/powerpc/boot/dts/fsl/t1042rdb_pi.dts
+++ b/arch/powerpc/boot/dts/fsl/t1042rdb_pi.dts
@@ -41,7 +41,7 @@
ifc: localbus@ffe124000 {
cpld@3,0 {
- compatible = "fsl,t1042rdb_pi-cpld";
+ compatible = "fsl,t104xrdb-cpld", "fsl,deepsleep-cpld";
};
};
diff --git a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi
index 27714dc2f04a..6bb95878d39d 100644
--- a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi
@@ -50,7 +50,7 @@
&ifc {
#address-cells = <2>;
#size-cells = <1>;
- compatible = "fsl,ifc", "simple-bus";
+ compatible = "fsl,ifc";
interrupts = <25 2 0 0>;
};
diff --git a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
index fcac73486d48..65f3e17c0d41 100644
--- a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
@@ -50,7 +50,7 @@
&ifc {
#address-cells = <2>;
#size-cells = <1>;
- compatible = "fsl,ifc", "simple-bus";
+ compatible = "fsl,ifc";
interrupts = <25 2 0 0>;
};
diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c
index cae31a6e8f02..2c0e2a1cab01 100644
--- a/arch/powerpc/boot/main.c
+++ b/arch/powerpc/boot/main.c
@@ -188,7 +188,7 @@ static inline void prep_esm_blob(struct addr_range vmlinux, void *chosen) { }
/* A buffer that may be edited by tools operating on a zImage binary so as to
* edit the command line passed to vmlinux (by setting /chosen/bootargs).
- * The buffer is put in it's own section so that tools may locate it easier.
+ * The buffer is put in its own section so that tools may locate it easier.
*/
static char cmdline[BOOT_COMMAND_LINE_SIZE]
__attribute__((__section__("__builtin_cmdline")));
diff --git a/arch/powerpc/boot/ps3.c b/arch/powerpc/boot/ps3.c
index f157717ae814..89ff46b8b225 100644
--- a/arch/powerpc/boot/ps3.c
+++ b/arch/powerpc/boot/ps3.c
@@ -25,7 +25,7 @@ BSS_STACK(4096);
/* A buffer that may be edited by tools operating on a zImage binary so as to
* edit the command line passed to vmlinux (by setting /chosen/bootargs).
- * The buffer is put in it's own section so that tools may locate it easier.
+ * The buffer is put in its own section so that tools may locate it easier.
*/
static char cmdline[BOOT_COMMAND_LINE_SIZE]
diff --git a/arch/powerpc/configs/adder875_defconfig b/arch/powerpc/configs/adder875_defconfig
index 7f35d5bc1229..97f4d4851735 100644
--- a/arch/powerpc/configs/adder875_defconfig
+++ b/arch/powerpc/configs/adder875_defconfig
@@ -4,7 +4,7 @@ CONFIG_SYSVIPC=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_EXPERT=y
# CONFIG_ELF_CORE is not set
-# CONFIG_BASE_FULL is not set
+CONFIG_BASE_SMALL=y
# CONFIG_FUTEX is not set
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_BLK_DEV_BSG is not set
diff --git a/arch/powerpc/configs/ep88xc_defconfig b/arch/powerpc/configs/ep88xc_defconfig
index a98ef6a4abef..50cc59eb36cf 100644
--- a/arch/powerpc/configs/ep88xc_defconfig
+++ b/arch/powerpc/configs/ep88xc_defconfig
@@ -6,7 +6,7 @@ CONFIG_HIGH_RES_TIMERS=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_EXPERT=y
# CONFIG_ELF_CORE is not set
-# CONFIG_BASE_FULL is not set
+CONFIG_BASE_SMALL=y
# CONFIG_FUTEX is not set
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_BLK_DEV_BSG is not set
diff --git a/arch/powerpc/configs/mpc866_ads_defconfig b/arch/powerpc/configs/mpc866_ads_defconfig
index 5c56d36cdfc5..6f449411abf7 100644
--- a/arch/powerpc/configs/mpc866_ads_defconfig
+++ b/arch/powerpc/configs/mpc866_ads_defconfig
@@ -6,7 +6,7 @@ CONFIG_HIGH_RES_TIMERS=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_EXPERT=y
# CONFIG_BUG is not set
-# CONFIG_BASE_FULL is not set
+CONFIG_BASE_SMALL=y
# CONFIG_EPOLL is not set
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_BLK_DEV_BSG is not set
diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig
index 56b876e418e9..77306be62e9e 100644
--- a/arch/powerpc/configs/mpc885_ads_defconfig
+++ b/arch/powerpc/configs/mpc885_ads_defconfig
@@ -7,7 +7,7 @@ CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_EXPERT=y
# CONFIG_ELF_CORE is not set
-# CONFIG_BASE_FULL is not set
+CONFIG_BASE_SMALL=y
# CONFIG_FUTEX is not set
CONFIG_PERF_EVENTS=y
# CONFIG_VM_EVENT_COUNTERS is not set
diff --git a/arch/powerpc/configs/tqm8xx_defconfig b/arch/powerpc/configs/tqm8xx_defconfig
index 083c2e57520a..383c0966e92f 100644
--- a/arch/powerpc/configs/tqm8xx_defconfig
+++ b/arch/powerpc/configs/tqm8xx_defconfig
@@ -6,7 +6,7 @@ CONFIG_HIGH_RES_TIMERS=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_EXPERT=y
# CONFIG_ELF_CORE is not set
-# CONFIG_BASE_FULL is not set
+CONFIG_BASE_SMALL=y
# CONFIG_FUTEX is not set
# CONFIG_VM_EVENT_COUNTERS is not set
CONFIG_MODULES=y
diff --git a/arch/powerpc/include/asm/cpu_has_feature.h b/arch/powerpc/include/asm/cpu_has_feature.h
index 727d4b321937..0efabccd820c 100644
--- a/arch/powerpc/include/asm/cpu_has_feature.h
+++ b/arch/powerpc/include/asm/cpu_has_feature.h
@@ -29,7 +29,7 @@ static __always_inline bool cpu_has_feature(unsigned long feature)
#endif
#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG
- if (!static_key_initialized) {
+ if (!static_key_feature_checks_initialized) {
printk("Warning! cpu_has_feature() used prior to jump label init!\n");
dump_stack();
return early_cpu_has_feature(feature);
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 514dd056c2c8..91a9fd53254f 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -82,7 +82,7 @@ struct eeh_pe {
int false_positives; /* Times of reported #ff's */
atomic_t pass_dev_cnt; /* Count of passed through devs */
struct eeh_pe *parent; /* Parent PE */
- void *data; /* PE auxillary data */
+ void *data; /* PE auxiliary data */
struct list_head child_list; /* List of PEs below this PE */
struct list_head child; /* Memb. child_list/eeh_phb_pe */
struct list_head edevs; /* List of eeh_dev in this PE */
diff --git a/arch/powerpc/include/asm/fadump-internal.h b/arch/powerpc/include/asm/fadump-internal.h
index 27f9e11eda28..e83869a4eb6a 100644
--- a/arch/powerpc/include/asm/fadump-internal.h
+++ b/arch/powerpc/include/asm/fadump-internal.h
@@ -42,13 +42,38 @@ static inline u64 fadump_str_to_u64(const char *str)
#define FADUMP_CPU_UNKNOWN (~((u32)0))
-#define FADUMP_CRASH_INFO_MAGIC fadump_str_to_u64("FADMPINF")
+/*
+ * The introduction of new fields in the fadump crash info header has
+ * led to a change in the magic key from `FADMPINF` to `FADMPSIG` for
+ * identifying a kernel crash from an old kernel.
+ *
+ * To prevent the need for further changes to the magic number in the
+ * event of future modifications to the fadump crash info header, a
+ * version field has been introduced to track the fadump crash info
+ * header version.
+ *
+ * Consider a few points before adding new members to the fadump crash info
+ * header structure:
+ *
+ * - Append new members; avoid adding them in between.
+ * - Non-primitive members should have a size member as well.
+ * - For every change in the fadump header, increment the
+ * fadump header version. This helps the updated kernel decide how to
+ * handle kernel dumps from older kernels.
+ */
+#define FADUMP_CRASH_INFO_MAGIC_OLD fadump_str_to_u64("FADMPINF")
+#define FADUMP_CRASH_INFO_MAGIC fadump_str_to_u64("FADMPSIG")
+#define FADUMP_HEADER_VERSION 1
/* fadump crash info structure */
struct fadump_crash_info_header {
u64 magic_number;
- u64 elfcorehdr_addr;
+ u32 version;
u32 crashing_cpu;
+ u64 vmcoreinfo_raddr;
+ u64 vmcoreinfo_size;
+ u32 pt_regs_sz;
+ u32 cpu_mask_sz;
struct pt_regs regs;
struct cpumask cpu_mask;
};
@@ -94,9 +119,13 @@ struct fw_dump {
u64 boot_mem_regs_cnt;
unsigned long fadumphdr_addr;
+ u64 elfcorehdr_addr;
+ u64 elfcorehdr_size;
unsigned long cpu_notes_buf_vaddr;
unsigned long cpu_notes_buf_size;
+ unsigned long param_area;
+
/*
* Maximum size supported by firmware to copy from source to
* destination address per entry.
@@ -111,6 +140,7 @@ struct fw_dump {
unsigned long dump_active:1;
unsigned long dump_registered:1;
unsigned long nocma:1;
+ unsigned long param_area_supported:1;
struct fadump_ops *ops;
};
@@ -129,6 +159,7 @@ struct fadump_ops {
struct seq_file *m);
void (*fadump_trigger)(struct fadump_crash_info_header *fdh,
const char *msg);
+ int (*fadump_max_boot_mem_rgns)(void);
};
/* Helper functions */
@@ -136,7 +167,6 @@ s32 __init fadump_setup_cpu_notes_buf(u32 num_cpus);
void fadump_free_cpu_notes_buf(void);
u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs);
void __init fadump_update_elfcore_header(char *bufp);
-bool is_fadump_boot_mem_contiguous(void);
bool is_fadump_reserved_mem_contiguous(void);
#else /* !CONFIG_PRESERVE_FA_DUMP */
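The comment block added above also implies how a capture kernel can tell dump vintages apart: the old magic marks a header without the version and vmcoreinfo fields, while the new magic is only trusted up to the header version the running kernel understands. A minimal sketch of that check, assuming it happens where the header is validated (illustrative, not the actual fadump code):

    /* Sketch: accept only crash-info headers this kernel knows how to parse. */
    static bool fadump_header_usable(const struct fadump_crash_info_header *fdh)
    {
            if (fdh->magic_number == FADUMP_CRASH_INFO_MAGIC_OLD)
                    return false;   /* old layout: no version/vmcoreinfo fields */

            return fdh->magic_number == FADUMP_CRASH_INFO_MAGIC &&
                   fdh->version <= FADUMP_HEADER_VERSION;
    }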
diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
index 526a6a647312..ef40c9b6972a 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -19,12 +19,14 @@ extern int is_fadump_active(void);
extern int should_fadump_crash(void);
extern void crash_fadump(struct pt_regs *, const char *);
extern void fadump_cleanup(void);
+extern void fadump_append_bootargs(void);
#else /* CONFIG_FA_DUMP */
static inline int is_fadump_active(void) { return 0; }
static inline int should_fadump_crash(void) { return 0; }
static inline void crash_fadump(struct pt_regs *regs, const char *str) { }
static inline void fadump_cleanup(void) { }
+static inline void fadump_append_bootargs(void) { }
#endif /* !CONFIG_FA_DUMP */
#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index 77824bd289a3..17d168dd8b49 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -291,6 +291,8 @@ extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup;
extern long __start__btb_flush_fixup, __stop__btb_flush_fixup;
+extern bool static_key_feature_checks_initialized;
+
void apply_feature_fixups(void);
void update_mmu_feature_fixups(unsigned long mask);
void setup_feature_keys(void);
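static_key_feature_checks_initialized replaces static_key_initialized in the cpu_has_feature()/mmu_has_feature() debug checks earlier in this diff, so the warning only fires before the feature keys themselves are ready rather than before generic jump-label init. Presumably the flag is raised once the feature fixups/keys have been applied; a sketch of that, with the exact placement being an assumption:

    /* Sketch: raise the flag once the CPU/MMU feature jump labels are usable. */
    bool static_key_feature_checks_initialized;

    void __init setup_feature_keys(void)
    {
            /* ... existing initialisation of CPU/MMU feature keys ... */
            static_key_feature_checks_initialized = true;
    }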
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index a41e542ba94d..7a8495660c2f 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -524,7 +524,7 @@ long plpar_hcall_norets_notrace(unsigned long opcode, ...);
* Used for all but the craziest of phyp interfaces (see plpar_hcall9)
*/
#define PLPAR_HCALL_BUFSIZE 4
-long plpar_hcall(unsigned long opcode, unsigned long *retbuf, ...);
+long plpar_hcall(unsigned long opcode, unsigned long retbuf[static PLPAR_HCALL_BUFSIZE], ...);
/**
* plpar_hcall_raw: - Make a hypervisor call without calculating hcall stats
@@ -538,7 +538,7 @@ long plpar_hcall(unsigned long opcode, unsigned long *retbuf, ...);
* plpar_hcall, but plpar_hcall_raw works in real mode and does not
* calculate hypervisor call statistics.
*/
-long plpar_hcall_raw(unsigned long opcode, unsigned long *retbuf, ...);
+long plpar_hcall_raw(unsigned long opcode, unsigned long retbuf[static PLPAR_HCALL_BUFSIZE], ...);
/**
* plpar_hcall9: - Make a pseries hypervisor call with up to 9 return arguments
@@ -549,8 +549,8 @@ long plpar_hcall_raw(unsigned long opcode, unsigned long *retbuf, ...);
* PLPAR_HCALL9_BUFSIZE to size the return argument buffer.
*/
#define PLPAR_HCALL9_BUFSIZE 9
-long plpar_hcall9(unsigned long opcode, unsigned long *retbuf, ...);
-long plpar_hcall9_raw(unsigned long opcode, unsigned long *retbuf, ...);
+long plpar_hcall9(unsigned long opcode, unsigned long retbuf[static PLPAR_HCALL9_BUFSIZE], ...);
+long plpar_hcall9_raw(unsigned long opcode, unsigned long retbuf[static PLPAR_HCALL9_BUFSIZE], ...);
/* pseries hcall tracing */
extern struct static_key hcall_tracepoint_key;
@@ -570,7 +570,7 @@ struct hvcall_mpp_data {
unsigned long backing_mem;
};
-int h_get_mpp(struct hvcall_mpp_data *);
+long h_get_mpp(struct hvcall_mpp_data *mpp_data);
struct hvcall_mpp_x_data {
unsigned long coalesced_bytes;
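The retbuf parameters now use the C99 "[static N]" array form. Functionally nothing changes, but the minimum buffer size becomes part of the prototype, so compilers can diagnose callers that pass an array smaller than PLPAR_HCALL_BUFSIZE/PLPAR_HCALL9_BUFSIZE (or a null pointer). Schematically, with a generic opcode/argument as placeholders:

    unsigned long retbuf[PLPAR_HCALL_BUFSIZE];   /* 4 elements: fine */
    unsigned long too_small[2];

    plpar_hcall(opcode, retbuf, arg);      /* OK */
    plpar_hcall(opcode, too_small, arg);   /* diagnosable: fewer elements than
                                              the declared minimum */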
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 7b610864b364..2d6c886b40f4 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -336,6 +336,14 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte
if (IS_ENABLED(CONFIG_KASAN))
return;
+ /*
+ * Likewise, do not use it in real mode if percpu first chunk is not
+ * embedded. With CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK enabled there
+ * are chances where percpu allocation can come from vmalloc area.
+ */
+ if (percpu_first_chunk_is_paged)
+ return;
+
/* Otherwise, it should be safe to call it */
nmi_enter();
}
@@ -351,6 +359,8 @@ static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct inter
// no nmi_exit for a pseries hash guest taking a real mode exception
} else if (IS_ENABLED(CONFIG_KASAN)) {
// no nmi_exit for KASAN in real mode
+ } else if (percpu_first_chunk_is_paged) {
+ // no nmi_exit if percpu first chunk is not embedded
} else {
nmi_exit();
}
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 08c550ed49be..52e1b1d15ff6 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -37,7 +37,7 @@ extern struct pci_dev *isa_bridge_pcidev;
* define properly based on the platform
*/
#ifndef CONFIG_PCI
-#define _IO_BASE 0
+#define _IO_BASE POISON_POINTER_DELTA
#define _ISA_MEM_BASE 0
#define PCI_DRAM_OFFSET 0
#elif defined(CONFIG_PPC32)
@@ -585,12 +585,12 @@ __do_out_asm(_rec_outl, "stwbrx")
#define __do_inw(port) _rec_inw(port)
#define __do_inl(port) _rec_inl(port)
#else /* CONFIG_PPC32 */
-#define __do_outb(val, port) writeb(val,(PCI_IO_ADDR)_IO_BASE+port);
-#define __do_outw(val, port) writew(val,(PCI_IO_ADDR)_IO_BASE+port);
-#define __do_outl(val, port) writel(val,(PCI_IO_ADDR)_IO_BASE+port);
-#define __do_inb(port) readb((PCI_IO_ADDR)_IO_BASE + port);
-#define __do_inw(port) readw((PCI_IO_ADDR)_IO_BASE + port);
-#define __do_inl(port) readl((PCI_IO_ADDR)_IO_BASE + port);
+#define __do_outb(val, port) writeb(val,(PCI_IO_ADDR)(_IO_BASE+port));
+#define __do_outw(val, port) writew(val,(PCI_IO_ADDR)(_IO_BASE+port));
+#define __do_outl(val, port) writel(val,(PCI_IO_ADDR)(_IO_BASE+port));
+#define __do_inb(port) readb((PCI_IO_ADDR)(_IO_BASE + port));
+#define __do_inw(port) readw((PCI_IO_ADDR)(_IO_BASE + port));
+#define __do_inl(port) readl((PCI_IO_ADDR)(_IO_BASE + port));
#endif /* !CONFIG_PPC32 */
#ifdef CONFIG_EEH
@@ -606,12 +606,12 @@ __do_out_asm(_rec_outl, "stwbrx")
#define __do_writesw(a, b, n) _outsw(PCI_FIX_ADDR(a),(b),(n))
#define __do_writesl(a, b, n) _outsl(PCI_FIX_ADDR(a),(b),(n))
-#define __do_insb(p, b, n) readsb((PCI_IO_ADDR)_IO_BASE+(p), (b), (n))
-#define __do_insw(p, b, n) readsw((PCI_IO_ADDR)_IO_BASE+(p), (b), (n))
-#define __do_insl(p, b, n) readsl((PCI_IO_ADDR)_IO_BASE+(p), (b), (n))
-#define __do_outsb(p, b, n) writesb((PCI_IO_ADDR)_IO_BASE+(p),(b),(n))
-#define __do_outsw(p, b, n) writesw((PCI_IO_ADDR)_IO_BASE+(p),(b),(n))
-#define __do_outsl(p, b, n) writesl((PCI_IO_ADDR)_IO_BASE+(p),(b),(n))
+#define __do_insb(p, b, n) readsb((PCI_IO_ADDR)(_IO_BASE+(p)), (b), (n))
+#define __do_insw(p, b, n) readsw((PCI_IO_ADDR)(_IO_BASE+(p)), (b), (n))
+#define __do_insl(p, b, n) readsl((PCI_IO_ADDR)(_IO_BASE+(p)), (b), (n))
+#define __do_outsb(p, b, n) writesb((PCI_IO_ADDR)(_IO_BASE+(p)),(b),(n))
+#define __do_outsw(p, b, n) writesw((PCI_IO_ADDR)(_IO_BASE+(p)),(b),(n))
+#define __do_outsl(p, b, n) writesl((PCI_IO_ADDR)(_IO_BASE+(p)),(b),(n))
#define __do_memset_io(addr, c, n) \
_memset_io(PCI_FIX_ADDR(addr), c, n)
@@ -982,7 +982,7 @@ static inline phys_addr_t page_to_phys(struct page *page)
}
/*
- * 32 bits still uses virt_to_bus() for it's implementation of DMA
+ * 32 bits still uses virt_to_bus() for its implementation of DMA
* mappings se we have to keep it defined here. We also have some old
* drivers (shame shame shame) that use bus_to_virt() and haven't been
* fixed yet so I need to define it here.
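Two things are going on in this io.h hunk. First, with PCI disabled _IO_BASE becomes POISON_POINTER_DELTA instead of 0, so a stray port access lands in a poisoned, unmapped range rather than at address zero. Second, the accessor macros gain parentheses so that _IO_BASE + port is added as integers before the single cast to an __iomem pointer; in the old form the cast, which binds tighter than '+', applied to _IO_BASE alone, leaving pointer arithmetic on a pointer constructed from 0. A small standalone illustration of the precedence point, using hypothetical names:

    #include <stdint.h>

    #define IO_BASE 0x0UL      /* stands in for _IO_BASE */
    #define PORT    0x3f8UL

    void precedence_demo(void)
    {
            /* Parsed as ((uint8_t *)IO_BASE) + PORT, i.e. arithmetic on a
             * pointer built from 0 -- what the old macros did. */
            volatile uint8_t *before = (volatile uint8_t *)IO_BASE + PORT;

            /* Form used by the new macros: add as integers, then cast once. */
            volatile uint8_t *after = (volatile uint8_t *)(IO_BASE + PORT);

            (void)before;
            (void)after;
    }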
diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h
index 365d2720097c..b5bbb94c51f6 100644
--- a/arch/powerpc/include/asm/kasan.h
+++ b/arch/powerpc/include/asm/kasan.h
@@ -19,7 +19,7 @@
#define KASAN_SHADOW_SCALE_SHIFT 3
-#if defined(CONFIG_MODULES) && defined(CONFIG_PPC32)
+#if defined(CONFIG_EXECMEM) && defined(CONFIG_PPC32)
#define KASAN_KERN_START ALIGN_DOWN(PAGE_OFFSET - SZ_256M, SZ_256M)
#else
#define KASAN_KERN_START PAGE_OFFSET
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index fdb90e24dc74..95a98b390d62 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -135,6 +135,17 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
ppc_save_regs(newregs);
}
+#ifdef CONFIG_CRASH_HOTPLUG
+void arch_crash_handle_hotplug_event(struct kimage *image, void *arg);
+#define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event
+
+int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags);
+#define arch_crash_hotplug_support arch_crash_hotplug_support
+
+unsigned int arch_crash_get_elfcorehdr_size(void);
+#define crash_get_elfcorehdr_size arch_crash_get_elfcorehdr_size
+#endif /* CONFIG_CRASH_HOTPLUG */
+
extern int crashing_cpu;
extern void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *));
extern void crash_ipi_callback(struct pt_regs *regs);
@@ -185,6 +196,10 @@ static inline void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
#endif /* CONFIG_CRASH_DUMP */
+#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP)
+int update_cpus_node(void *fdt);
+#endif
+
#ifdef CONFIG_PPC_BOOK3S_64
#include <asm/book3s/64/kexec.h>
#endif
diff --git a/arch/powerpc/include/asm/kexec_ranges.h b/arch/powerpc/include/asm/kexec_ranges.h
index f83866a19e87..14055896cbcb 100644
--- a/arch/powerpc/include/asm/kexec_ranges.h
+++ b/arch/powerpc/include/asm/kexec_ranges.h
@@ -7,19 +7,9 @@
void sort_memory_ranges(struct crash_mem *mrngs, bool merge);
struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges);
int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size);
-int add_tce_mem_ranges(struct crash_mem **mem_ranges);
-int add_initrd_mem_range(struct crash_mem **mem_ranges);
-#ifdef CONFIG_PPC_64S_HASH_MMU
-int add_htab_mem_range(struct crash_mem **mem_ranges);
-#else
-static inline int add_htab_mem_range(struct crash_mem **mem_ranges)
-{
- return 0;
-}
-#endif
-int add_kernel_mem_range(struct crash_mem **mem_ranges);
-int add_rtas_mem_range(struct crash_mem **mem_ranges);
-int add_opal_mem_range(struct crash_mem **mem_ranges);
-int add_reserved_mem_ranges(struct crash_mem **mem_ranges);
-
+int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size);
+int get_exclude_memory_ranges(struct crash_mem **mem_ranges);
+int get_reserved_memory_ranges(struct crash_mem **mem_ranges);
+int get_crash_memory_ranges(struct crash_mem **mem_ranges);
+int get_usable_memory_ranges(struct crash_mem **mem_ranges);
#endif /* _ASM_POWERPC_KEXEC_RANGES_H */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 3281215097cc..ca3829d47ab7 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -287,7 +287,6 @@ struct kvmppc_ops {
bool (*unmap_gfn_range)(struct kvm *kvm, struct kvm_gfn_range *range);
bool (*age_gfn)(struct kvm *kvm, struct kvm_gfn_range *range);
bool (*test_age_gfn)(struct kvm *kvm, struct kvm_gfn_range *range);
- bool (*set_spte_gfn)(struct kvm *kvm, struct kvm_gfn_range *range);
void (*free_memslot)(struct kvm_memory_slot *slot);
int (*init_vm)(struct kvm *kvm);
void (*destroy_vm)(struct kvm *kvm);
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 3b72c7ed24cf..24f830cf9bb4 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -251,7 +251,7 @@ static __always_inline bool mmu_has_feature(unsigned long feature)
#endif
#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG
- if (!static_key_initialized) {
+ if (!static_key_feature_checks_initialized) {
printk("Warning! mmu_has_feature() used prior to jump label init!\n");
dump_stack();
return early_mmu_has_feature(feature);
diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h
index a8e2e8339fb7..300c777cc307 100644
--- a/arch/powerpc/include/asm/module.h
+++ b/arch/powerpc/include/asm/module.h
@@ -48,11 +48,6 @@ struct mod_arch_specific {
unsigned long tramp;
unsigned long tramp_regs;
#endif
-
- /* List of BUG addresses, source line numbers and filenames */
- struct list_head bug_list;
- struct bug_entry *bug_table;
- unsigned int num_bugs;
};
/*
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index a2bc4b95e703..8c9d4b26bf57 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -1027,10 +1027,10 @@ struct opal_i2c_request {
* The host will pass on OPAL, a buffer of length OPAL_SYSEPOW_MAX
* with individual elements being 16 bits wide to fetch the system
* wide EPOW status. Each element in the buffer will contain the
- * EPOW status in it's bit representation for a particular EPOW sub
+ * EPOW status in its bit representation for a particular EPOW sub
* class as defined here. So multiple detailed EPOW status bits
* specific for any sub class can be represented in a single buffer
- * element as it's bit representation.
+ * element as its bit representation.
*/
/* System EPOW type */
diff --git a/arch/powerpc/include/asm/percpu.h b/arch/powerpc/include/asm/percpu.h
index 8e5b7d0b851c..634970ce13c6 100644
--- a/arch/powerpc/include/asm/percpu.h
+++ b/arch/powerpc/include/asm/percpu.h
@@ -15,6 +15,16 @@
#endif /* CONFIG_SMP */
#endif /* __powerpc64__ */
+#if defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK) && defined(CONFIG_SMP)
+#include <linux/jump_label.h>
+DECLARE_STATIC_KEY_FALSE(__percpu_first_chunk_is_paged);
+
+#define percpu_first_chunk_is_paged \
+ (static_key_enabled(&__percpu_first_chunk_is_paged.key))
+#else
+#define percpu_first_chunk_is_paged false
+#endif /* CONFIG_PPC64 && CONFIG_SMP */
+
#include <asm-generic/percpu.h>
#include <asm/paca.h>
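This static key pairs with the interrupt.h change above: when the per-CPU first chunk is page-mapped (CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK), per-CPU data may live in the vmalloc area and is not safely accessible in real mode, so interrupt_nmi_enter/exit_prepare() skip nmi_enter()/nmi_exit(). The key would be flipped once during boot after the first chunk has been set up; sketched below with the standard jump-label API (the helper name and call site are assumptions):

    #include <linux/jump_label.h>

    DEFINE_STATIC_KEY_FALSE(__percpu_first_chunk_is_paged);

    /* Sketch: called from percpu setup when the page (vmalloc-backed)
     * first-chunk allocator was used instead of the embed allocator. */
    void __init mark_percpu_first_chunk_paged(void)
    {
            static_branch_enable(&__percpu_first_chunk_is_paged);
    }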
diff --git a/arch/powerpc/include/asm/pmac_feature.h b/arch/powerpc/include/asm/pmac_feature.h
index 2495866f2e97..420e2878ae67 100644
--- a/arch/powerpc/include/asm/pmac_feature.h
+++ b/arch/powerpc/include/asm/pmac_feature.h
@@ -192,7 +192,7 @@ static inline long pmac_call_feature(int selector, struct device_node* node,
/* PMAC_FTR_BMAC_ENABLE (struct device_node* node, 0, int value)
* enable/disable the bmac (ethernet) cell of a mac-io ASIC, also drive
- * it's reset line
+ * its reset line
*/
#define PMAC_FTR_BMAC_ENABLE PMAC_FTR_DEF(6)
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 005601243dda..076ae60b4a55 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -510,6 +510,7 @@
#define PPC_RAW_STB(r, base, i) (0x98000000 | ___PPC_RS(r) | ___PPC_RA(base) | IMM_L(i))
#define PPC_RAW_LBZ(r, base, i) (0x88000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
#define PPC_RAW_LDX(r, base, b) (0x7c00002a | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
+#define PPC_RAW_LHA(r, base, i) (0xa8000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
#define PPC_RAW_LHZ(r, base, i) (0xa0000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
#define PPC_RAW_LHBRX(r, base, b) (0x7c00062c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
#define PPC_RAW_LWBRX(r, base, b) (0x7c00042c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
@@ -532,6 +533,7 @@
#define PPC_RAW_MULW(d, a, b) (0x7c0001d6 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
#define PPC_RAW_MULHWU(d, a, b) (0x7c000016 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
#define PPC_RAW_MULI(d, a, i) (0x1c000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_DIVW(d, a, b) (0x7c0003d6 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
#define PPC_RAW_DIVWU(d, a, b) (0x7c000396 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
#define PPC_RAW_DIVDU(d, a, b) (0x7c000392 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
#define PPC_RAW_DIVDE(t, a, b) (0x7c000352 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
@@ -550,6 +552,8 @@
#define PPC_RAW_XOR(d, a, b) (0x7c000278 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
#define PPC_RAW_XORI(d, a, i) (0x68000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
#define PPC_RAW_XORIS(d, a, i) (0x6c000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
+#define PPC_RAW_EXTSB(d, a) (0x7c000774 | ___PPC_RA(d) | ___PPC_RS(a))
+#define PPC_RAW_EXTSH(d, a) (0x7c000734 | ___PPC_RA(d) | ___PPC_RS(a))
#define PPC_RAW_EXTSW(d, a) (0x7c0007b4 | ___PPC_RA(d) | ___PPC_RS(a))
#define PPC_RAW_SLW(d, a, s) (0x7c000030 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s))
#define PPC_RAW_SLD(d, a, s) (0x7c000036 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s))
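The new PPC_RAW_LHA/PPC_RAW_DIVW/PPC_RAW_EXTSB/PPC_RAW_EXTSH entries follow the existing pattern here: a fixed opcode word OR'd with the encoded register/immediate fields, giving in-kernel instruction emitters sign-extending loads, signed 32-bit divide and byte/halfword sign extension. As a small illustration of how such a macro is consumed (a hypothetical emit buffer, not the real JIT or code-patching context):

    /* Illustrative only: emit "lha dst, off(src)" into an instruction buffer. */
    static void emit_lha(u32 *image, int *idx, int dst, int src, int off)
    {
            image[(*idx)++] = PPC_RAW_LHA(dst, src, off);
    }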
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index b2c51d337e60..e44cac0da346 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -260,7 +260,8 @@ struct thread_struct {
unsigned long sier2;
unsigned long sier3;
unsigned long hashkeyr;
-
+ unsigned long dexcr;
+ unsigned long dexcr_onexec; /* Reset value to load on exec */
#endif
};
@@ -333,6 +334,16 @@ extern int set_endian(struct task_struct *tsk, unsigned int val);
extern int get_unalign_ctl(struct task_struct *tsk, unsigned long adr);
extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val);
+#ifdef CONFIG_PPC_BOOK3S_64
+
+#define PPC_GET_DEXCR_ASPECT(tsk, asp) get_dexcr_prctl((tsk), (asp))
+#define PPC_SET_DEXCR_ASPECT(tsk, asp, val) set_dexcr_prctl((tsk), (asp), (val))
+
+int get_dexcr_prctl(struct task_struct *tsk, unsigned long asp);
+int set_dexcr_prctl(struct task_struct *tsk, unsigned long asp, unsigned long val);
+
+#endif
+
extern void load_fp_state(struct thread_fp_state *fp);
extern void store_fp_state(struct thread_fp_state *fp);
extern void load_vr_state(struct thread_vr_state *vr);
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index d3d1aea009b4..eed33cb916d0 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -615,7 +615,7 @@
#define HID1_ABE (1<<10) /* 7450 Address Broadcast Enable */
#define HID1_PS (1<<16) /* 750FX PLL selection */
#endif
-#define SPRN_HID2 0x3F8 /* Hardware Implementation Register 2 */
+#define SPRN_HID2_750FX 0x3F8 /* IBM 750FX HID2 Register */
#define SPRN_HID2_GEKKO 0x398 /* Gekko HID2 Register */
#define SPRN_HID2_G2_LE 0x3F3 /* G2_LE HID2 Register */
#define HID2_G2_LE_HBE (1<<18) /* High BAT Enable (G2_LE) */
diff --git a/arch/powerpc/include/asm/uninorth.h b/arch/powerpc/include/asm/uninorth.h
index e278299b9b37..6949b5daa37d 100644
--- a/arch/powerpc/include/asm/uninorth.h
+++ b/arch/powerpc/include/asm/uninorth.h
@@ -144,7 +144,7 @@
#define UNI_N_HWINIT_STATE_SLEEPING 0x01
#define UNI_N_HWINIT_STATE_RUNNING 0x02
/* This last bit appear to be used by the bootROM to know the second
- * CPU has started and will enter it's sleep loop with IP=0
+ * CPU has started and will enter its sleep loop with IP=0
*/
#define UNI_N_HWINIT_STATE_CPU1_FLAG 0x10000000
diff --git a/arch/powerpc/include/uapi/asm/bootx.h b/arch/powerpc/include/uapi/asm/bootx.h
index 6728c7e24e58..1b8c121071d9 100644
--- a/arch/powerpc/include/uapi/asm/bootx.h
+++ b/arch/powerpc/include/uapi/asm/bootx.h
@@ -108,7 +108,7 @@ typedef struct boot_infos
/* ALL BELOW NEW (vers. 4) */
/* This defines the physical memory. Valid with BOOT_ARCH_NUBUS flag
- (non-PCI) only. On PCI, memory is contiguous and it's size is in the
+ (non-PCI) only. On PCI, memory is contiguous and its size is in the
device-tree. */
boot_info_map_entry_t
physMemoryMap[MAX_MEM_MAP_SIZE]; /* Where the phys memory is */
diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h
deleted file mode 100644
index 17439925045c..000000000000
--- a/arch/powerpc/include/uapi/asm/papr_pdsm.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * PAPR nvDimm Specific Methods (PDSM) and structs for libndctl
- *
- * (C) Copyright IBM 2020
- *
- * Author: Vaibhav Jain <vaibhav at linux.ibm.com>
- */
-
-#ifndef _UAPI_ASM_POWERPC_PAPR_PDSM_H_
-#define _UAPI_ASM_POWERPC_PAPR_PDSM_H_
-
-#include <linux/types.h>
-#include <linux/ndctl.h>
-
-/*
- * PDSM Envelope:
- *
- * The ioctl ND_CMD_CALL exchange data between user-space and kernel via
- * envelope which consists of 2 headers sections and payload sections as
- * illustrated below:
- * +-----------------+---------------+---------------------------+
- * | 64-Bytes | 8-Bytes | Max 184-Bytes |
- * +-----------------+---------------+---------------------------+
- * | ND-HEADER | PDSM-HEADER | PDSM-PAYLOAD |
- * +-----------------+---------------+---------------------------+
- * | nd_family | | |
- * | nd_size_out | cmd_status | |
- * | nd_size_in | reserved | nd_pdsm_payload |
- * | nd_command | payload --> | |
- * | nd_fw_size | | |
- * | nd_payload ---> | | |
- * +---------------+-----------------+---------------------------+
- *
- * ND Header:
- * This is the generic libnvdimm header described as 'struct nd_cmd_pkg'
- * which is interpreted by libnvdimm before passed on to papr_scm. Important
- * member fields used are:
- * 'nd_family' : (In) NVDIMM_FAMILY_PAPR_SCM
- * 'nd_size_in' : (In) PDSM-HEADER + PDSM-IN-PAYLOAD (usually 0)
- * 'nd_size_out' : (In) PDSM-HEADER + PDSM-RETURN-PAYLOAD
- * 'nd_command' : (In) One of PAPR_PDSM_XXX
- * 'nd_fw_size' : (Out) PDSM-HEADER + size of actual payload returned
- *
- * PDSM Header:
- * This is papr-scm specific header that precedes the payload. This is defined
- * as nd_cmd_pdsm_pkg. Following fields aare available in this header:
- *
- * 'cmd_status' : (Out) Errors if any encountered while servicing PDSM.
- * 'reserved' : Not used, reserved for future and should be set to 0.
- * 'payload' : A union of all the possible payload structs
- *
- * PDSM Payload:
- *
- * The layout of the PDSM Payload is defined by various structs shared between
- * papr_scm and libndctl so that contents of payload can be interpreted. As such
- * its defined as a union of all possible payload structs as
- * 'union nd_pdsm_payload'. Based on the value of 'nd_cmd_pkg.nd_command'
- * appropriate member of the union is accessed.
- */
-
-/* Max payload size that we can handle */
-#define ND_PDSM_PAYLOAD_MAX_SIZE 184
-
-/* Max payload size that we can handle */
-#define ND_PDSM_HDR_SIZE \
- (sizeof(struct nd_pkg_pdsm) - ND_PDSM_PAYLOAD_MAX_SIZE)
-
-/* Various nvdimm health indicators */
-#define PAPR_PDSM_DIMM_HEALTHY 0
-#define PAPR_PDSM_DIMM_UNHEALTHY 1
-#define PAPR_PDSM_DIMM_CRITICAL 2
-#define PAPR_PDSM_DIMM_FATAL 3
-
-/* struct nd_papr_pdsm_health.extension_flags field flags */
-
-/* Indicate that the 'dimm_fuel_gauge' field is valid */
-#define PDSM_DIMM_HEALTH_RUN_GAUGE_VALID 1
-
-/* Indicate that the 'dimm_dsc' field is valid */
-#define PDSM_DIMM_DSC_VALID 2
-
-/*
- * Struct exchanged between kernel & ndctl in for PAPR_PDSM_HEALTH
- * Various flags indicate the health status of the dimm.
- *
- * extension_flags : Any extension fields present in the struct.
- * dimm_unarmed : Dimm not armed. So contents wont persist.
- * dimm_bad_shutdown : Previous shutdown did not persist contents.
- * dimm_bad_restore : Contents from previous shutdown werent restored.
- * dimm_scrubbed : Contents of the dimm have been scrubbed.
- * dimm_locked : Contents of the dimm cant be modified until CEC reboot
- * dimm_encrypted : Contents of dimm are encrypted.
- * dimm_health : Dimm health indicator. One of PAPR_PDSM_DIMM_XXXX
- * dimm_fuel_gauge : Life remaining of DIMM as a percentage from 0-100
- */
-struct nd_papr_pdsm_health {
- union {
- struct {
- __u32 extension_flags;
- __u8 dimm_unarmed;
- __u8 dimm_bad_shutdown;
- __u8 dimm_bad_restore;
- __u8 dimm_scrubbed;
- __u8 dimm_locked;
- __u8 dimm_encrypted;
- __u16 dimm_health;
-
- /* Extension flag PDSM_DIMM_HEALTH_RUN_GAUGE_VALID */
- __u16 dimm_fuel_gauge;
-
- /* Extension flag PDSM_DIMM_DSC_VALID */
- __u64 dimm_dsc;
- };
- __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
- };
-};
-
-/* Flags for injecting specific smart errors */
-#define PDSM_SMART_INJECT_HEALTH_FATAL (1 << 0)
-#define PDSM_SMART_INJECT_BAD_SHUTDOWN (1 << 1)
-
-struct nd_papr_pdsm_smart_inject {
- union {
- struct {
- /* One or more of PDSM_SMART_INJECT_ */
- __u32 flags;
- __u8 fatal_enable;
- __u8 unsafe_shutdown_enable;
- };
- __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
- };
-};
-
-/*
- * Methods to be embedded in ND_CMD_CALL request. These are sent to the kernel
- * via 'nd_cmd_pkg.nd_command' member of the ioctl struct
- */
-enum papr_pdsm {
- PAPR_PDSM_MIN = 0x0,
- PAPR_PDSM_HEALTH,
- PAPR_PDSM_SMART_INJECT,
- PAPR_PDSM_MAX,
-};
-
-/* Maximal union that can hold all possible payload types */
-union nd_pdsm_payload {
- struct nd_papr_pdsm_health health;
- struct nd_papr_pdsm_smart_inject smart_inject;
- __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
-} __packed;
-
-/*
- * PDSM-header + payload expected with ND_CMD_CALL ioctl from libnvdimm
- * Valid member of union 'payload' is identified via 'nd_cmd_pkg.nd_command'
- * that should always precede this struct when sent to papr_scm via CMD_CALL
- * interface.
- */
-struct nd_pkg_pdsm {
- __s32 cmd_status; /* Out: Sub-cmd status returned back */
- __u16 reserved[2]; /* Ignored and to be set as '0' */
- union nd_pdsm_payload payload;
-} __packed;
-
-#endif /* _UAPI_ASM_POWERPC_PAPR_PDSM_H_ */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index d3282fbea4f2..8585d03c02d3 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -3,9 +3,6 @@
# Makefile for the linux kernel.
#
-ifdef CONFIG_PPC64
-CFLAGS_prom_init.o += $(NO_MINIMAL_TOC)
-endif
ifdef CONFIG_PPC32
CFLAGS_prom_init.o += -fPIC
CFLAGS_btext.o += -fPIC
@@ -87,6 +84,7 @@ obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_PPC_DAWR) += dawr.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o
+obj-$(CONFIG_PPC_BOOK3S_64) += dexcr.o
obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o
obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_64e.o
obj-$(CONFIG_PPC_BARRIER_NOSPEC) += security.o
@@ -190,9 +188,6 @@ GCOV_PROFILE_kprobes-ftrace.o := n
KCOV_INSTRUMENT_kprobes-ftrace.o := n
KCSAN_SANITIZE_kprobes-ftrace.o := n
UBSAN_SANITIZE_kprobes-ftrace.o := n
-GCOV_PROFILE_syscall_64.o := n
-KCOV_INSTRUMENT_syscall_64.o := n
-UBSAN_SANITIZE_syscall_64.o := n
UBSAN_SANITIZE_vdso.o := n
# Necessary for booting with kcov enabled on book3e machines
diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S
index bfd3f442e5eb..ab3ca74e6730 100644
--- a/arch/powerpc/kernel/cpu_setup_6xx.S
+++ b/arch/powerpc/kernel/cpu_setup_6xx.S
@@ -401,7 +401,7 @@ _GLOBAL(__save_cpu_setup)
andi. r3,r3,0xff00
cmpwi cr0,r3,0x0200
bne 1f
- mfspr r4,SPRN_HID2
+ mfspr r4,SPRN_HID2_750FX
stw r4,CS_HID2(r5)
1:
mtcr r7
@@ -496,7 +496,7 @@ _GLOBAL(__restore_cpu_setup)
bne 4f
lwz r4,CS_HID2(r5)
rlwinm r4,r4,0,19,17
- mtspr SPRN_HID2,r4
+ mtspr SPRN_HID2_750FX,r4
sync
4:
lwz r4,CS_HID1(r5)
diff --git a/arch/powerpc/kernel/dexcr.c b/arch/powerpc/kernel/dexcr.c
new file mode 100644
index 000000000000..3a0358e91c60
--- /dev/null
+++ b/arch/powerpc/kernel/dexcr.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/capability.h>
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <linux/prctl.h>
+#include <linux/sched.h>
+
+#include <asm/cpu_has_feature.h>
+#include <asm/cputable.h>
+#include <asm/processor.h>
+#include <asm/reg.h>
+
+static int __init init_task_dexcr(void)
+{
+ if (!early_cpu_has_feature(CPU_FTR_ARCH_31))
+ return 0;
+
+ current->thread.dexcr_onexec = mfspr(SPRN_DEXCR);
+
+ return 0;
+}
+early_initcall(init_task_dexcr)
+
+/* Allow thread local configuration of these by default */
+#define DEXCR_PRCTL_EDITABLE ( \
+ DEXCR_PR_IBRTPD | \
+ DEXCR_PR_SRAPD | \
+ DEXCR_PR_NPHIE)
+
+static int prctl_to_aspect(unsigned long which, unsigned int *aspect)
+{
+ switch (which) {
+ case PR_PPC_DEXCR_SBHE:
+ *aspect = DEXCR_PR_SBHE;
+ break;
+ case PR_PPC_DEXCR_IBRTPD:
+ *aspect = DEXCR_PR_IBRTPD;
+ break;
+ case PR_PPC_DEXCR_SRAPD:
+ *aspect = DEXCR_PR_SRAPD;
+ break;
+ case PR_PPC_DEXCR_NPHIE:
+ *aspect = DEXCR_PR_NPHIE;
+ break;
+ default:
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+int get_dexcr_prctl(struct task_struct *task, unsigned long which)
+{
+ unsigned int aspect;
+ int ret;
+
+ ret = prctl_to_aspect(which, &aspect);
+ if (ret)
+ return ret;
+
+ if (aspect & DEXCR_PRCTL_EDITABLE)
+ ret |= PR_PPC_DEXCR_CTRL_EDITABLE;
+
+ if (aspect & mfspr(SPRN_DEXCR))
+ ret |= PR_PPC_DEXCR_CTRL_SET;
+ else
+ ret |= PR_PPC_DEXCR_CTRL_CLEAR;
+
+ if (aspect & task->thread.dexcr_onexec)
+ ret |= PR_PPC_DEXCR_CTRL_SET_ONEXEC;
+ else
+ ret |= PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC;
+
+ return ret;
+}
+
+int set_dexcr_prctl(struct task_struct *task, unsigned long which, unsigned long ctrl)
+{
+ unsigned long dexcr;
+ unsigned int aspect;
+ int err = 0;
+
+ err = prctl_to_aspect(which, &aspect);
+ if (err)
+ return err;
+
+ if (!(aspect & DEXCR_PRCTL_EDITABLE))
+ return -EPERM;
+
+ if (ctrl & ~PR_PPC_DEXCR_CTRL_MASK)
+ return -EINVAL;
+
+ if (ctrl & PR_PPC_DEXCR_CTRL_SET && ctrl & PR_PPC_DEXCR_CTRL_CLEAR)
+ return -EINVAL;
+
+ if (ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC && ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC)
+ return -EINVAL;
+
+ /*
+ * We do not want an unprivileged process being able to disable
+ * a setuid process's hash check instructions
+ */
+ if (aspect == DEXCR_PR_NPHIE &&
+ ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC &&
+ !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ dexcr = mfspr(SPRN_DEXCR);
+
+ if (ctrl & PR_PPC_DEXCR_CTRL_SET)
+ dexcr |= aspect;
+ else if (ctrl & PR_PPC_DEXCR_CTRL_CLEAR)
+ dexcr &= ~aspect;
+
+ if (ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC)
+ task->thread.dexcr_onexec |= aspect;
+ else if (ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC)
+ task->thread.dexcr_onexec &= ~aspect;
+
+ mtspr(SPRN_DEXCR, dexcr);
+
+ return 0;
+}
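As a rough userspace sketch of the interface these two functions back (not part of the patch; the PR_PPC_GET_DEXCR/PR_PPC_SET_DEXCR prctl numbers and the PR_PPC_DEXCR_* constants are assumed to come from the uapi <linux/prctl.h> half of this series), a process could query and toggle one aspect like this:

#include <stdio.h>
#include <sys/prctl.h>
#include <linux/prctl.h>

int main(void)
{
	/* Query the current state of the SRAPD aspect for this thread. */
	int ctrl = prctl(PR_PPC_GET_DEXCR, PR_PPC_DEXCR_SRAPD, 0, 0, 0);

	if (ctrl < 0) {
		perror("PR_PPC_GET_DEXCR");
		return 1;
	}

	if (ctrl & PR_PPC_DEXCR_CTRL_EDITABLE) {
		/* Set the aspect now and have it set again after exec(). */
		if (prctl(PR_PPC_SET_DEXCR, PR_PPC_DEXCR_SRAPD,
			  PR_PPC_DEXCR_CTRL_SET | PR_PPC_DEXCR_CTRL_SET_ONEXEC,
			  0, 0))
			perror("PR_PPC_SET_DEXCR");
	}
	return 0;
}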
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index ab316e155ea9..6670063a7a6c 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -506,9 +506,18 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
* We will punt with the following conditions: Failure to get
* PE's state, EEH not support and Permanently unavailable
* state, PE is in good state.
+ *
+ * On pSeries, after reaching the threshold, get_state might
+ * return EEH_STATE_NOT_SUPPORT. However, it's possible that the
+ * device state remains uncleared if the device is not marked
+ * pci_channel_io_perm_failure. Therefore, consider logging the
+ * event to let device removal happen.
+ *
*/
if ((ret < 0) ||
- (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) {
+ (ret == EEH_STATE_NOT_SUPPORT &&
+ dev->error_state == pci_channel_io_perm_failure) ||
+ eeh_state_active(ret)) {
eeh_stats.false_positives++;
pe->false_positives++;
rc = 0;
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 48773d2d9be3..7efe04c68f0f 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -865,9 +865,18 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
devices++;
if (!devices) {
- pr_debug("EEH: Frozen PHB#%x-PE#%x is empty!\n",
+ pr_warn("EEH: Frozen PHB#%x-PE#%x is empty!\n",
pe->phb->global_number, pe->addr);
- goto out; /* nothing to recover */
+ /*
+ * The device is removed; tear down its state. On powernv the
+ * hotplug driver would take care of this, but not on pseries,
+ * so permanently disable the card as it has been hot removed.
+ *
+ * In the powernv case, note that device removal is covered by
+ * the PCI rescan lock, so there is no problem even if the
+ * hotplug driver attempts to remove the device.
+ */
+ goto recover_failed;
}
/* Log the event */
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index e0ce81279624..d1030bc52564 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -24,10 +24,10 @@ static int eeh_pe_aux_size = 0;
static LIST_HEAD(eeh_phb_pe);
/**
- * eeh_set_pe_aux_size - Set PE auxillary data size
- * @size: PE auxillary data size
+ * eeh_set_pe_aux_size - Set PE auxiliary data size
+ * @size: PE auxiliary data size in bytes
*
- * Set PE auxillary data size
+ * Set PE auxiliary data size.
*/
void eeh_set_pe_aux_size(int size)
{
@@ -527,7 +527,7 @@ EXPORT_SYMBOL_GPL(eeh_pe_state_mark);
* eeh_pe_mark_isolated
* @pe: EEH PE
*
- * Record that a PE has been isolated by marking the PE and it's children as
+ * Record that a PE has been isolated by marking the PE and its children as
* EEH_PE_ISOLATED (and EEH_PE_CFG_BLOCKED, if required) and their PCI devices
* as pci_channel_io_frozen.
*/
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index d14eda1e8589..60f974775fc8 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -53,8 +53,6 @@ static struct kobject *fadump_kobj;
static atomic_t cpus_in_fadump;
static DEFINE_MUTEX(fadump_mutex);
-static struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false };
-
#define RESERVED_RNGS_SZ 16384 /* 16K - 128 entries */
#define RESERVED_RNGS_CNT (RESERVED_RNGS_SZ / \
sizeof(struct fadump_memory_range))
@@ -133,6 +131,41 @@ static int __init fadump_cma_init(void)
static int __init fadump_cma_init(void) { return 1; }
#endif /* CONFIG_CMA */
+/*
+ * Additional parameters meant for the capture kernel are placed in a dedicated area.
+ * If this is a capture kernel boot, append these parameters to bootargs.
+ */
+void __init fadump_append_bootargs(void)
+{
+ char *append_args;
+ size_t len;
+
+ if (!fw_dump.dump_active || !fw_dump.param_area_supported || !fw_dump.param_area)
+ return;
+
+ if (fw_dump.param_area >= fw_dump.boot_mem_top) {
+ if (memblock_reserve(fw_dump.param_area, COMMAND_LINE_SIZE)) {
+ pr_warn("WARNING: Can't use additional parameters area!\n");
+ fw_dump.param_area = 0;
+ return;
+ }
+ }
+
+ append_args = (char *)fw_dump.param_area;
+ len = strlen(boot_command_line);
+
+ /*
+ * Too late to fail even if the cmdline size is exceeded. Truncate the additional
+ * parameters to fit the cmdline size and proceed anyway.
+ */
+ if (len + strlen(append_args) >= COMMAND_LINE_SIZE - 1)
+ pr_warn("WARNING: Appending parameters exceeds cmdline size. Truncating!\n");
+
+ pr_debug("Cmdline: %s\n", boot_command_line);
+ snprintf(boot_command_line + len, COMMAND_LINE_SIZE - len, " %s", append_args);
+ pr_info("Updated cmdline: %s\n", boot_command_line);
+}
+
/* Scan the Firmware Assisted dump configuration details. */
int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
int depth, void *data)
@@ -223,28 +256,6 @@ static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end)
}
/*
- * Returns true, if there are no holes in boot memory area,
- * false otherwise.
- */
-bool is_fadump_boot_mem_contiguous(void)
-{
- unsigned long d_start, d_end;
- bool ret = false;
- int i;
-
- for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
- d_start = fw_dump.boot_mem_addr[i];
- d_end = d_start + fw_dump.boot_mem_sz[i];
-
- ret = is_fadump_mem_area_contiguous(d_start, d_end);
- if (!ret)
- break;
- }
-
- return ret;
-}
-
-/*
* Returns true, if there are no holes in reserved memory area,
* false otherwise.
*/
@@ -373,12 +384,6 @@ static unsigned long __init get_fadump_area_size(void)
size = PAGE_ALIGN(size);
size += fw_dump.boot_memory_size;
size += sizeof(struct fadump_crash_info_header);
- size += sizeof(struct elfhdr); /* ELF core header.*/
- size += sizeof(struct elf_phdr); /* place holder for cpu notes */
- /* Program headers for crash memory regions. */
- size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2);
-
- size = PAGE_ALIGN(size);
/* This is to hold kernel metadata on platforms that support it */
size += (fw_dump.ops->fadump_get_metadata_size ?
@@ -389,10 +394,11 @@ static unsigned long __init get_fadump_area_size(void)
static int __init add_boot_mem_region(unsigned long rstart,
unsigned long rsize)
{
+ int max_boot_mem_rgns = fw_dump.ops->fadump_max_boot_mem_rgns();
int i = fw_dump.boot_mem_regs_cnt++;
- if (fw_dump.boot_mem_regs_cnt > FADUMP_MAX_MEM_REGS) {
- fw_dump.boot_mem_regs_cnt = FADUMP_MAX_MEM_REGS;
+ if (fw_dump.boot_mem_regs_cnt > max_boot_mem_rgns) {
+ fw_dump.boot_mem_regs_cnt = max_boot_mem_rgns;
return 0;
}
@@ -573,22 +579,6 @@ int __init fadump_reserve_mem(void)
}
}
- /*
- * Calculate the memory boundary.
- * If memory_limit is less than actual memory boundary then reserve
- * the memory for fadump beyond the memory_limit and adjust the
- * memory_limit accordingly, so that the running kernel can run with
- * specified memory_limit.
- */
- if (memory_limit && memory_limit < memblock_end_of_DRAM()) {
- size = get_fadump_area_size();
- if ((memory_limit + size) < memblock_end_of_DRAM())
- memory_limit += size;
- else
- memory_limit = memblock_end_of_DRAM();
- printk(KERN_INFO "Adjusted memory_limit for firmware-assisted"
- " dump, now %#016llx\n", memory_limit);
- }
if (memory_limit)
mem_boundary = memory_limit;
else
@@ -705,7 +695,7 @@ void crash_fadump(struct pt_regs *regs, const char *str)
* old_cpu == -1 means this is the first CPU which has come here,
* go ahead and trigger fadump.
*
- * old_cpu != -1 means some other CPU has already on it's way
+ * old_cpu != -1 means some other CPU is already on its way
* to trigger fadump, just keep looping here.
*/
this_cpu = smp_processor_id();
@@ -931,36 +921,6 @@ static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info,
return 0;
}
-static int fadump_exclude_reserved_area(u64 start, u64 end)
-{
- u64 ra_start, ra_end;
- int ret = 0;
-
- ra_start = fw_dump.reserve_dump_area_start;
- ra_end = ra_start + fw_dump.reserve_dump_area_size;
-
- if ((ra_start < end) && (ra_end > start)) {
- if ((start < ra_start) && (end > ra_end)) {
- ret = fadump_add_mem_range(&crash_mrange_info,
- start, ra_start);
- if (ret)
- return ret;
-
- ret = fadump_add_mem_range(&crash_mrange_info,
- ra_end, end);
- } else if (start < ra_start) {
- ret = fadump_add_mem_range(&crash_mrange_info,
- start, ra_start);
- } else if (ra_end < end) {
- ret = fadump_add_mem_range(&crash_mrange_info,
- ra_end, end);
- }
- } else
- ret = fadump_add_mem_range(&crash_mrange_info, start, end);
-
- return ret;
-}
-
static int fadump_init_elfcore_header(char *bufp)
{
struct elfhdr *elf;
@@ -998,52 +958,6 @@ static int fadump_init_elfcore_header(char *bufp)
}
/*
- * Traverse through memblock structure and setup crash memory ranges. These
- * ranges will be used create PT_LOAD program headers in elfcore header.
- */
-static int fadump_setup_crash_memory_ranges(void)
-{
- u64 i, start, end;
- int ret;
-
- pr_debug("Setup crash memory ranges.\n");
- crash_mrange_info.mem_range_cnt = 0;
-
- /*
- * Boot memory region(s) registered with firmware are moved to
- * different location at the time of crash. Create separate program
- * header(s) for this memory chunk(s) with the correct offset.
- */
- for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
- start = fw_dump.boot_mem_addr[i];
- end = start + fw_dump.boot_mem_sz[i];
- ret = fadump_add_mem_range(&crash_mrange_info, start, end);
- if (ret)
- return ret;
- }
-
- for_each_mem_range(i, &start, &end) {
- /*
- * skip the memory chunk that is already added
- * (0 through boot_memory_top).
- */
- if (start < fw_dump.boot_mem_top) {
- if (end > fw_dump.boot_mem_top)
- start = fw_dump.boot_mem_top;
- else
- continue;
- }
-
- /* add this range excluding the reserved dump area. */
- ret = fadump_exclude_reserved_area(start, end);
- if (ret)
- return ret;
- }
-
- return 0;
-}
-
-/*
* If the given physical address falls within the boot memory region then
* return the relocated address that points to the dump region reserved
* for saving initial boot memory contents.
@@ -1073,36 +987,50 @@ static inline unsigned long fadump_relocate(unsigned long paddr)
return raddr;
}
-static int fadump_create_elfcore_headers(char *bufp)
+static void __init populate_elf_pt_load(struct elf_phdr *phdr, u64 start,
+ u64 size, unsigned long long offset)
{
- unsigned long long raddr, offset;
- struct elf_phdr *phdr;
+ phdr->p_align = 0;
+ phdr->p_memsz = size;
+ phdr->p_filesz = size;
+ phdr->p_paddr = start;
+ phdr->p_offset = offset;
+ phdr->p_type = PT_LOAD;
+ phdr->p_flags = PF_R|PF_W|PF_X;
+ phdr->p_vaddr = (unsigned long)__va(start);
+}
+
+static void __init fadump_populate_elfcorehdr(struct fadump_crash_info_header *fdh)
+{
+ char *bufp;
struct elfhdr *elf;
- int i, j;
+ struct elf_phdr *phdr;
+ u64 boot_mem_dest_offset;
+ unsigned long long i, ra_start, ra_end, ra_size, mstart, mend;
+ bufp = (char *) fw_dump.elfcorehdr_addr;
fadump_init_elfcore_header(bufp);
elf = (struct elfhdr *)bufp;
bufp += sizeof(struct elfhdr);
/*
- * setup ELF PT_NOTE, place holder for cpu notes info. The notes info
- * will be populated during second kernel boot after crash. Hence
- * this PT_NOTE will always be the first elf note.
+ * Set up ELF PT_NOTE, a placeholder for CPU notes information.
+ * The notes info will be populated later by platform-specific code.
+ * Hence, this PT_NOTE will always be the first ELF note.
*
* NOTE: Any new ELF note addition should be placed after this note.
*/
phdr = (struct elf_phdr *)bufp;
bufp += sizeof(struct elf_phdr);
phdr->p_type = PT_NOTE;
- phdr->p_flags = 0;
- phdr->p_vaddr = 0;
- phdr->p_align = 0;
-
- phdr->p_offset = 0;
- phdr->p_paddr = 0;
- phdr->p_filesz = 0;
- phdr->p_memsz = 0;
-
+ phdr->p_flags = 0;
+ phdr->p_vaddr = 0;
+ phdr->p_align = 0;
+ phdr->p_offset = 0;
+ phdr->p_paddr = 0;
+ phdr->p_filesz = 0;
+ phdr->p_memsz = 0;
+ /* Increment number of program headers. */
(elf->e_phnum)++;
/* setup ELF PT_NOTE for vmcoreinfo */
@@ -1112,55 +1040,66 @@ static int fadump_create_elfcore_headers(char *bufp)
phdr->p_flags = 0;
phdr->p_vaddr = 0;
phdr->p_align = 0;
-
- phdr->p_paddr = fadump_relocate(paddr_vmcoreinfo_note());
- phdr->p_offset = phdr->p_paddr;
- phdr->p_memsz = phdr->p_filesz = VMCOREINFO_NOTE_SIZE;
-
+ phdr->p_paddr = phdr->p_offset = fdh->vmcoreinfo_raddr;
+ phdr->p_memsz = phdr->p_filesz = fdh->vmcoreinfo_size;
/* Increment number of program headers. */
(elf->e_phnum)++;
- /* setup PT_LOAD sections. */
- j = 0;
- offset = 0;
- raddr = fw_dump.boot_mem_addr[0];
- for (i = 0; i < crash_mrange_info.mem_range_cnt; i++) {
- u64 mbase, msize;
-
- mbase = crash_mrange_info.mem_ranges[i].base;
- msize = crash_mrange_info.mem_ranges[i].size;
- if (!msize)
- continue;
-
+ /*
+ * Set up PT_LOAD sections: first include the boot memory regions
+ * and then add the rest of the memory regions.
+ */
+ boot_mem_dest_offset = fw_dump.boot_mem_dest_addr;
+ for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
phdr = (struct elf_phdr *)bufp;
bufp += sizeof(struct elf_phdr);
- phdr->p_type = PT_LOAD;
- phdr->p_flags = PF_R|PF_W|PF_X;
- phdr->p_offset = mbase;
-
- if (mbase == raddr) {
- /*
- * The entire real memory region will be moved by
- * firmware to the specified destination_address.
- * Hence set the correct offset.
- */
- phdr->p_offset = fw_dump.boot_mem_dest_addr + offset;
- if (j < (fw_dump.boot_mem_regs_cnt - 1)) {
- offset += fw_dump.boot_mem_sz[j];
- raddr = fw_dump.boot_mem_addr[++j];
- }
+ populate_elf_pt_load(phdr, fw_dump.boot_mem_addr[i],
+ fw_dump.boot_mem_sz[i],
+ boot_mem_dest_offset);
+ /* Increment number of program headers. */
+ (elf->e_phnum)++;
+ boot_mem_dest_offset += fw_dump.boot_mem_sz[i];
+ }
+
+ /* Memory reserved for fadump in first kernel */
+ ra_start = fw_dump.reserve_dump_area_start;
+ ra_size = get_fadump_area_size();
+ ra_end = ra_start + ra_size;
+
+ phdr = (struct elf_phdr *)bufp;
+ for_each_mem_range(i, &mstart, &mend) {
+ /* Boot memory regions already added, skip them now */
+ if (mstart < fw_dump.boot_mem_top) {
+ if (mend > fw_dump.boot_mem_top)
+ mstart = fw_dump.boot_mem_top;
+ else
+ continue;
}
- phdr->p_paddr = mbase;
- phdr->p_vaddr = (unsigned long)__va(mbase);
- phdr->p_filesz = msize;
- phdr->p_memsz = msize;
- phdr->p_align = 0;
+ /* Handle memblock regions overlaps with fadump reserved area */
+ if ((ra_start < mend) && (ra_end > mstart)) {
+ if ((mstart < ra_start) && (mend > ra_end)) {
+ populate_elf_pt_load(phdr, mstart, ra_start - mstart, mstart);
+ /* Increment number of program headers. */
+ (elf->e_phnum)++;
+ bufp += sizeof(struct elf_phdr);
+ phdr = (struct elf_phdr *)bufp;
+ populate_elf_pt_load(phdr, ra_end, mend - ra_end, ra_end);
+ } else if (mstart < ra_start) {
+ populate_elf_pt_load(phdr, mstart, ra_start - mstart, mstart);
+ } else if (ra_end < mend) {
+ populate_elf_pt_load(phdr, ra_end, mend - ra_end, ra_end);
+ }
+ } else {
+ /* No overlap with fadump reserved memory region */
+ populate_elf_pt_load(phdr, mstart, mend - mstart, mstart);
+ }
/* Increment number of program headers. */
(elf->e_phnum)++;
+ bufp += sizeof(struct elf_phdr);
+ phdr = (struct elf_phdr *) bufp;
}
- return 0;
}
static unsigned long init_fadump_header(unsigned long addr)
@@ -1175,14 +1114,25 @@ static unsigned long init_fadump_header(unsigned long addr)
memset(fdh, 0, sizeof(struct fadump_crash_info_header));
fdh->magic_number = FADUMP_CRASH_INFO_MAGIC;
- fdh->elfcorehdr_addr = addr;
+ fdh->version = FADUMP_HEADER_VERSION;
/* We will set the crashing cpu id in crash_fadump() during crash. */
fdh->crashing_cpu = FADUMP_CPU_UNKNOWN;
+
+ /*
+ * The physical address and size of vmcoreinfo are required in the
+ * second kernel to prepare elfcorehdr.
+ */
+ fdh->vmcoreinfo_raddr = fadump_relocate(paddr_vmcoreinfo_note());
+ fdh->vmcoreinfo_size = VMCOREINFO_NOTE_SIZE;
+
+ fdh->pt_regs_sz = sizeof(struct pt_regs);
/*
* When LPAR is terminated by PYHP, ensure all possible CPUs'
* register data is processed while exporting the vmcore.
*/
fdh->cpu_mask = *cpu_possible_mask;
+ fdh->cpu_mask_sz = sizeof(struct cpumask);
return addr;
}
@@ -1190,8 +1140,6 @@ static unsigned long init_fadump_header(unsigned long addr)
static int register_fadump(void)
{
unsigned long addr;
- void *vaddr;
- int ret;
/*
* If no memory is reserved then we can not register for firmware-
@@ -1200,18 +1148,10 @@ static int register_fadump(void)
if (!fw_dump.reserve_dump_area_size)
return -ENODEV;
- ret = fadump_setup_crash_memory_ranges();
- if (ret)
- return ret;
-
addr = fw_dump.fadumphdr_addr;
/* Initialize fadump crash info header. */
addr = init_fadump_header(addr);
- vaddr = __va(addr);
-
- pr_debug("Creating ELF core headers at %#016lx\n", addr);
- fadump_create_elfcore_headers(vaddr);
/* register the future kernel dump with firmware. */
pr_debug("Registering for firmware-assisted kernel dump...\n");
@@ -1230,7 +1170,6 @@ void fadump_cleanup(void)
} else if (fw_dump.dump_registered) {
/* Un-register Firmware-assisted dump if it was registered. */
fw_dump.ops->fadump_unregister(&fw_dump);
- fadump_free_mem_ranges(&crash_mrange_info);
}
if (fw_dump.ops->fadump_cleanup)
@@ -1416,6 +1355,22 @@ static void fadump_release_memory(u64 begin, u64 end)
fadump_release_reserved_area(tstart, end);
}
+static void fadump_free_elfcorehdr_buf(void)
+{
+ if (fw_dump.elfcorehdr_addr == 0 || fw_dump.elfcorehdr_size == 0)
+ return;
+
+ /*
+ * Before freeing the memory of `elfcorehdr`, reset the global
+ * `elfcorehdr_addr` to prevent modules like `vmcore` from accessing
+ * invalid memory.
+ */
+ elfcorehdr_addr = ELFCORE_ADDR_ERR;
+ fadump_free_buffer(fw_dump.elfcorehdr_addr, fw_dump.elfcorehdr_size);
+ fw_dump.elfcorehdr_addr = 0;
+ fw_dump.elfcorehdr_size = 0;
+}
+
static void fadump_invalidate_release_mem(void)
{
mutex_lock(&fadump_mutex);
@@ -1427,6 +1382,7 @@ static void fadump_invalidate_release_mem(void)
fadump_cleanup();
mutex_unlock(&fadump_mutex);
+ fadump_free_elfcorehdr_buf();
fadump_release_memory(fw_dump.boot_mem_top, memblock_end_of_DRAM());
fadump_free_cpu_notes_buf();
@@ -1484,6 +1440,18 @@ static ssize_t enabled_show(struct kobject *kobj,
return sprintf(buf, "%d\n", fw_dump.fadump_enabled);
}
+/*
+ * /sys/kernel/fadump/hotplug_ready sysfs node returns 1, which indicates
+ * to userspace that fadump re-registration is not required on memory
+ * hotplug events.
+ */
+static ssize_t hotplug_ready_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%d\n", 1);
+}
+
static ssize_t mem_reserved_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *buf)
@@ -1498,6 +1466,43 @@ static ssize_t registered_show(struct kobject *kobj,
return sprintf(buf, "%d\n", fw_dump.dump_registered);
}
+static ssize_t bootargs_append_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%s\n", (char *)__va(fw_dump.param_area));
+}
+
+static ssize_t bootargs_append_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ char *params;
+
+ if (!fw_dump.fadump_enabled || fw_dump.dump_active)
+ return -EPERM;
+
+ if (count >= COMMAND_LINE_SIZE)
+ return -EINVAL;
+
+ /*
+ * Fail here instead of handling this scenario with
+ * some silly workaround in the capture kernel.
+ */
+ if (saved_command_line_len + count >= COMMAND_LINE_SIZE) {
+ pr_err("Appending parameters exceeds cmdline size!\n");
+ return -ENOSPC;
+ }
+
+ params = __va(fw_dump.param_area);
+ strscpy_pad(params, buf, COMMAND_LINE_SIZE);
+ /* Remove newline character at the end. */
+ if (params[count-1] == '\n')
+ params[count-1] = '\0';
+
+ return count;
+}
+
static ssize_t registered_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t count)
@@ -1556,11 +1561,14 @@ static struct kobj_attribute release_attr = __ATTR_WO(release_mem);
static struct kobj_attribute enable_attr = __ATTR_RO(enabled);
static struct kobj_attribute register_attr = __ATTR_RW(registered);
static struct kobj_attribute mem_reserved_attr = __ATTR_RO(mem_reserved);
+static struct kobj_attribute hotplug_ready_attr = __ATTR_RO(hotplug_ready);
+static struct kobj_attribute bootargs_append_attr = __ATTR_RW(bootargs_append);
static struct attribute *fadump_attrs[] = {
&enable_attr.attr,
&register_attr.attr,
&mem_reserved_attr.attr,
+ &hotplug_ready_attr.attr,
NULL,
};
@@ -1632,6 +1640,150 @@ static void __init fadump_init_files(void)
return;
}
+static int __init fadump_setup_elfcorehdr_buf(void)
+{
+ int elf_phdr_cnt;
+ unsigned long elfcorehdr_size;
+
+ /*
+ * Program header for CPU notes comes first, followed by one for
+ * vmcoreinfo, and the remaining program headers correspond to
+ * memory regions.
+ */
+ elf_phdr_cnt = 2 + fw_dump.boot_mem_regs_cnt + memblock_num_regions(memory);
+ elfcorehdr_size = sizeof(struct elfhdr) + (elf_phdr_cnt * sizeof(struct elf_phdr));
+ elfcorehdr_size = PAGE_ALIGN(elfcorehdr_size);
+
+ fw_dump.elfcorehdr_addr = (u64)fadump_alloc_buffer(elfcorehdr_size);
+ if (!fw_dump.elfcorehdr_addr) {
+ pr_err("Failed to allocate %lu bytes for elfcorehdr\n",
+ elfcorehdr_size);
+ return -ENOMEM;
+ }
+ fw_dump.elfcorehdr_size = elfcorehdr_size;
+ return 0;
+}
+
+/*
+ * Check if the fadump header of crashed kernel is compatible with fadump kernel.
+ *
+ * It checks the magic number, endianness, and the sizes of non-primitive type
+ * members of the fadump header to ensure safe dump collection.
+ */
+static bool __init is_fadump_header_compatible(struct fadump_crash_info_header *fdh)
+{
+ if (fdh->magic_number == FADUMP_CRASH_INFO_MAGIC_OLD) {
+ pr_err("Old magic number, can't process the dump.\n");
+ return false;
+ }
+
+ if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
+ if (fdh->magic_number == swab64(FADUMP_CRASH_INFO_MAGIC))
+ pr_err("Endianness mismatch between the crashed and fadump kernels.\n");
+ else
+ pr_err("Fadump header is corrupted.\n");
+
+ return false;
+ }
+
+ /*
+ * Dump collection is not safe if the sizes of non-primitive type members
+ * of the fadump header do not match between the crashed and fadump kernels.
+ */
+ if (fdh->pt_regs_sz != sizeof(struct pt_regs) ||
+ fdh->cpu_mask_sz != sizeof(struct cpumask)) {
+ pr_err("Fadump header size mismatch.\n");
+ return false;
+ }
+
+ return true;
+}
+
+static void __init fadump_process(void)
+{
+ struct fadump_crash_info_header *fdh;
+
+ fdh = (struct fadump_crash_info_header *) __va(fw_dump.fadumphdr_addr);
+ if (!fdh) {
+ pr_err("Crash info header is empty.\n");
+ goto err_out;
+ }
+
+ /* Avoid processing the dump if fadump header isn't compatible */
+ if (!is_fadump_header_compatible(fdh))
+ goto err_out;
+
+ /* Allocate buffer for elfcorehdr */
+ if (fadump_setup_elfcorehdr_buf())
+ goto err_out;
+
+ fadump_populate_elfcorehdr(fdh);
+
+ /* Let platform update the CPU notes in elfcorehdr */
+ if (fw_dump.ops->fadump_process(&fw_dump) < 0)
+ goto err_out;
+
+ /*
+ * elfcorehdr is now ready to be exported.
+ *
+ * set elfcorehdr_addr so that vmcore module will export the
+ * elfcorehdr through '/proc/vmcore'.
+ */
+ elfcorehdr_addr = virt_to_phys((void *)fw_dump.elfcorehdr_addr);
+ return;
+
+err_out:
+ fadump_invalidate_release_mem();
+}
+
+/*
+ * Reserve memory to store additional parameters to be passed
+ * to the fadump/capture kernel.
+ */
+static void __init fadump_setup_param_area(void)
+{
+ phys_addr_t range_start, range_end;
+
+ if (!fw_dump.param_area_supported || fw_dump.dump_active)
+ return;
+
+ /* This memory can't be used by PFW or bootloader as it is shared across kernels */
+ if (radix_enabled()) {
+ /*
+ * Anywhere in the upper half should be good enough as all memory
+ * is accessible in real mode.
+ */
+ range_start = memblock_end_of_DRAM() / 2;
+ range_end = memblock_end_of_DRAM();
+ } else {
+ /*
+ * Passing additional parameters is supported for hash MMU only
+ * if the first memory block size is 768MB or higher.
+ */
+ if (ppc64_rma_size < 0x30000000)
+ return;
+
+ /*
+ * 640 MB to 768 MB is not used by PFW/bootloader. So, try reserving
+ * memory for passing additional parameters in this range to avoid
+ * being stomped on by PFW/bootloader.
+ */
+ range_start = 0x2A000000;
+ range_end = range_start + 0x4000000;
+ }
+
+ fw_dump.param_area = memblock_phys_alloc_range(COMMAND_LINE_SIZE,
+ COMMAND_LINE_SIZE,
+ range_start,
+ range_end);
+ if (!fw_dump.param_area || sysfs_create_file(fadump_kobj, &bootargs_append_attr.attr)) {
+ pr_warn("WARNING: Could not setup area to pass additional parameters!\n");
+ return;
+ }
+
+ memset(phys_to_virt(fw_dump.param_area), 0, COMMAND_LINE_SIZE);
+}
+
/*
* Prepare for firmware-assisted dump.
*/
@@ -1651,15 +1803,11 @@ int __init setup_fadump(void)
* saving it to the disk.
*/
if (fw_dump.dump_active) {
- /*
- * if dump process fails then invalidate the registration
- * and release memory before proceeding for re-registration.
- */
- if (fw_dump.ops->fadump_process(&fw_dump) < 0)
- fadump_invalidate_release_mem();
+ fadump_process();
}
/* Initialize the kernel dump memory structure and register with f/w */
else if (fw_dump.reserve_dump_area_size) {
+ fadump_setup_param_area();
fw_dump.ops->fadump_init_mem_struct(&fw_dump);
register_fadump();
}
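A minimal userspace sketch of feeding the new bootargs_append file (assuming the usual /sys/kernel/fadump location noted in the hotplug_ready comment above; the extra parameters are only examples):

#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *extra = "udev.children-max=2 numa=off\n";
	FILE *f = fopen("/sys/kernel/fadump/bootargs_append", "w");

	if (!f) {
		perror("bootargs_append");
		return 1;
	}
	/* The store hook rejects writes once a dump is active or when the
	 * combined cmdline would overflow COMMAND_LINE_SIZE. */
	if (fwrite(extra, 1, strlen(extra), f) != strlen(extra))
		perror("write");
	fclose(f);
	return 0;
}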
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 647b0b445e89..edc479a7c2bc 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -199,12 +199,12 @@ instruction_counter:
mfspr r10, SPRN_SRR0 /* Get effective address of fault */
INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11)
mtspr SPRN_MD_EPN, r10
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
mfcr r11
compare_to_kernel_boundary r10, r10
#endif
mfspr r10, SPRN_M_TWB /* Get level 1 table */
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
blt+ 3f
rlwinm r10, r10, 0, 20, 31
oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha
diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
index c1d89764dd22..57196883a00e 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -419,14 +419,14 @@ InstructionTLBMiss:
*/
/* Get PTE (linux-style) and check access */
mfspr r3,SPRN_IMISS
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
lis r1, TASK_SIZE@h /* check if kernel address */
cmplw 0,r1,r3
#endif
mfspr r2, SPRN_SDR1
li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
rlwinm r2, r2, 28, 0xfffff000
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
li r0, 3
bgt- 112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
@@ -442,7 +442,7 @@ InstructionTLBMiss:
andc. r1,r1,r2 /* check access & ~permission */
bne- InstructionAddressInvalid /* return if access not permitted */
/* Convert linux-style PTE to low word of PPC-style PTE */
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_EXECMEM
rlwimi r2, r0, 0, 31, 31 /* userspace ? -> PP lsb */
#endif
ori r1, r1, 0xe06 /* clear out reserved bits */
diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c
index 072ebe7f290b..f8208c027148 100644
--- a/arch/powerpc/kernel/kprobes-ftrace.c
+++ b/arch/powerpc/kernel/kprobes-ftrace.c
@@ -21,6 +21,9 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
struct pt_regs *regs;
int bit;
+ if (unlikely(kprobe_ftrace_disabled))
+ return;
+
bit = ftrace_test_recursion_trylock(nip, parent_nip);
if (bit < 0)
return;
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index bbca90a5e2ec..14c5ddec3056 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -19,8 +19,8 @@
#include <linux/extable.h>
#include <linux/kdebug.h>
#include <linux/slab.h>
-#include <linux/moduleloader.h>
#include <linux/set_memory.h>
+#include <linux/execmem.h>
#include <asm/code-patching.h>
#include <asm/cacheflush.h>
#include <asm/sstep.h>
@@ -126,26 +126,6 @@ kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offse
return (kprobe_opcode_t *)(addr + offset);
}
-void *alloc_insn_page(void)
-{
- void *page;
-
- page = module_alloc(PAGE_SIZE);
- if (!page)
- return NULL;
-
- if (strict_module_rwx_enabled()) {
- int err = set_memory_rox((unsigned long)page, 1);
-
- if (err)
- goto error;
- }
- return page;
-error:
- module_memfree(page);
- return NULL;
-}
-
int arch_prepare_kprobe(struct kprobe *p)
{
int ret = 0;
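With the arch-specific alloc_insn_page() gone, kprobes presumably falls back to the generic helper; under the execmem scheme this series moves to, that fallback is roughly equivalent to the following sketch (an assumption about the common code, not something this hunk shows):

#include <linux/execmem.h>
#include <linux/mm.h>

/* Rough shape of the generic fallback: one page from the kprobes range. */
void *alloc_insn_page(void)
{
	return execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
}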
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 1a8cdafd68e8..91123e102db4 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -192,7 +192,7 @@ _GLOBAL(scom970_read)
xori r0,r0,MSR_EE
mtmsrd r0,1
- /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits
+ /* rotate 24 bits SCOM address 8 bits left and mask out its low 8 bits
* (including parity). On current CPUs they must be 0'd,
* and finally or in RW bit
*/
@@ -226,7 +226,7 @@ _GLOBAL(scom970_write)
xori r0,r0,MSR_EE
mtmsrd r0,1
- /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits
+ /* rotate 24 bits SCOM address 8 bits left and mask out its low 8 bits
* (including parity). On current CPUs they must be 0'd.
*/
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index f6d6ae0a1692..baeb24c102c8 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -7,7 +7,6 @@
#include <linux/elf.h>
#include <linux/moduleloader.h>
#include <linux/err.h>
-#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/bug.h>
#include <asm/module.h>
@@ -17,8 +16,6 @@
#include <asm/setup.h>
#include <asm/sections.h>
-static LIST_HEAD(module_bug_list);
-
static const Elf_Shdr *find_section(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
const char *name)
@@ -88,40 +85,3 @@ int module_finalize(const Elf_Ehdr *hdr,
return 0;
}
-
-static __always_inline void *
-__module_alloc(unsigned long size, unsigned long start, unsigned long end, bool nowarn)
-{
- pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : PAGE_KERNEL_EXEC;
- gfp_t gfp = GFP_KERNEL | (nowarn ? __GFP_NOWARN : 0);
-
- /*
- * Don't do huge page allocations for modules yet until more testing
- * is done. STRICT_MODULE_RWX may require extra work to support this
- * too.
- */
- return __vmalloc_node_range(size, 1, start, end, gfp, prot,
- VM_FLUSH_RESET_PERMS,
- NUMA_NO_NODE, __builtin_return_address(0));
-}
-
-void *module_alloc(unsigned long size)
-{
-#ifdef MODULES_VADDR
- unsigned long limit = (unsigned long)_etext - SZ_32M;
- void *ptr = NULL;
-
- BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR);
-
- /* First try within 32M limit from _etext to avoid branch trampolines */
- if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit)
- ptr = __module_alloc(size, limit, MODULES_END, true);
-
- if (!ptr)
- ptr = __module_alloc(size, MODULES_VADDR, MODULES_END, false);
-
- return ptr;
-#else
- return __module_alloc(size, VMALLOC_START, VMALLOC_END, false);
-#endif
-}
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 9452a54d356c..a7671786764b 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1185,6 +1185,9 @@ static inline void save_sprs(struct thread_struct *t)
if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE))
t->hashkeyr = mfspr(SPRN_HASHKEYR);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ t->dexcr = mfspr(SPRN_DEXCR);
#endif
}
@@ -1267,6 +1270,10 @@ static inline void restore_sprs(struct thread_struct *old_thread,
if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE) &&
old_thread->hashkeyr != new_thread->hashkeyr)
mtspr(SPRN_HASHKEYR, new_thread->hashkeyr);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31) &&
+ old_thread->dexcr != new_thread->dexcr)
+ mtspr(SPRN_DEXCR, new_thread->dexcr);
#endif
}
@@ -1634,6 +1641,13 @@ void arch_setup_new_exec(void)
current->thread.regs->amr = default_amr;
current->thread.regs->iamr = default_iamr;
#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ current->thread.dexcr = current->thread.dexcr_onexec;
+ mtspr(SPRN_DEXCR, current->thread.dexcr);
+ }
+#endif /* CONFIG_PPC_BOOK3S_64 */
}
#ifdef CONFIG_PPC64
@@ -1647,7 +1661,7 @@ void arch_setup_new_exec(void)
* cases will happen:
*
* 1. The correct thread is running, the wrong thread is not
- * In this situation, the correct thread is woken and proceeds to pass it's
+ * In this situation, the correct thread is woken and proceeds to pass its
* condition check.
*
* 2. Neither threads are running
@@ -1657,15 +1671,15 @@ void arch_setup_new_exec(void)
* for the wrong thread, or they will execute the condition check immediately.
*
* 3. The wrong thread is running, the correct thread is not
- * The wrong thread will be woken, but will fail it's condition check and
+ * The wrong thread will be woken, but will fail its condition check and
* re-execute wait. The correct thread, when scheduled, will execute either
- * it's condition check (which will pass), or wait, which returns immediately
- * when called the first time after the thread is scheduled, followed by it's
+ * its condition check (which will pass), or wait, which returns immediately
+ * when called the first time after the thread is scheduled, followed by its
* condition check (which will pass).
*
* 4. Both threads are running
- * Both threads will be woken. The wrong thread will fail it's condition check
- * and execute another wait, while the correct thread will pass it's condition
+ * Both threads will be woken. The wrong thread will fail its condition check
+ * and execute another wait, while the correct thread will pass its condition
* check.
*
* @t: the task to set the thread ID for
@@ -1878,6 +1892,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
#ifdef CONFIG_PPC_BOOK3S_64
if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE))
p->thread.hashkeyr = current->thread.hashkeyr;
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ p->thread.dexcr = mfspr(SPRN_DEXCR);
#endif
return 0;
}
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index cd8d8883de90..60819751e55e 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -779,7 +779,7 @@ static inline void save_fscr_to_task(void) {}
void __init early_init_devtree(void *params)
{
- phys_addr_t limit;
+ phys_addr_t int_vector_size;
DBG(" -> early_init_devtree(%px)\n", params);
@@ -813,6 +813,9 @@ void __init early_init_devtree(void *params)
*/
of_scan_flat_dt(early_init_dt_scan_chosen_ppc, boot_command_line);
+ /* Append additional parameters passed for fadump capture kernel */
+ fadump_append_bootargs();
+
/* Scan memory nodes and rebuild MEMBLOCKs */
early_init_dt_scan_root();
early_init_dt_scan_memory_ppc();
@@ -832,9 +835,16 @@ void __init early_init_devtree(void *params)
setup_initial_memory_limit(memstart_addr, first_memblock_size);
/* Reserve MEMBLOCK regions used by kernel, initrd, dt, etc... */
memblock_reserve(PHYSICAL_START, __pa(_end) - PHYSICAL_START);
+#ifdef CONFIG_PPC64
+ /* If relocatable, reserve at least 32k for interrupt vectors etc. */
+ int_vector_size = __end_interrupts - _stext;
+ int_vector_size = max_t(phys_addr_t, SZ_32K, int_vector_size);
+#else
/* If relocatable, reserve first 32k for interrupt vectors etc. */
+ int_vector_size = SZ_32K;
+#endif
if (PHYSICAL_START > MEMORY_START)
- memblock_reserve(MEMORY_START, 0x8000);
+ memblock_reserve(MEMORY_START, int_vector_size);
reserve_kdump_trampoline();
#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
/*
@@ -846,9 +856,12 @@ void __init early_init_devtree(void *params)
reserve_crashkernel();
early_reserve_mem();
- /* Ensure that total memory size is page-aligned. */
- limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE);
- memblock_enforce_memory_limit(limit);
+ if (memory_limit > memblock_phys_mem_size())
+ memory_limit = 0;
+
+ /* Align down to 16 MB which is large page size with hash page translation */
+ memory_limit = ALIGN_DOWN(memory_limit ?: memblock_phys_mem_size(), SZ_16M);
+ memblock_enforce_memory_limit(memory_limit);
#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_4K_PAGES)
if (!early_radix_enabled())
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 0ef358285337..fbb68fc28ed3 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -817,8 +817,8 @@ static void __init early_cmdline_parse(void)
opt += 4;
prom_memory_limit = prom_memparse(opt, (const char **)&opt);
#ifdef CONFIG_PPC64
- /* Align to 16 MB == size of ppc64 large page */
- prom_memory_limit = ALIGN(prom_memory_limit, 0x1000000);
+ /* Align down to 16 MB which is large page size with hash page translation */
+ prom_memory_limit = ALIGN_DOWN(prom_memory_limit, SZ_16M);
#endif
}
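Both this hunk and the prom.c change above round the limit down to the 16 MB hash large-page size instead of up; a small standalone illustration of that arithmetic (the example limit value is arbitrary):

#include <stdio.h>

#define SZ_16M			0x01000000UL
#define ALIGN_DOWN(x, a)	((x) & ~((a) - 1))	/* power-of-two alignment */

int main(void)
{
	unsigned long limit = 0x40a00000UL;	/* e.g. mem=1034M on the cmdline */

	/* 0x40a00000 -> 0x40000000: rounded down to a 16 MB boundary. */
	printf("%#lx -> %#lx\n", limit, ALIGN_DOWN(limit, SZ_16M));
	return 0;
}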
diff --git a/arch/powerpc/kernel/ptrace/ptrace-tm.c b/arch/powerpc/kernel/ptrace/ptrace-tm.c
index 210ea834e603..447bff87fd21 100644
--- a/arch/powerpc/kernel/ptrace/ptrace-tm.c
+++ b/arch/powerpc/kernel/ptrace/ptrace-tm.c
@@ -12,7 +12,7 @@ void flush_tmregs_to_thread(struct task_struct *tsk)
{
/*
* If task is not current, it will have been flushed already to
- * it's thread_struct during __switch_to().
+ * its thread_struct during __switch_to().
*
* A reclaim flushes ALL the state or if not in TM save TM SPRs
* in the appropriate thread structures from live.
diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c
index 584cf5c3df50..c1819e0a6684 100644
--- a/arch/powerpc/kernel/ptrace/ptrace-view.c
+++ b/arch/powerpc/kernel/ptrace/ptrace-view.c
@@ -469,12 +469,7 @@ static int dexcr_get(struct task_struct *target, const struct user_regset *regse
if (!cpu_has_feature(CPU_FTR_ARCH_31))
return -ENODEV;
- /*
- * The DEXCR is currently static across all CPUs, so we don't
- * store the target's value anywhere, but the static value
- * will also be correct.
- */
- membuf_store(&to, (u64)lower_32_bits(DEXCR_INIT));
+ membuf_store(&to, (u64)lower_32_bits(target->thread.dexcr));
/*
* Technically the HDEXCR is per-cpu, but a hypervisor can't reasonably
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 01ed1263e1a9..4bd2f87616ba 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -405,7 +405,7 @@ static void __init cpu_init_thread_core_maps(int tpc)
cpumask_set_cpu(i, &threads_core_mask);
printk(KERN_INFO "CPU maps initialized for %d thread%s per core\n",
- tpc, tpc > 1 ? "s" : "");
+ tpc, str_plural(tpc));
printk(KERN_DEBUG " (thread shift is %d)\n", threads_shift);
}
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 2f19d5e94485..ae36a129789f 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -834,6 +834,7 @@ static __init int pcpu_cpu_to_node(int cpu)
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
+DEFINE_STATIC_KEY_FALSE(__percpu_first_chunk_is_paged);
void __init setup_per_cpu_areas(void)
{
@@ -876,6 +877,7 @@ void __init setup_per_cpu_areas(void)
if (rc < 0)
panic("cannot initialize percpu area (err=%d)", rc);
+ static_key_enable(&__percpu_first_chunk_is_paged.key);
delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
for_each_possible_cpu(cpu) {
__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 12e53b3d7923..46e6d2cd7a2d 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1567,7 +1567,7 @@ static void add_cpu_to_masks(int cpu)
/*
* This CPU will not be in the online mask yet so we need to manually
- * add it to it's own thread sibling mask.
+ * add it to its own thread sibling mask.
*/
map_cpu_to_node(cpu, cpu_to_node(cpu));
cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 0f39a6b84132..b842c83ab497 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -139,7 +139,7 @@ static unsigned long dscr_default;
* @val: Returned cpu specific DSCR default value
*
* This function returns the per cpu DSCR default value
- * for any cpu which is contained in it's PACA structure.
+ * for any cpu which is contained in its PACA structure.
*/
static void read_dscr(void *val)
{
@@ -152,7 +152,7 @@ static void read_dscr(void *val)
* @val: New cpu specific DSCR default value to update
*
* This function updates the per cpu DSCR default value
- * for any cpu which is contained in it's PACA structure.
+ * for any cpu which is contained in its PACA structure.
*/
static void write_dscr(void *val)
{
diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile
index 8e469c4da3f8..470eb0453e17 100644
--- a/arch/powerpc/kexec/Makefile
+++ b/arch/powerpc/kexec/Makefile
@@ -3,11 +3,11 @@
# Makefile for the linux kernel.
#
-obj-y += core.o core_$(BITS).o
+obj-y += core.o core_$(BITS).o ranges.o
obj-$(CONFIG_PPC32) += relocate_32.o
-obj-$(CONFIG_KEXEC_FILE) += file_load.o ranges.o file_load_$(BITS).o elf_$(BITS).o
+obj-$(CONFIG_KEXEC_FILE) += file_load.o file_load_$(BITS).o elf_$(BITS).o
obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o
obj-$(CONFIG_CRASH_DUMP) += crash.o
diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
index 762e4d09aacf..85050be08a23 100644
--- a/arch/powerpc/kexec/core_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -17,6 +17,7 @@
#include <linux/cpu.h>
#include <linux/hardirq.h>
#include <linux/of.h>
+#include <linux/libfdt.h>
#include <asm/page.h>
#include <asm/current.h>
@@ -30,6 +31,7 @@
#include <asm/hw_breakpoint.h>
#include <asm/svm.h>
#include <asm/ultravisor.h>
+#include <asm/crashdump-ppc64.h>
int machine_kexec_prepare(struct kimage *image)
{
@@ -419,3 +421,92 @@ static int __init export_htab_values(void)
}
late_initcall(export_htab_values);
#endif /* CONFIG_PPC_64S_HASH_MMU */
+
+#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP)
+/**
+ * add_node_props - Reads node properties from device node structure and add
+ * them to fdt.
+ * @fdt: Flattened device tree of the kernel
+ * @node_offset: offset of the node to add a property at
+ * @dn: device node pointer
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int add_node_props(void *fdt, int node_offset, const struct device_node *dn)
+{
+ int ret = 0;
+ struct property *pp;
+
+ if (!dn)
+ return -EINVAL;
+
+ for_each_property_of_node(dn, pp) {
+ ret = fdt_setprop(fdt, node_offset, pp->name, pp->value, pp->length);
+ if (ret < 0) {
+ pr_err("Unable to add %s property: %s\n", pp->name, fdt_strerror(ret));
+ return ret;
+ }
+ }
+ return ret;
+}
+
+/**
+ * update_cpus_node - Update cpus node of flattened device tree using of_root
+ * device node.
+ * @fdt: Flattened device tree of the kernel.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int update_cpus_node(void *fdt)
+{
+ struct device_node *cpus_node, *dn;
+ int cpus_offset, cpus_subnode_offset, ret = 0;
+
+ cpus_offset = fdt_path_offset(fdt, "/cpus");
+ if (cpus_offset < 0 && cpus_offset != -FDT_ERR_NOTFOUND) {
+ pr_err("Malformed device tree: error reading /cpus node: %s\n",
+ fdt_strerror(cpus_offset));
+ return cpus_offset;
+ }
+
+ if (cpus_offset > 0) {
+ ret = fdt_del_node(fdt, cpus_offset);
+ if (ret < 0) {
+ pr_err("Error deleting /cpus node: %s\n", fdt_strerror(ret));
+ return -EINVAL;
+ }
+ }
+
+ /* Add cpus node to fdt */
+ cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), "cpus");
+ if (cpus_offset < 0) {
+ pr_err("Error creating /cpus node: %s\n", fdt_strerror(cpus_offset));
+ return -EINVAL;
+ }
+
+ /* Add cpus node properties */
+ cpus_node = of_find_node_by_path("/cpus");
+ ret = add_node_props(fdt, cpus_offset, cpus_node);
+ of_node_put(cpus_node);
+ if (ret < 0)
+ return ret;
+
+ /* Loop through all subnodes of cpus and add them to fdt */
+ for_each_node_by_type(dn, "cpu") {
+ cpus_subnode_offset = fdt_add_subnode(fdt, cpus_offset, dn->full_name);
+ if (cpus_subnode_offset < 0) {
+ pr_err("Unable to add %s subnode: %s\n", dn->full_name,
+ fdt_strerror(cpus_subnode_offset));
+ ret = cpus_subnode_offset;
+ goto out;
+ }
+
+ ret = add_node_props(fdt, cpus_subnode_offset, dn);
+ if (ret < 0)
+ goto out;
+ }
+out:
+ of_node_put(dn);
+ return ret;
+}
+#endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */
diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c
index ef5c2d25ec39..9ac3266e4965 100644
--- a/arch/powerpc/kexec/crash.c
+++ b/arch/powerpc/kexec/crash.c
@@ -16,6 +16,8 @@
#include <linux/delay.h>
#include <linux/irq.h>
#include <linux/types.h>
+#include <linux/libfdt.h>
+#include <linux/memory.h>
#include <asm/processor.h>
#include <asm/machdep.h>
@@ -24,6 +26,7 @@
#include <asm/setjmp.h>
#include <asm/debug.h>
#include <asm/interrupt.h>
+#include <asm/kexec_ranges.h>
/*
* The primary CPU waits a while for all secondary CPUs to enter. This is to
@@ -392,3 +395,195 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
if (ppc_md.kexec_cpu_down)
ppc_md.kexec_cpu_down(1, 0);
}
+
+#ifdef CONFIG_CRASH_HOTPLUG
+#undef pr_fmt
+#define pr_fmt(fmt) "crash hp: " fmt
+
+/*
+ * Advertise preferred elfcorehdr size to userspace via
+ * /sys/kernel/crash_elfcorehdr_size sysfs interface.
+ */
+unsigned int arch_crash_get_elfcorehdr_size(void)
+{
+ unsigned long phdr_cnt;
+
+ /* One program header per possible CPU, plus one for vmcoreinfo */
+ phdr_cnt = num_possible_cpus() + 1;
+ if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
+ phdr_cnt += CONFIG_CRASH_MAX_MEMORY_RANGES;
+
+ return sizeof(struct elfhdr) + (phdr_cnt * sizeof(Elf64_Phdr));
+}
+
+/**
+ * update_crash_elfcorehdr() - Recreate the elfcorehdr and replace it with old
+ * elfcorehdr in the kexec segment array.
+ * @image: the active struct kimage
+ * @mn: struct memory_notify data handler
+ */
+static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify *mn)
+{
+ int ret;
+ struct crash_mem *cmem = NULL;
+ struct kexec_segment *ksegment;
+ void *ptr, *mem, *elfbuf = NULL;
+ unsigned long elfsz, memsz, base_addr, size;
+
+ ksegment = &image->segment[image->elfcorehdr_index];
+ mem = (void *) ksegment->mem;
+ memsz = ksegment->memsz;
+
+ ret = get_crash_memory_ranges(&cmem);
+ if (ret) {
+ pr_err("Failed to get crash mem range\n");
+ return;
+ }
+
+ /*
+ * The hot-unplugged memory is part of the crash memory ranges;
+ * remove it here.
+ */
+ if (image->hp_action == KEXEC_CRASH_HP_REMOVE_MEMORY) {
+ base_addr = PFN_PHYS(mn->start_pfn);
+ size = mn->nr_pages * PAGE_SIZE;
+ ret = remove_mem_range(&cmem, base_addr, size);
+ if (ret) {
+ pr_err("Failed to remove hot-unplugged memory from crash memory ranges\n");
+ goto out;
+ }
+ }
+
+ ret = crash_prepare_elf64_headers(cmem, false, &elfbuf, &elfsz);
+ if (ret) {
+ pr_err("Failed to prepare elf header\n");
+ goto out;
+ }
+
+ /*
+ * It is unlikely that the kernel hits this because the elfcorehdr kexec
+ * segment (memsz) is built with additional space to accommodate a growing
+ * number of crash memory ranges while loading the kdump kernel. This is
+ * just to guard against any unforeseen case.
+ */
+ if (elfsz > memsz) {
+ pr_err("Updated crash elfcorehdr elfsz %lu > memsz %lu", elfsz, memsz);
+ goto out;
+ }
+
+ ptr = __va(mem);
+ if (ptr) {
+ /* Temporarily invalidate the crash image while it is replaced */
+ xchg(&kexec_crash_image, NULL);
+
+ /* Replace the old elfcorehdr with newly prepared elfcorehdr */
+ memcpy((void *)ptr, elfbuf, elfsz);
+
+ /* The crash image is now valid once again */
+ xchg(&kexec_crash_image, image);
+ }
+out:
+ kvfree(cmem);
+ kvfree(elfbuf);
+}
+
+/**
+ * get_fdt_index - Loop through the kexec segment array and find
+ * the index of the FDT segment.
+ * @image: a pointer to kexec_crash_image
+ *
+ * Returns the index of FDT segment in the kexec segment array
+ * if found; otherwise -1.
+ */
+static int get_fdt_index(struct kimage *image)
+{
+ void *ptr;
+ unsigned long mem;
+ int i, fdt_index = -1;
+
+ /* Find the FDT segment index in kexec segment array. */
+ for (i = 0; i < image->nr_segments; i++) {
+ mem = image->segment[i].mem;
+ ptr = __va(mem);
+
+ if (ptr && fdt_magic(ptr) == FDT_MAGIC) {
+ fdt_index = i;
+ break;
+ }
+ }
+
+ return fdt_index;
+}
+
+/**
+ * update_crash_fdt - updates the cpus node of the crash FDT.
+ *
+ * @image: a pointer to kexec_crash_image
+ */
+static void update_crash_fdt(struct kimage *image)
+{
+ void *fdt;
+ int fdt_index;
+
+ fdt_index = get_fdt_index(image);
+ if (fdt_index < 0) {
+ pr_err("Unable to locate FDT segment.\n");
+ return;
+ }
+
+ fdt = __va((void *)image->segment[fdt_index].mem);
+
+ /* Temporarily invalidate the crash image while it is replaced */
+ xchg(&kexec_crash_image, NULL);
+
+ /* update FDT to reflect changes in CPU resources */
+ if (update_cpus_node(fdt))
+ pr_err("Failed to update crash FDT");
+
+ /* The crash image is now valid once again */
+ xchg(&kexec_crash_image, image);
+}
+
+int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags)
+{
+#ifdef CONFIG_KEXEC_FILE
+ if (image->file_mode)
+ return 1;
+#endif
+ return kexec_flags & KEXEC_CRASH_HOTPLUG_SUPPORT;
+}
+
+/**
+ * arch_crash_handle_hotplug_event - Handle crash CPU/memory hotplug events by updating
+ * the necessary kexec segments.
+ * @image: a pointer to kexec_crash_image
+ * @arg: struct memory_notify data for the memory hotplug case; NULL for the CPU hotplug case.
+ *
+ * Update the kdump image based on the type of hotplug event, represented by image->hp_action.
+ * CPU add: Update the FDT segment to include the newly added CPU.
+ * CPU remove: No action is needed, on the assumption that it is okay for offline CPUs to
+ * remain part of the FDT.
+ * Memory add/remove: Recreate the elfcorehdr to reflect the updated crash memory ranges.
+ */
+void arch_crash_handle_hotplug_event(struct kimage *image, void *arg)
+{
+ struct memory_notify *mn;
+
+ switch (image->hp_action) {
+ case KEXEC_CRASH_HP_REMOVE_CPU:
+ return;
+
+ case KEXEC_CRASH_HP_ADD_CPU:
+ update_crash_fdt(image);
+ break;
+
+ case KEXEC_CRASH_HP_REMOVE_MEMORY:
+ case KEXEC_CRASH_HP_ADD_MEMORY:
+ mn = (struct memory_notify *)arg;
+ update_crash_elfcorehdr(image, mn);
+ return;
+ default:
+ pr_warn_once("Unknown hotplug action\n");
+ }
+}
+#endif /* CONFIG_CRASH_HOTPLUG */
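
For the memory hotplug path above, the struct memory_notify payload maps onto the range removed from the crash memory list as base = PFN_PHYS(mn->start_pfn) and size = mn->nr_pages * PAGE_SIZE. A rough worked example, assuming the 64 KiB page size common on ppc64 (values hypothetical):

    /* hypothetical hot-unplug of a 256 MiB memory block */
    mn->start_pfn = 0x10000;               /* PFN_PHYS() -> 0x100000000, i.e. 4 GiB      */
    mn->nr_pages  = 4096;                  /* 4096 * 64 KiB = 256 MiB removed from cmem  */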
diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
index 6d8951e8e966..214c071c58ed 100644
--- a/arch/powerpc/kexec/elf_64.c
+++ b/arch/powerpc/kexec/elf_64.c
@@ -116,7 +116,8 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
if (ret)
goto out_free_fdt;
- fdt_pack(fdt);
+ if (!IS_ENABLED(CONFIG_CRASH_HOTPLUG) || image->type != KEXEC_TYPE_CRASH)
+ fdt_pack(fdt);
kbuf.buffer = fdt;
kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt);
diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
index 1bc65de6174f..925a69ad2468 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -30,6 +30,7 @@
#include <asm/iommu.h>
#include <asm/prom.h>
#include <asm/plpks.h>
+#include <asm/cputhreads.h>
struct umem_info {
__be64 *buf; /* data buffer for usable-memory property */
@@ -48,83 +49,6 @@ const struct kexec_file_ops * const kexec_file_loaders[] = {
};
/**
- * get_exclude_memory_ranges - Get exclude memory ranges. This list includes
- * regions like opal/rtas, tce-table, initrd,
- * kernel, htab which should be avoided while
- * setting up kexec load segments.
- * @mem_ranges: Range list to add the memory ranges to.
- *
- * Returns 0 on success, negative errno on error.
- */
-static int get_exclude_memory_ranges(struct crash_mem **mem_ranges)
-{
- int ret;
-
- ret = add_tce_mem_ranges(mem_ranges);
- if (ret)
- goto out;
-
- ret = add_initrd_mem_range(mem_ranges);
- if (ret)
- goto out;
-
- ret = add_htab_mem_range(mem_ranges);
- if (ret)
- goto out;
-
- ret = add_kernel_mem_range(mem_ranges);
- if (ret)
- goto out;
-
- ret = add_rtas_mem_range(mem_ranges);
- if (ret)
- goto out;
-
- ret = add_opal_mem_range(mem_ranges);
- if (ret)
- goto out;
-
- ret = add_reserved_mem_ranges(mem_ranges);
- if (ret)
- goto out;
-
- /* exclude memory ranges should be sorted for easy lookup */
- sort_memory_ranges(*mem_ranges, true);
-out:
- if (ret)
- pr_err("Failed to setup exclude memory ranges\n");
- return ret;
-}
-
-/**
- * get_reserved_memory_ranges - Get reserve memory ranges. This list includes
- * memory regions that should be added to the
- * memory reserve map to ensure the region is
- * protected from any mischief.
- * @mem_ranges: Range list to add the memory ranges to.
- *
- * Returns 0 on success, negative errno on error.
- */
-static int get_reserved_memory_ranges(struct crash_mem **mem_ranges)
-{
- int ret;
-
- ret = add_rtas_mem_range(mem_ranges);
- if (ret)
- goto out;
-
- ret = add_tce_mem_ranges(mem_ranges);
- if (ret)
- goto out;
-
- ret = add_reserved_mem_ranges(mem_ranges);
-out:
- if (ret)
- pr_err("Failed to setup reserved memory ranges\n");
- return ret;
-}
-
-/**
* __locate_mem_hole_top_down - Looks top down for a large enough memory hole
* in the memory regions between buf_min & buf_max
* for the buffer. If found, sets kbuf->mem.
@@ -323,119 +247,6 @@ static int locate_mem_hole_bottom_up_ppc64(struct kexec_buf *kbuf,
#ifdef CONFIG_CRASH_DUMP
/**
- * get_usable_memory_ranges - Get usable memory ranges. This list includes
- * regions like crashkernel, opal/rtas & tce-table,
- * that kdump kernel could use.
- * @mem_ranges: Range list to add the memory ranges to.
- *
- * Returns 0 on success, negative errno on error.
- */
-static int get_usable_memory_ranges(struct crash_mem **mem_ranges)
-{
- int ret;
-
- /*
- * Early boot failure observed on guests when low memory (first memory
- * block?) is not added to usable memory. So, add [0, crashk_res.end]
- * instead of [crashk_res.start, crashk_res.end] to workaround it.
- * Also, crashed kernel's memory must be added to reserve map to
- * avoid kdump kernel from using it.
- */
- ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1);
- if (ret)
- goto out;
-
- ret = add_rtas_mem_range(mem_ranges);
- if (ret)
- goto out;
-
- ret = add_opal_mem_range(mem_ranges);
- if (ret)
- goto out;
-
- ret = add_tce_mem_ranges(mem_ranges);
-out:
- if (ret)
- pr_err("Failed to setup usable memory ranges\n");
- return ret;
-}
-
-/**
- * get_crash_memory_ranges - Get crash memory ranges. This list includes
- * first/crashing kernel's memory regions that
- * would be exported via an elfcore.
- * @mem_ranges: Range list to add the memory ranges to.
- *
- * Returns 0 on success, negative errno on error.
- */
-static int get_crash_memory_ranges(struct crash_mem **mem_ranges)
-{
- phys_addr_t base, end;
- struct crash_mem *tmem;
- u64 i;
- int ret;
-
- for_each_mem_range(i, &base, &end) {
- u64 size = end - base;
-
- /* Skip backup memory region, which needs a separate entry */
- if (base == BACKUP_SRC_START) {
- if (size > BACKUP_SRC_SIZE) {
- base = BACKUP_SRC_END + 1;
- size -= BACKUP_SRC_SIZE;
- } else
- continue;
- }
-
- ret = add_mem_range(mem_ranges, base, size);
- if (ret)
- goto out;
-
- /* Try merging adjacent ranges before reallocation attempt */
- if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges)
- sort_memory_ranges(*mem_ranges, true);
- }
-
- /* Reallocate memory ranges if there is no space to split ranges */
- tmem = *mem_ranges;
- if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) {
- tmem = realloc_mem_ranges(mem_ranges);
- if (!tmem)
- goto out;
- }
-
- /* Exclude crashkernel region */
- ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end);
- if (ret)
- goto out;
-
- /*
- * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL
- * regions are exported to save their context at the time of
- * crash, they should actually be backed up just like the
- * first 64K bytes of memory.
- */
- ret = add_rtas_mem_range(mem_ranges);
- if (ret)
- goto out;
-
- ret = add_opal_mem_range(mem_ranges);
- if (ret)
- goto out;
-
- /* create a separate program header for the backup region */
- ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE);
- if (ret)
- goto out;
-
- sort_memory_ranges(*mem_ranges, false);
-out:
- if (ret)
- pr_err("Failed to setup crash memory ranges\n");
- return ret;
-}
-
-/**
* check_realloc_usable_mem - Reallocate buffer if it can't accommodate entries
* @um_info: Usable memory buffer and ranges info.
* @cnt: No. of entries to accommodate.
@@ -784,6 +595,23 @@ static void update_backup_region_phdr(struct kimage *image, Elf64_Ehdr *ehdr)
}
}
+static unsigned int kdump_extra_elfcorehdr_size(struct crash_mem *cmem)
+{
+#if defined(CONFIG_CRASH_HOTPLUG) && defined(CONFIG_MEMORY_HOTPLUG)
+ unsigned int extra_sz = 0;
+
+ if (CONFIG_CRASH_MAX_MEMORY_RANGES > (unsigned int)PN_XNUM)
+ pr_warn("Number of Phdrs %u exceeds max\n", CONFIG_CRASH_MAX_MEMORY_RANGES);
+ else if (cmem->nr_ranges >= CONFIG_CRASH_MAX_MEMORY_RANGES)
+ pr_warn("Configured crash mem ranges may not be enough\n");
+ else
+ extra_sz = (CONFIG_CRASH_MAX_MEMORY_RANGES - cmem->nr_ranges) * sizeof(Elf64_Phdr);
+
+ return extra_sz;
+#endif
+ return 0;
+}
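
load_elfcorehdr_segment() below adds this to memsz (but not bufsz), so the target segment is sized for the configured maximum rather than for the ranges present at load time. Roughly, with hypothetical numbers:

    /* 100 crash memory ranges at load time, CONFIG_CRASH_MAX_MEMORY_RANGES = 8192 */
    extra_sz = (8192 - 100) * sizeof(Elf64_Phdr);   /* = 453152 bytes, about 443 KiB */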
+
/**
* load_elfcorehdr_segment - Setup crash memory ranges and initialize elfcorehdr
* segment needed to load kdump kernel.
@@ -815,7 +643,8 @@ static int load_elfcorehdr_segment(struct kimage *image, struct kexec_buf *kbuf)
kbuf->buffer = headers;
kbuf->mem = KEXEC_BUF_MEM_UNKNOWN;
- kbuf->bufsz = kbuf->memsz = headers_sz;
+ kbuf->bufsz = headers_sz;
+ kbuf->memsz = headers_sz + kdump_extra_elfcorehdr_size(cmem);
kbuf->top_down = false;
ret = kexec_add_buffer(kbuf);
@@ -979,6 +808,9 @@ static unsigned int kdump_extra_fdt_size_ppc64(struct kimage *image)
unsigned int cpu_nodes, extra_size = 0;
struct device_node *dn;
u64 usm_entries;
+#ifdef CONFIG_CRASH_HOTPLUG
+ unsigned int possible_cpu_nodes;
+#endif
if (!IS_ENABLED(CONFIG_CRASH_DUMP) || image->type != KEXEC_TYPE_CRASH)
return 0;
@@ -1006,6 +838,19 @@ static unsigned int kdump_extra_fdt_size_ppc64(struct kimage *image)
if (cpu_nodes > boot_cpu_node_count)
extra_size += (cpu_nodes - boot_cpu_node_count) * cpu_node_size();
+#ifdef CONFIG_CRASH_HOTPLUG
+ /*
+ * Make sure enough space is reserved in the crash FDT to accommodate
+ * all possible CPU nodes. This allows CPU nodes that are not yet
+ * present in the system to be added later without regenerating the
+ * entire FDT.
+ */
+ if (image->type == KEXEC_TYPE_CRASH) {
+ possible_cpu_nodes = num_possible_cpus() / threads_per_core;
+ if (possible_cpu_nodes > cpu_nodes)
+ extra_size += (possible_cpu_nodes - cpu_nodes) * cpu_node_size();
+ }
+#endif
+
return extra_size;
}
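
As a rough sketch of that reservation (hypothetical values): with 2048 possible CPU threads and 8 threads per core, num_possible_cpus() / threads_per_core yields 256 CPU nodes; if only 32 cores are present at load time, the crash FDT gets headroom for 224 more nodes:

    /* hypothetical: 2048 possible threads / 8 threads per core = 256 cpu nodes */
    extra_size += (256 - 32) * cpu_node_size();     /* room for 224 hot-pluggable CPU nodes */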
@@ -1028,93 +873,6 @@ unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image)
return extra_size + kdump_extra_fdt_size_ppc64(image);
}
-/**
- * add_node_props - Reads node properties from device node structure and add
- * them to fdt.
- * @fdt: Flattened device tree of the kernel
- * @node_offset: offset of the node to add a property at
- * @dn: device node pointer
- *
- * Returns 0 on success, negative errno on error.
- */
-static int add_node_props(void *fdt, int node_offset, const struct device_node *dn)
-{
- int ret = 0;
- struct property *pp;
-
- if (!dn)
- return -EINVAL;
-
- for_each_property_of_node(dn, pp) {
- ret = fdt_setprop(fdt, node_offset, pp->name, pp->value, pp->length);
- if (ret < 0) {
- pr_err("Unable to add %s property: %s\n", pp->name, fdt_strerror(ret));
- return ret;
- }
- }
- return ret;
-}
-
-/**
- * update_cpus_node - Update cpus node of flattened device tree using of_root
- * device node.
- * @fdt: Flattened device tree of the kernel.
- *
- * Returns 0 on success, negative errno on error.
- */
-static int update_cpus_node(void *fdt)
-{
- struct device_node *cpus_node, *dn;
- int cpus_offset, cpus_subnode_offset, ret = 0;
-
- cpus_offset = fdt_path_offset(fdt, "/cpus");
- if (cpus_offset < 0 && cpus_offset != -FDT_ERR_NOTFOUND) {
- pr_err("Malformed device tree: error reading /cpus node: %s\n",
- fdt_strerror(cpus_offset));
- return cpus_offset;
- }
-
- if (cpus_offset > 0) {
- ret = fdt_del_node(fdt, cpus_offset);
- if (ret < 0) {
- pr_err("Error deleting /cpus node: %s\n", fdt_strerror(ret));
- return -EINVAL;
- }
- }
-
- /* Add cpus node to fdt */
- cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), "cpus");
- if (cpus_offset < 0) {
- pr_err("Error creating /cpus node: %s\n", fdt_strerror(cpus_offset));
- return -EINVAL;
- }
-
- /* Add cpus node properties */
- cpus_node = of_find_node_by_path("/cpus");
- ret = add_node_props(fdt, cpus_offset, cpus_node);
- of_node_put(cpus_node);
- if (ret < 0)
- return ret;
-
- /* Loop through all subnodes of cpus and add them to fdt */
- for_each_node_by_type(dn, "cpu") {
- cpus_subnode_offset = fdt_add_subnode(fdt, cpus_offset, dn->full_name);
- if (cpus_subnode_offset < 0) {
- pr_err("Unable to add %s subnode: %s\n", dn->full_name,
- fdt_strerror(cpus_subnode_offset));
- ret = cpus_subnode_offset;
- goto out;
- }
-
- ret = add_node_props(fdt, cpus_subnode_offset, dn);
- if (ret < 0)
- goto out;
- }
-out:
- of_node_put(dn);
- return ret;
-}
-
static int copy_property(void *fdt, int node_offset, const struct device_node *dn,
const char *propname)
{
diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c
index 33b780049aaf..3702b0bdab14 100644
--- a/arch/powerpc/kexec/ranges.c
+++ b/arch/powerpc/kexec/ranges.c
@@ -20,9 +20,13 @@
#include <linux/kexec.h>
#include <linux/of.h>
#include <linux/slab.h>
+#include <linux/memblock.h>
+#include <linux/crash_core.h>
#include <asm/sections.h>
#include <asm/kexec_ranges.h>
+#include <asm/crashdump-ppc64.h>
+#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP)
/**
* get_max_nr_ranges - Get the max no. of ranges crash_mem structure
* could hold, given the size allocated for it.
@@ -234,13 +238,16 @@ int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)
return __add_mem_range(mem_ranges, base, size);
}
+#endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */
+
+#ifdef CONFIG_KEXEC_FILE
/**
* add_tce_mem_ranges - Adds tce-table range to the given memory ranges list.
* @mem_ranges: Range list to add the memory range(s) to.
*
* Returns 0 on success, negative errno on error.
*/
-int add_tce_mem_ranges(struct crash_mem **mem_ranges)
+static int add_tce_mem_ranges(struct crash_mem **mem_ranges)
{
struct device_node *dn = NULL;
int ret = 0;
@@ -279,7 +286,7 @@ int add_tce_mem_ranges(struct crash_mem **mem_ranges)
*
* Returns 0 on success, negative errno on error.
*/
-int add_initrd_mem_range(struct crash_mem **mem_ranges)
+static int add_initrd_mem_range(struct crash_mem **mem_ranges)
{
u64 base, end;
int ret;
@@ -296,7 +303,6 @@ int add_initrd_mem_range(struct crash_mem **mem_ranges)
return ret;
}
-#ifdef CONFIG_PPC_64S_HASH_MMU
/**
* add_htab_mem_range - Adds htab range to the given memory ranges list,
* if it exists
@@ -304,14 +310,18 @@ int add_initrd_mem_range(struct crash_mem **mem_ranges)
*
* Returns 0 on success, negative errno on error.
*/
-int add_htab_mem_range(struct crash_mem **mem_ranges)
+static int add_htab_mem_range(struct crash_mem **mem_ranges)
{
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (!htab_address)
return 0;
return add_mem_range(mem_ranges, __pa(htab_address), htab_size_bytes);
-}
+#else
+ return 0;
#endif
+}
/**
* add_kernel_mem_range - Adds kernel text region to the given
@@ -320,18 +330,20 @@ int add_htab_mem_range(struct crash_mem **mem_ranges)
*
* Returns 0 on success, negative errno on error.
*/
-int add_kernel_mem_range(struct crash_mem **mem_ranges)
+static int add_kernel_mem_range(struct crash_mem **mem_ranges)
{
return add_mem_range(mem_ranges, 0, __pa(_end));
}
+#endif /* CONFIG_KEXEC_FILE */
+#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP)
/**
* add_rtas_mem_range - Adds RTAS region to the given memory ranges list.
* @mem_ranges: Range list to add the memory range to.
*
* Returns 0 on success, negative errno on error.
*/
-int add_rtas_mem_range(struct crash_mem **mem_ranges)
+static int add_rtas_mem_range(struct crash_mem **mem_ranges)
{
struct device_node *dn;
u32 base, size;
@@ -356,7 +368,7 @@ int add_rtas_mem_range(struct crash_mem **mem_ranges)
*
* Returns 0 on success, negative errno on error.
*/
-int add_opal_mem_range(struct crash_mem **mem_ranges)
+static int add_opal_mem_range(struct crash_mem **mem_ranges)
{
struct device_node *dn;
u64 base, size;
@@ -374,7 +386,9 @@ int add_opal_mem_range(struct crash_mem **mem_ranges)
of_node_put(dn);
return ret;
}
+#endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */
+#ifdef CONFIG_KEXEC_FILE
/**
* add_reserved_mem_ranges - Adds "/reserved-ranges" regions exported by f/w
* to the given memory ranges list.
@@ -382,7 +396,7 @@ int add_opal_mem_range(struct crash_mem **mem_ranges)
*
* Returns 0 on success, negative errno on error.
*/
-int add_reserved_mem_ranges(struct crash_mem **mem_ranges)
+static int add_reserved_mem_ranges(struct crash_mem **mem_ranges)
{
int n_mem_addr_cells, n_mem_size_cells, i, len, cells, ret = 0;
struct device_node *root = of_find_node_by_path("/");
@@ -412,3 +426,283 @@ int add_reserved_mem_ranges(struct crash_mem **mem_ranges)
return ret;
}
+
+/**
+ * get_reserved_memory_ranges - Get reserve memory ranges. This list includes
+ * memory regions that should be added to the
+ * memory reserve map to ensure the region is
+ * protected from any mischief.
+ * @mem_ranges: Range list to add the memory ranges to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int get_reserved_memory_ranges(struct crash_mem **mem_ranges)
+{
+ int ret;
+
+ ret = add_rtas_mem_range(mem_ranges);
+ if (ret)
+ goto out;
+
+ ret = add_tce_mem_ranges(mem_ranges);
+ if (ret)
+ goto out;
+
+ ret = add_reserved_mem_ranges(mem_ranges);
+out:
+ if (ret)
+ pr_err("Failed to setup reserved memory ranges\n");
+ return ret;
+}
+
+/**
+ * get_exclude_memory_ranges - Get exclude memory ranges. This list includes
+ * regions like opal/rtas, tce-table, initrd,
+ * kernel, htab which should be avoided while
+ * setting up kexec load segments.
+ * @mem_ranges: Range list to add the memory ranges to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int get_exclude_memory_ranges(struct crash_mem **mem_ranges)
+{
+ int ret;
+
+ ret = add_tce_mem_ranges(mem_ranges);
+ if (ret)
+ goto out;
+
+ ret = add_initrd_mem_range(mem_ranges);
+ if (ret)
+ goto out;
+
+ ret = add_htab_mem_range(mem_ranges);
+ if (ret)
+ goto out;
+
+ ret = add_kernel_mem_range(mem_ranges);
+ if (ret)
+ goto out;
+
+ ret = add_rtas_mem_range(mem_ranges);
+ if (ret)
+ goto out;
+
+ ret = add_opal_mem_range(mem_ranges);
+ if (ret)
+ goto out;
+
+ ret = add_reserved_mem_ranges(mem_ranges);
+ if (ret)
+ goto out;
+
+ /* exclude memory ranges should be sorted for easy lookup */
+ sort_memory_ranges(*mem_ranges, true);
+out:
+ if (ret)
+ pr_err("Failed to setup exclude memory ranges\n");
+ return ret;
+}
+
+#ifdef CONFIG_CRASH_DUMP
+/**
+ * get_usable_memory_ranges - Get usable memory ranges. This list includes
+ * regions like crashkernel, opal/rtas & tce-table,
+ * that kdump kernel could use.
+ * @mem_ranges: Range list to add the memory ranges to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int get_usable_memory_ranges(struct crash_mem **mem_ranges)
+{
+ int ret;
+
+ /*
+ * Early boot failure observed on guests when low memory (first memory
+ * block?) is not added to usable memory. So, add [0, crashk_res.end]
+ * instead of [crashk_res.start, crashk_res.end] to workaround it.
+ * Also, crashed kernel's memory must be added to reserve map to
+ * avoid kdump kernel from using it.
+ */
+ ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1);
+ if (ret)
+ goto out;
+
+ ret = add_rtas_mem_range(mem_ranges);
+ if (ret)
+ goto out;
+
+ ret = add_opal_mem_range(mem_ranges);
+ if (ret)
+ goto out;
+
+ ret = add_tce_mem_ranges(mem_ranges);
+out:
+ if (ret)
+ pr_err("Failed to setup usable memory ranges\n");
+ return ret;
+}
+#endif /* CONFIG_CRASH_DUMP */
+#endif /* CONFIG_KEXEC_FILE */
+
+#ifdef CONFIG_CRASH_DUMP
+/**
+ * get_crash_memory_ranges - Get crash memory ranges. This list includes
+ * first/crashing kernel's memory regions that
+ * would be exported via an elfcore.
+ * @mem_ranges: Range list to add the memory ranges to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int get_crash_memory_ranges(struct crash_mem **mem_ranges)
+{
+ phys_addr_t base, end;
+ struct crash_mem *tmem;
+ u64 i;
+ int ret;
+
+ for_each_mem_range(i, &base, &end) {
+ u64 size = end - base;
+
+ /* Skip backup memory region, which needs a separate entry */
+ if (base == BACKUP_SRC_START) {
+ if (size > BACKUP_SRC_SIZE) {
+ base = BACKUP_SRC_END + 1;
+ size -= BACKUP_SRC_SIZE;
+ } else
+ continue;
+ }
+
+ ret = add_mem_range(mem_ranges, base, size);
+ if (ret)
+ goto out;
+
+ /* Try merging adjacent ranges before reallocation attempt */
+ if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges)
+ sort_memory_ranges(*mem_ranges, true);
+ }
+
+ /* Reallocate memory ranges if there is no space to split ranges */
+ tmem = *mem_ranges;
+ if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) {
+ tmem = realloc_mem_ranges(mem_ranges);
+ if (!tmem)
+ goto out;
+ }
+
+ /* Exclude crashkernel region */
+ ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end);
+ if (ret)
+ goto out;
+
+ /*
+ * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL
+ * regions are exported to save their context at the time of
+ * crash, they should actually be backed up just like the
+ * first 64K bytes of memory.
+ */
+ ret = add_rtas_mem_range(mem_ranges);
+ if (ret)
+ goto out;
+
+ ret = add_opal_mem_range(mem_ranges);
+ if (ret)
+ goto out;
+
+ /* create a separate program header for the backup region */
+ ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE);
+ if (ret)
+ goto out;
+
+ sort_memory_ranges(*mem_ranges, false);
+out:
+ if (ret)
+ pr_err("Failed to setup crash memory ranges\n");
+ return ret;
+}
+
+/**
+ * remove_mem_range - Removes the given memory range from the range list.
+ * @mem_ranges: Range list to remove the memory range from.
+ * @base: Base address of the range to remove.
+ * @size: Size of the memory range to remove.
+ *
+ * (Re)allocates memory, if needed.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)
+{
+ u64 end;
+ int ret = 0;
+ unsigned int i;
+ u64 mstart, mend;
+ struct crash_mem *mem_rngs = *mem_ranges;
+
+ if (!size)
+ return 0;
+
+ /*
+ * Memory ranges are stored as start and end addresses; use
+ * the same format for the remove operation.
+ */
+ end = base + size - 1;
+
+ for (i = 0; i < mem_rngs->nr_ranges; i++) {
+ mstart = mem_rngs->ranges[i].start;
+ mend = mem_rngs->ranges[i].end;
+
+ /*
+ * The memory range to remove is not part of this entry in
+ * the memory range list.
+ */
+ if (!(base >= mstart && end <= mend))
+ continue;
+
+ /*
+ * Memory range to remove is equivalent to this entry in the
+ * memory range list. Remove the range entry from the list.
+ */
+ if (base == mstart && end == mend) {
+ for (; i < mem_rngs->nr_ranges - 1; i++) {
+ mem_rngs->ranges[i].start = mem_rngs->ranges[i+1].start;
+ mem_rngs->ranges[i].end = mem_rngs->ranges[i+1].end;
+ }
+ mem_rngs->nr_ranges--;
+ goto out;
+ }
+ /*
+ * The memory range to remove starts at the same address as
+ * the current entry in the list. Just move the start address
+ * of the current entry to end + 1.
+ */
+ else if (base == mstart) {
+ mem_rngs->ranges[i].start = end + 1;
+ goto out;
+ }
+ /*
+ * The memory range to remove ends at the same address as
+ * the current entry in the list. Just move the end address
+ * of the current entry to base - 1.
+ */
+ else if (end == mend) {
+ mem_rngs->ranges[i].end = base - 1;
+ goto out;
+ }
+ /*
+ * The memory range to remove falls in the middle of the current
+ * memory range entry. Split the current entry into two halves.
+ */
+ else {
+ mem_rngs->ranges[i].end = base - 1;
+ size = mend - end;
+ ret = add_mem_range(mem_ranges, end + 1, size);
+ }
+ }
+out:
+ return ret;
+}
+#endif /* CONFIG_CRASH_DUMP */
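
A minimal illustration of the cases remove_mem_range() handles (all addresses hypothetical): trimming at an edge shrinks the entry in place, while an interior removal splits it in two.

    /* existing entry covers [0x10000000, 0x3fffffff] */
    remove_mem_range(&cmem, 0x10000000, SZ_256M);   /* edge: entry becomes [0x20000000, 0x3fffffff] */
    remove_mem_range(&cmem, 0x28000000, SZ_128M);   /* middle: [0x20000000, 0x27ffffff] + [0x30000000, 0x3fffffff] */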
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 8acec144120e..ff6c38373957 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -360,10 +360,6 @@ static int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu,
break;
}
-#if 0
- printk(KERN_INFO "Deliver interrupt 0x%x? %x\n", vec, deliver);
-#endif
-
if (deliver)
kvmppc_inject_interrupt(vcpu, vec, 0);
@@ -899,11 +895,6 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
return kvm->arch.kvm_ops->test_age_gfn(kvm, range);
}
-bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
-{
- return kvm->arch.kvm_ops->set_spte_gfn(kvm, range);
-}
-
int kvmppc_core_init_vm(struct kvm *kvm)
{
diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
index 58391b4b32ed..4aa2ab89afbc 100644
--- a/arch/powerpc/kvm/book3s.h
+++ b/arch/powerpc/kvm/book3s.h
@@ -12,7 +12,6 @@ extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
extern bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range);
extern bool kvm_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range);
extern bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range);
-extern bool kvm_set_spte_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range);
extern int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu);
extern void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 2b1f0cdd8c18..1b51b1c4713b 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1010,18 +1010,6 @@ bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
return kvm_test_age_rmapp(kvm, range->slot, range->start);
}
-bool kvm_set_spte_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
-{
- WARN_ON(range->start + 1 != range->end);
-
- if (kvm_is_radix(kvm))
- kvm_unmap_radix(kvm, range->slot, range->start);
- else
- kvm_unmap_rmapp(kvm, range->slot, range->start);
-
- return false;
-}
-
static int vcpus_running(struct kvm *kvm)
{
return atomic_read(&kvm->arch.vcpus_running) != 0;
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 5bbfb2eed127..de126d153328 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -714,7 +714,7 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
case SPRN_HID1:
to_book3s(vcpu)->hid[1] = spr_val;
break;
- case SPRN_HID2:
+ case SPRN_HID2_750FX:
to_book3s(vcpu)->hid[2] = spr_val;
break;
case SPRN_HID2_GEKKO:
@@ -900,7 +900,7 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val
case SPRN_HID1:
*spr_val = to_book3s(vcpu)->hid[1];
break;
- case SPRN_HID2:
+ case SPRN_HID2_750FX:
case SPRN_HID2_GEKKO:
*spr_val = to_book3s(vcpu)->hid[2];
break;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 8e86eb577eb8..daaf7faf21a5 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4857,7 +4857,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
* entering a nested guest in which case the decrementer is now owned
* by L2 and the L1 decrementer is provided in hdec_expires
*/
- if (!kvmhv_is_nestedv2() && kvmppc_core_pending_dec(vcpu) &&
+ if (kvmppc_core_pending_dec(vcpu) &&
((tb < kvmppc_dec_expires_host_tb(vcpu)) ||
(trap == BOOK3S_INTERRUPT_SYSCALL &&
kvmppc_get_gpr(vcpu, 3) == H_ENTER_NESTED)))
@@ -6364,7 +6364,6 @@ static struct kvmppc_ops kvm_ops_hv = {
.unmap_gfn_range = kvm_unmap_gfn_range_hv,
.age_gfn = kvm_age_gfn_hv,
.test_age_gfn = kvm_test_age_gfn_hv,
- .set_spte_gfn = kvm_set_spte_gfn_hv,
.free_memslot = kvmppc_core_free_memslot_hv,
.init_vm = kvmppc_core_init_vm_hv,
.destroy_vm = kvmppc_core_destroy_vm_hv,
diff --git a/arch/powerpc/kvm/book3s_hv_nestedv2.c b/arch/powerpc/kvm/book3s_hv_nestedv2.c
index 8e6f5355f08b..1091f7a83b25 100644
--- a/arch/powerpc/kvm/book3s_hv_nestedv2.c
+++ b/arch/powerpc/kvm/book3s_hv_nestedv2.c
@@ -71,8 +71,8 @@ gs_msg_ops_kvmhv_nestedv2_config_fill_info(struct kvmppc_gs_buff *gsb,
}
if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_RUN_OUTPUT)) {
- kvmppc_gse_put_buff_info(gsb, KVMPPC_GSID_RUN_OUTPUT,
- cfg->vcpu_run_output_cfg);
+ rc = kvmppc_gse_put_buff_info(gsb, KVMPPC_GSID_RUN_OUTPUT,
+ cfg->vcpu_run_output_cfg);
if (rc < 0)
return rc;
}
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 5b92619a05fd..a7d7137ea0c8 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -461,12 +461,6 @@ static bool kvm_test_age_gfn_pr(struct kvm *kvm, struct kvm_gfn_range *range)
return false;
}
-static bool kvm_set_spte_gfn_pr(struct kvm *kvm, struct kvm_gfn_range *range)
-{
- /* The page will get remapped properly on its next fault */
- return do_kvm_unmap_gfn(kvm, range);
-}
-
/*****************************************/
static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
@@ -2071,7 +2065,6 @@ static struct kvmppc_ops kvm_ops_pr = {
.unmap_gfn_range = kvm_unmap_gfn_range_pr,
.age_gfn = kvm_age_gfn_pr,
.test_age_gfn = kvm_test_age_gfn_pr,
- .set_spte_gfn = kvm_set_spte_gfn_pr,
.free_memslot = kvmppc_core_free_memslot_pr,
.init_vm = kvmppc_core_init_vm_pr,
.destroy_vm = kvmppc_core_destroy_vm_pr,
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 29a382249770..1362c672387e 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -531,7 +531,7 @@ static int xive_vm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
xc->cppr = xive_prio_from_guest(new_cppr);
/*
- * IPIs are synthetized from MFRR and thus don't need
+ * IPIs are synthesized from MFRR and thus don't need
* any special EOI handling. The underlying interrupt
* used to signal MFRR changes is EOId when fetched from
* the queue.
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index ccb8f16ffe41..c664fdec75b1 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -747,12 +747,6 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
return false;
}
-bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
-{
- /* The page will get remapped properly on its next fault */
- return kvm_e500_mmu_unmap_gfn(kvm, range);
-}
-
/*****************************************/
int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500)
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 0ab65eeb93ee..f14ecab674a3 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -3,8 +3,6 @@
# Makefile for ppc-specific library files..
#
-ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
-
CFLAGS_code-patching.o += -fno-stack-protector
CFLAGS_feature-fixups.o += -fno-stack-protector
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index c6ab46156cda..0d1f3ee91115 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -225,7 +225,7 @@ void __init poking_init(void)
static unsigned long get_patch_pfn(void *addr)
{
- if (IS_ENABLED(CONFIG_MODULES) && is_vmalloc_or_module_addr(addr))
+ if (IS_ENABLED(CONFIG_EXECMEM) && is_vmalloc_or_module_addr(addr))
return vmalloc_to_pfn(addr);
else
return __pa_symbol(addr) >> PAGE_SHIFT;
@@ -372,9 +372,32 @@ int patch_instruction(u32 *addr, ppc_inst_t instr)
}
NOKPROBE_SYMBOL(patch_instruction);
+static int patch_memset64(u64 *addr, u64 val, size_t count)
+{
+ for (u64 *end = addr + count; addr < end; addr++)
+ __put_kernel_nofault(addr, &val, u64, failed);
+
+ return 0;
+
+failed:
+ return -EPERM;
+}
+
+static int patch_memset32(u32 *addr, u32 val, size_t count)
+{
+ for (u32 *end = addr + count; addr < end; addr++)
+ __put_kernel_nofault(addr, &val, u32, failed);
+
+ return 0;
+
+failed:
+ return -EPERM;
+}
+
static int __patch_instructions(u32 *patch_addr, u32 *code, size_t len, bool repeat_instr)
{
unsigned long start = (unsigned long)patch_addr;
+ int err;
/* Repeat instruction */
if (repeat_instr) {
@@ -383,19 +406,19 @@ static int __patch_instructions(u32 *patch_addr, u32 *code, size_t len, bool rep
if (ppc_inst_prefixed(instr)) {
u64 val = ppc_inst_as_ulong(instr);
- memset64((u64 *)patch_addr, val, len / 8);
+ err = patch_memset64((u64 *)patch_addr, val, len / 8);
} else {
u32 val = ppc_inst_val(instr);
- memset32(patch_addr, val, len / 4);
+ err = patch_memset32(patch_addr, val, len / 4);
}
} else {
- memcpy(patch_addr, code, len);
+ err = copy_to_kernel_nofault(patch_addr, code, len);
}
smp_wmb(); /* smp write barrier */
flush_icache_range(start, start + len);
- return 0;
+ return err;
}
/*
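
With memset32/memset64/memcpy replaced by the nofault accessors, a fault while writing through the temporary patch mapping is reported to the caller instead of oopsing. A hypothetical caller-side check (not part of this patch):

    err = patch_instructions(addr, code, len, false);
    if (err)        /* -EPERM if the poke-area write faulted */
        pr_warn("instruction patching failed: %d\n", err);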
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 4f82581ca203..b7201ba50b2e 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -25,6 +25,13 @@
#include <asm/firmware.h>
#include <asm/inst.h>
+/*
+ * Used to generate warnings if mmu or cpu feature check functions that
+ * use static keys are called before the keys are initialized.
+ */
+bool static_key_feature_checks_initialized __read_mostly;
+EXPORT_SYMBOL_GPL(static_key_feature_checks_initialized);
+
struct fixup_entry {
unsigned long mask;
unsigned long value;
@@ -679,6 +686,7 @@ void __init setup_feature_keys(void)
jump_label_init();
cpu_feature_keys_init();
mmu_feature_keys_init();
+ static_key_feature_checks_initialized = true;
}
static int __init check_features(void)
diff --git a/arch/powerpc/lib/test-code-patching.c b/arch/powerpc/lib/test-code-patching.c
index c44823292f73..f76030087f98 100644
--- a/arch/powerpc/lib/test-code-patching.c
+++ b/arch/powerpc/lib/test-code-patching.c
@@ -347,6 +347,97 @@ static void __init test_prefixed_patching(void)
check(!memcmp(iptr, expected, sizeof(expected)));
}
+static void __init test_multi_instruction_patching(void)
+{
+ u32 code[32];
+ void *buf;
+ u32 *addr32;
+ u64 *addr64;
+ ppc_inst_t inst64 = ppc_inst_prefix(OP_PREFIX << 26 | 3UL << 24, PPC_RAW_TRAP());
+ u32 inst32 = PPC_RAW_NOP();
+
+ buf = vzalloc(PAGE_SIZE * 8);
+ check(buf);
+ if (!buf)
+ return;
+
+ /* Test single page 32-bit repeated instruction */
+ addr32 = buf + PAGE_SIZE;
+ check(!patch_instructions(addr32 + 1, &inst32, 12, true));
+
+ check(addr32[0] == 0);
+ check(addr32[1] == inst32);
+ check(addr32[2] == inst32);
+ check(addr32[3] == inst32);
+ check(addr32[4] == 0);
+
+ /* Test single page 64-bit repeated instruction */
+ if (IS_ENABLED(CONFIG_PPC64)) {
+ check(ppc_inst_prefixed(inst64));
+
+ addr64 = buf + PAGE_SIZE * 2;
+ ppc_inst_write(code, inst64);
+ check(!patch_instructions((u32 *)(addr64 + 1), code, 24, true));
+
+ check(addr64[0] == 0);
+ check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[1]), inst64));
+ check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[2]), inst64));
+ check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[3]), inst64));
+ check(addr64[4] == 0);
+ }
+
+ /* Test single page memcpy */
+ addr32 = buf + PAGE_SIZE * 3;
+
+ for (int i = 0; i < ARRAY_SIZE(code); i++)
+ code[i] = i + 1;
+
+ check(!patch_instructions(addr32 + 1, code, sizeof(code), false));
+
+ check(addr32[0] == 0);
+ check(!memcmp(&addr32[1], code, sizeof(code)));
+ check(addr32[ARRAY_SIZE(code) + 1] == 0);
+
+ /* Test multipage 32-bit repeated instruction */
+ addr32 = buf + PAGE_SIZE * 4 - 8;
+ check(!patch_instructions(addr32 + 1, &inst32, 12, true));
+
+ check(addr32[0] == 0);
+ check(addr32[1] == inst32);
+ check(addr32[2] == inst32);
+ check(addr32[3] == inst32);
+ check(addr32[4] == 0);
+
+ /* Test multipage 64-bit repeated instruction */
+ if (IS_ENABLED(CONFIG_PPC64)) {
+ check(ppc_inst_prefixed(inst64));
+
+ addr64 = buf + PAGE_SIZE * 5 - 8;
+ ppc_inst_write(code, inst64);
+ check(!patch_instructions((u32 *)(addr64 + 1), code, 24, true));
+
+ check(addr64[0] == 0);
+ check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[1]), inst64));
+ check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[2]), inst64));
+ check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[3]), inst64));
+ check(addr64[4] == 0);
+ }
+
+ /* Test multipage memcpy */
+ addr32 = buf + PAGE_SIZE * 6 - 12;
+
+ for (int i = 0; i < ARRAY_SIZE(code); i++)
+ code[i] = i + 1;
+
+ check(!patch_instructions(addr32 + 1, code, sizeof(code), false));
+
+ check(addr32[0] == 0);
+ check(!memcmp(&addr32[1], code, sizeof(code)));
+ check(addr32[ARRAY_SIZE(code) + 1] == 0);
+
+ vfree(buf);
+}
+
static int __init test_code_patching(void)
{
pr_info("Running code patching self-tests ...\n");
@@ -356,6 +447,7 @@ static int __init test_code_patching(void)
test_create_function_call();
test_translate_branch();
test_prefixed_patching();
+ test_multi_instruction_patching();
return 0;
}
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 503a6e249940..0fe2f085c05a 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -3,8 +3,6 @@
# Makefile for the linux ppc-specific parts of the memory manager.
#
-ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
-
obj-y := fault.o mem.o pgtable.o maccess.o pageattr.o \
init_$(BITS).o pgtable_$(BITS).o \
pgtable-frag.o ioremap.o ioremap_$(BITS).o \
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index 100f999871bc..625fe7d08e06 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -184,7 +184,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
static bool is_module_segment(unsigned long addr)
{
- if (!IS_ENABLED(CONFIG_MODULES))
+ if (!IS_ENABLED(CONFIG_EXECMEM))
return false;
if (addr < ALIGN_DOWN(MODULES_VADDR, SZ_256M))
return false;
diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile
index cad2abc1730f..33af5795856a 100644
--- a/arch/powerpc/mm/book3s64/Makefile
+++ b/arch/powerpc/mm/book3s64/Makefile
@@ -1,7 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-ccflags-y := $(NO_MINIMAL_TOC)
-
obj-y += mmu_context.o pgtable.o trace.o
ifdef CONFIG_PPC_64S_HASH_MMU
CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE)
diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c
index 15189592da09..7186516eca52 100644
--- a/arch/powerpc/mm/cacheflush.c
+++ b/arch/powerpc/mm/cacheflush.c
@@ -78,7 +78,7 @@ EXPORT_SYMBOL(flush_icache_range);
#ifdef CONFIG_HIGHMEM
/**
- * flush_dcache_icache_phys() - Flush a page by it's physical address
+ * flush_dcache_icache_phys() - Flush a page by its physical address
* @physaddr: the physical address of the page
*/
static void flush_dcache_icache_phys(unsigned long physaddr)
diff --git a/arch/powerpc/mm/kasan/init_book3e_64.c b/arch/powerpc/mm/kasan/init_book3e_64.c
index 11519e88dc6b..43c03b84ff32 100644
--- a/arch/powerpc/mm/kasan/init_book3e_64.c
+++ b/arch/powerpc/mm/kasan/init_book3e_64.c
@@ -112,7 +112,7 @@ void __init kasan_init(void)
pte_t zero_pte = pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL_RO);
for_each_mem_range(i, &start, &end)
- kasan_init_phys_region((void *)start, (void *)end);
+ kasan_init_phys_region(phys_to_virt(start), phys_to_virt(end));
if (IS_ENABLED(CONFIG_KASAN_VMALLOC))
kasan_remove_zero_shadow((void *)VMALLOC_START, VMALLOC_SIZE);
diff --git a/arch/powerpc/mm/kasan/init_book3s_64.c b/arch/powerpc/mm/kasan/init_book3s_64.c
index 9300d641cf9a..3fb5ce4f48f4 100644
--- a/arch/powerpc/mm/kasan/init_book3s_64.c
+++ b/arch/powerpc/mm/kasan/init_book3s_64.c
@@ -62,7 +62,7 @@ void __init kasan_init(void)
}
for_each_mem_range(i, &start, &end)
- kasan_init_phys_region((void *)start, (void *)end);
+ kasan_init_phys_region(phys_to_virt(start), phys_to_virt(end));
for (i = 0; i < PTRS_PER_PTE; i++)
__set_pte_at(&init_mm, (unsigned long)kasan_early_shadow_page,
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 3a440004b97d..22cc978cdb47 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -16,6 +16,7 @@
#include <linux/highmem.h>
#include <linux/suspend.h>
#include <linux/dma-direct.h>
+#include <linux/execmem.h>
#include <asm/swiotlb.h>
#include <asm/machdep.h>
@@ -30,7 +31,7 @@
#include <mm/mmu_decl.h>
-unsigned long long memory_limit;
+unsigned long long memory_limit __initdata;
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
EXPORT_SYMBOL(empty_zero_page);
@@ -406,3 +407,66 @@ int devmem_is_allowed(unsigned long pfn)
* the EHEA driver. Drop this when drivers/net/ethernet/ibm/ehea is removed.
*/
EXPORT_SYMBOL_GPL(walk_system_ram_range);
+
+#ifdef CONFIG_EXECMEM
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
+{
+ pgprot_t kprobes_prot = strict_module_rwx_enabled() ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
+ pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : PAGE_KERNEL_EXEC;
+ unsigned long fallback_start = 0, fallback_end = 0;
+ unsigned long start, end;
+
+ /*
+ * BOOK3S_32 and 8xx define MODULES_VADDR for text allocations and
+ * allow allocating data in the entire vmalloc space
+ */
+#ifdef MODULES_VADDR
+ unsigned long limit = (unsigned long)_etext - SZ_32M;
+
+ BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR);
+
+ /* First try within 32M limit from _etext to avoid branch trampolines */
+ if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit) {
+ start = limit;
+ fallback_start = MODULES_VADDR;
+ fallback_end = MODULES_END;
+ } else {
+ start = MODULES_VADDR;
+ }
+
+ end = MODULES_END;
+#else
+ start = VMALLOC_START;
+ end = VMALLOC_END;
+#endif
+
+ execmem_info = (struct execmem_info){
+ .ranges = {
+ [EXECMEM_DEFAULT] = {
+ .start = start,
+ .end = end,
+ .pgprot = prot,
+ .alignment = 1,
+ .fallback_start = fallback_start,
+ .fallback_end = fallback_end,
+ },
+ [EXECMEM_KPROBES] = {
+ .start = VMALLOC_START,
+ .end = VMALLOC_END,
+ .pgprot = kprobes_prot,
+ .alignment = 1,
+ },
+ [EXECMEM_MODULE_DATA] = {
+ .start = VMALLOC_START,
+ .end = VMALLOC_END,
+ .pgprot = PAGE_KERNEL,
+ .alignment = 1,
+ },
+ },
+ };
+
+ return &execmem_info;
+}
+#endif /* CONFIG_EXECMEM */
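
The 32 MB window tried first matches the reach of a direct powerpc branch, so module text placed there can reach kernel text without branch trampolines:

    /* I-form branch: 24-bit LI field, shifted left by 2             */
    /* reach = +/- 2^25 bytes = +/- 32 MiB, hence _etext - SZ_32M    */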
diff --git a/arch/powerpc/mm/nohash/Makefile b/arch/powerpc/mm/nohash/Makefile
index f3894e79d5f7..b3f0498dd42f 100644
--- a/arch/powerpc/mm/nohash/Makefile
+++ b/arch/powerpc/mm/nohash/Makefile
@@ -1,7 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
-
obj-y += mmu_context.o tlb.o tlb_low.o kup.o
obj-$(CONFIG_PPC_BOOK3E_64) += tlb_low_64e.o book3e_pgtable.o
obj-$(CONFIG_40x) += 40x.o
diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c
index cdff129abb14..5c8d1bb98b3e 100644
--- a/arch/powerpc/mm/nohash/kaslr_booke.c
+++ b/arch/powerpc/mm/nohash/kaslr_booke.c
@@ -376,7 +376,7 @@ notrace void __init kaslr_early_init(void *dt_ptr, phys_addr_t size)
create_kaslr_tlb_entry(1, tlb_virt, tlb_phys);
}
- /* Copy the kernel to it's new location and run */
+ /* Copy the kernel to its new location and run */
memcpy((void *)kernstart_virt_addr, (void *)_stext, kernel_sz);
flush_icache_range(kernstart_virt_addr, kernstart_virt_addr + kernel_sz);
diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c
index 9a601587836b..a6baa6166d94 100644
--- a/arch/powerpc/mm/ptdump/hashpagetable.c
+++ b/arch/powerpc/mm/ptdump/hashpagetable.c
@@ -491,7 +491,7 @@ static void walk_vmemmap(struct pg_state *st)
* Traverse the vmemmaped memory and dump pages that are in the hash
* pagetable.
*/
- while (ptr->list) {
+ while (ptr) {
hpte_find(st, ptr->virt_addr, mmu_vmemmap_psize);
ptr = ptr->list;
}
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 0f9a21783329..984655419da5 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -359,3 +359,13 @@ void bpf_jit_free(struct bpf_prog *fp)
bpf_prog_unlock_free(fp);
}
+
+bool bpf_jit_supports_kfunc_call(void)
+{
+ return true;
+}
+
+bool bpf_jit_supports_far_kfunc_call(void)
+{
+ return IS_ENABLED(CONFIG_PPC64);
+}
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index 2f39c50ca729..43b97032a91c 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -450,10 +450,16 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
}
break;
case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
- EMIT(PPC_RAW_DIVWU(dst_reg, src2_reg, src_reg));
+ if (off)
+ EMIT(PPC_RAW_DIVW(dst_reg, src2_reg, src_reg));
+ else
+ EMIT(PPC_RAW_DIVWU(dst_reg, src2_reg, src_reg));
break;
case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
- EMIT(PPC_RAW_DIVWU(_R0, src2_reg, src_reg));
+ if (off)
+ EMIT(PPC_RAW_DIVW(_R0, src2_reg, src_reg));
+ else
+ EMIT(PPC_RAW_DIVWU(_R0, src2_reg, src_reg));
EMIT(PPC_RAW_MULW(_R0, src_reg, _R0));
EMIT(PPC_RAW_SUB(dst_reg, src2_reg, _R0));
break;
@@ -467,10 +473,16 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
if (imm == 1) {
EMIT(PPC_RAW_MR(dst_reg, src2_reg));
} else if (is_power_of_2((u32)imm)) {
- EMIT(PPC_RAW_SRWI(dst_reg, src2_reg, ilog2(imm)));
+ if (off)
+ EMIT(PPC_RAW_SRAWI(dst_reg, src2_reg, ilog2(imm)));
+ else
+ EMIT(PPC_RAW_SRWI(dst_reg, src2_reg, ilog2(imm)));
} else {
PPC_LI32(_R0, imm);
- EMIT(PPC_RAW_DIVWU(dst_reg, src2_reg, _R0));
+ if (off)
+ EMIT(PPC_RAW_DIVW(dst_reg, src2_reg, _R0));
+ else
+ EMIT(PPC_RAW_DIVWU(dst_reg, src2_reg, _R0));
}
break;
case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */
@@ -480,11 +492,19 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
if (!is_power_of_2((u32)imm)) {
bpf_set_seen_register(ctx, tmp_reg);
PPC_LI32(tmp_reg, imm);
- EMIT(PPC_RAW_DIVWU(_R0, src2_reg, tmp_reg));
+ if (off)
+ EMIT(PPC_RAW_DIVW(_R0, src2_reg, tmp_reg));
+ else
+ EMIT(PPC_RAW_DIVWU(_R0, src2_reg, tmp_reg));
EMIT(PPC_RAW_MULW(_R0, tmp_reg, _R0));
EMIT(PPC_RAW_SUB(dst_reg, src2_reg, _R0));
} else if (imm == 1) {
EMIT(PPC_RAW_LI(dst_reg, 0));
+ } else if (off) {
+ EMIT(PPC_RAW_SRAWI(_R0, src2_reg, ilog2(imm)));
+ EMIT(PPC_RAW_ADDZE(_R0, _R0));
+ EMIT(PPC_RAW_SLWI(_R0, _R0, ilog2(imm)));
+ EMIT(PPC_RAW_SUB(dst_reg, src2_reg, _R0));
} else {
imm = ilog2((u32)imm);
EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 0, 32 - imm, 31));
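
The signed power-of-two modulo sequence above (srawi/addze/slwi/sub) first computes a quotient truncated toward zero, then multiplies back and subtracts. A worked example with hypothetical operands:

    /* smod32: dst = src2 % 4, with src2 = -7                                  */
    /* srawi 2 -> -2, CA = 1 (negative value, non-zero bits shifted out)       */
    /* addze   -> -1  (quotient rounded toward zero)                           */
    /* slwi 2  -> -4                                                           */
    /* sub     -> -7 - (-4) = -3, matching C's truncated remainder semantics   */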
@@ -497,11 +517,21 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
imm = -imm;
if (!is_power_of_2(imm))
return -EOPNOTSUPP;
- if (imm == 1)
+ if (imm == 1) {
EMIT(PPC_RAW_LI(dst_reg, 0));
- else
+ EMIT(PPC_RAW_LI(dst_reg_h, 0));
+ } else if (off) {
+ EMIT(PPC_RAW_SRAWI(dst_reg_h, src2_reg_h, 31));
+ EMIT(PPC_RAW_XOR(dst_reg, src2_reg, dst_reg_h));
+ EMIT(PPC_RAW_SUBFC(dst_reg, dst_reg_h, dst_reg));
+ EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 32 - ilog2(imm), 31));
+ EMIT(PPC_RAW_XOR(dst_reg, dst_reg, dst_reg_h));
+ EMIT(PPC_RAW_SUBFC(dst_reg, dst_reg_h, dst_reg));
+ EMIT(PPC_RAW_SUBFE(dst_reg_h, dst_reg_h, dst_reg_h));
+ } else {
EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 0, 32 - ilog2(imm), 31));
- EMIT(PPC_RAW_LI(dst_reg_h, 0));
+ EMIT(PPC_RAW_LI(dst_reg_h, 0));
+ }
break;
case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */
if (!imm)
@@ -727,15 +757,30 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
* MOV
*/
case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
- if (dst_reg == src_reg)
- break;
- EMIT(PPC_RAW_MR(dst_reg, src_reg));
- EMIT(PPC_RAW_MR(dst_reg_h, src_reg_h));
+ if (off == 8) {
+ EMIT(PPC_RAW_EXTSB(dst_reg, src_reg));
+ EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg, 31));
+ } else if (off == 16) {
+ EMIT(PPC_RAW_EXTSH(dst_reg, src_reg));
+ EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg, 31));
+ } else if (off == 32 && dst_reg == src_reg) {
+ EMIT(PPC_RAW_SRAWI(dst_reg_h, src_reg, 31));
+ } else if (off == 32) {
+ EMIT(PPC_RAW_MR(dst_reg, src_reg));
+ EMIT(PPC_RAW_SRAWI(dst_reg_h, src_reg, 31));
+ } else if (dst_reg != src_reg) {
+ EMIT(PPC_RAW_MR(dst_reg, src_reg));
+ EMIT(PPC_RAW_MR(dst_reg_h, src_reg_h));
+ }
break;
case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */
/* special mov32 for zext */
if (imm == 1)
EMIT(PPC_RAW_LI(dst_reg_h, 0));
+ else if (off == 8)
+ EMIT(PPC_RAW_EXTSB(dst_reg, src_reg));
+ else if (off == 16)
+ EMIT(PPC_RAW_EXTSH(dst_reg, src_reg));
else if (dst_reg != src_reg)
EMIT(PPC_RAW_MR(dst_reg, src_reg));
break;
@@ -751,6 +796,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
* BPF_FROM_BE/LE
*/
case BPF_ALU | BPF_END | BPF_FROM_LE:
+ case BPF_ALU64 | BPF_END | BPF_FROM_LE:
switch (imm) {
case 16:
/* Copy 16 bits to upper part */
@@ -785,6 +831,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
EMIT(PPC_RAW_MR(dst_reg_h, tmp_reg));
break;
}
+ if (BPF_CLASS(code) == BPF_ALU64 && imm != 64)
+ EMIT(PPC_RAW_LI(dst_reg_h, 0));
break;
case BPF_ALU | BPF_END | BPF_FROM_BE:
switch (imm) {
@@ -918,11 +966,17 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
* BPF_LDX
*/
case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
+ case BPF_LDX | BPF_MEMSX | BPF_B:
case BPF_LDX | BPF_PROBE_MEM | BPF_B:
+ case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
+ case BPF_LDX | BPF_MEMSX | BPF_H:
case BPF_LDX | BPF_PROBE_MEM | BPF_H:
+ case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
+ case BPF_LDX | BPF_MEMSX | BPF_W:
case BPF_LDX | BPF_PROBE_MEM | BPF_W:
+ case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
/*
@@ -931,7 +985,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
* load only if addr is kernel address (see is_kernel_addr()), otherwise
* set dst_reg=0 and move on.
*/
- if (BPF_MODE(code) == BPF_PROBE_MEM) {
+ if (BPF_MODE(code) == BPF_PROBE_MEM || BPF_MODE(code) == BPF_PROBE_MEMSX) {
PPC_LI32(_R0, TASK_SIZE - off);
EMIT(PPC_RAW_CMPLW(src_reg, _R0));
PPC_BCC_SHORT(COND_GT, (ctx->idx + 4) * 4);
@@ -953,30 +1007,48 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
* as there are two load instructions for dst_reg_h & dst_reg
* respectively.
*/
- if (size == BPF_DW)
+ if (size == BPF_DW ||
+ (size == BPF_B && BPF_MODE(code) == BPF_PROBE_MEMSX))
PPC_JMP((ctx->idx + 3) * 4);
else
PPC_JMP((ctx->idx + 2) * 4);
}
- switch (size) {
- case BPF_B:
- EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
- break;
- case BPF_H:
- EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
- break;
- case BPF_W:
- EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
- break;
- case BPF_DW:
- EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off));
- EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4));
- break;
- }
+ if (BPF_MODE(code) == BPF_MEMSX || BPF_MODE(code) == BPF_PROBE_MEMSX) {
+ switch (size) {
+ case BPF_B:
+ EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
+ EMIT(PPC_RAW_EXTSB(dst_reg, dst_reg));
+ break;
+ case BPF_H:
+ EMIT(PPC_RAW_LHA(dst_reg, src_reg, off));
+ break;
+ case BPF_W:
+ EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
+ break;
+ }
+ if (!fp->aux->verifier_zext)
+ EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg, 31));
- if (size != BPF_DW && !fp->aux->verifier_zext)
- EMIT(PPC_RAW_LI(dst_reg_h, 0));
+ } else {
+ switch (size) {
+ case BPF_B:
+ EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
+ break;
+ case BPF_H:
+ EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
+ break;
+ case BPF_W:
+ EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
+ break;
+ case BPF_DW:
+ EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off));
+ EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4));
+ break;
+ }
+ if (size != BPF_DW && !fp->aux->verifier_zext)
+ EMIT(PPC_RAW_LI(dst_reg_h, 0));
+ }
if (BPF_MODE(code) == BPF_PROBE_MEM) {
int insn_idx = ctx->idx - 1;
@@ -1068,6 +1140,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
case BPF_JMP | BPF_JA:
PPC_JMP(addrs[i + 1 + off]);
break;
+ case BPF_JMP32 | BPF_JA:
+ PPC_JMP(addrs[i + 1 + imm]);
+ break;
case BPF_JMP | BPF_JGT | BPF_K:
case BPF_JMP | BPF_JGT | BPF_X:
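
For the new sign-extending (BPF_MEMSX) loads handled above, the 32-bit JIT materialises the 64-bit result in the {dst_reg_h, dst_reg} register pair; for a byte load the emitted sequence is effectively:

    /* BPF_LDX | BPF_MEMSX | BPF_B on ppc32:
     *   lbz   dst,   off(src)     zero-extended byte load
     *   extsb dst,   dst          sign-extend the low 32 bits
     *   srawi dst_h, dst, 31      replicate the sign bit into the high word
     */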
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 79f23974a320..8afc14a4a125 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -202,29 +202,47 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
EMIT(PPC_RAW_BLR());
}
-static int bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, u64 func)
+static int
+bpf_jit_emit_func_call_hlp(u32 *image, u32 *fimage, struct codegen_context *ctx, u64 func)
{
unsigned long func_addr = func ? ppc_function_entry((void *)func) : 0;
long reladdr;
- if (WARN_ON_ONCE(!core_kernel_text(func_addr)))
+ if (WARN_ON_ONCE(!kernel_text_address(func_addr)))
return -EINVAL;
- if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) {
- reladdr = func_addr - CTX_NIA(ctx);
-
- if (reladdr >= (long)SZ_8G || reladdr < -(long)SZ_8G) {
- pr_err("eBPF: address of %ps out of range of pcrel address.\n",
- (void *)func);
- return -ERANGE;
- }
- /* pla r12,addr */
- EMIT(PPC_PREFIX_MLS | __PPC_PRFX_R(1) | IMM_H18(reladdr));
- EMIT(PPC_INST_PADDI | ___PPC_RT(_R12) | IMM_L(reladdr));
- EMIT(PPC_RAW_MTCTR(_R12));
- EMIT(PPC_RAW_BCTR());
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ reladdr = func_addr - local_paca->kernelbase;
+ if (reladdr < (long)SZ_8G && reladdr >= -(long)SZ_8G) {
+ EMIT(PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernelbase)));
+ /* Align for subsequent prefix instruction */
+ if (!IS_ALIGNED((unsigned long)fimage + CTX_NIA(ctx), 8))
+ EMIT(PPC_RAW_NOP());
+ /* paddi r12,r12,addr */
+ EMIT(PPC_PREFIX_MLS | __PPC_PRFX_R(0) | IMM_H18(reladdr));
+ EMIT(PPC_INST_PADDI | ___PPC_RT(_R12) | ___PPC_RA(_R12) | IMM_L(reladdr));
} else {
+ unsigned long pc = (unsigned long)fimage + CTX_NIA(ctx);
+ bool alignment_needed = !IS_ALIGNED(pc, 8);
+
+ reladdr = func_addr - (alignment_needed ? pc + 4 : pc);
+
+ if (reladdr < (long)SZ_8G && reladdr >= -(long)SZ_8G) {
+ if (alignment_needed)
+ EMIT(PPC_RAW_NOP());
+ /* pla r12,addr */
+ EMIT(PPC_PREFIX_MLS | __PPC_PRFX_R(1) | IMM_H18(reladdr));
+ EMIT(PPC_INST_PADDI | ___PPC_RT(_R12) | IMM_L(reladdr));
+ } else {
+ /* We can clobber r12 */
+ PPC_LI64(_R12, func);
+ }
+ }
+ EMIT(PPC_RAW_MTCTR(_R12));
+ EMIT(PPC_RAW_BCTRL());
+#else
+ if (core_kernel_text(func_addr)) {
reladdr = func_addr - kernel_toc_addr();
if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
pr_err("eBPF: address of %ps out of range of kernel_toc.\n", (void *)func);
@@ -235,7 +253,32 @@ static int bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, u
EMIT(PPC_RAW_ADDI(_R12, _R12, PPC_LO(reladdr)));
EMIT(PPC_RAW_MTCTR(_R12));
EMIT(PPC_RAW_BCTRL());
+ } else {
+ if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V1)) {
+ /* func points to the function descriptor */
+ PPC_LI64(bpf_to_ppc(TMP_REG_2), func);
+ /* Load actual entry point from function descriptor */
+ EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_2), 0));
+ /* ... and move it to CTR */
+ EMIT(PPC_RAW_MTCTR(bpf_to_ppc(TMP_REG_1)));
+ /*
+ * Load TOC from function descriptor at offset 8.
+ * We can clobber r2 since we get called through a
+ * function pointer (so caller will save/restore r2).
+ */
+ EMIT(PPC_RAW_LD(_R2, bpf_to_ppc(TMP_REG_2), 8));
+ } else {
+ PPC_LI64(_R12, func);
+ EMIT(PPC_RAW_MTCTR(_R12));
+ }
+ EMIT(PPC_RAW_BCTRL());
+ /*
+ * Load r2 with kernel TOC as kernel TOC is used if function address falls
+ * within core kernel text.
+ */
+ EMIT(PPC_RAW_LD(_R2, _R13, offsetof(struct paca_struct, kernel_toc)));
}
+#endif
return 0;
}
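
The nop padding before the prefixed pla/paddi sequences above exists because Power ISA v3.1 prefixed instructions must not cross a 64-byte boundary; since a prefixed instruction is 8 bytes, an 8-byte-aligned emission address is sufficient:

    /* if addr % 8 == 0, then addr % 64 is one of {0, 8, ..., 56}, so the    */
    /* 8-byte prefixed instruction always sits within a single 64-byte block */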
@@ -285,7 +328,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o
int b2p_index = bpf_to_ppc(BPF_REG_3);
int bpf_tailcall_prologue_size = 8;
- if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2))
+ if (!IS_ENABLED(CONFIG_PPC_KERNEL_PCREL) && IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2))
bpf_tailcall_prologue_size += 4; /* skip past the toc load */
/*
@@ -993,7 +1036,7 @@ emit_clear:
return ret;
if (func_addr_fixed)
- ret = bpf_jit_emit_func_call_hlp(image, ctx, func_addr);
+ ret = bpf_jit_emit_func_call_hlp(image, fimage, ctx, func_addr);
else
ret = bpf_jit_emit_func_call_rel(image, fimage, ctx, func_addr);
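
The pcrel path in bpf_jit_emit_func_call_hlp() hinges on two conditions: the target must lie within ±8 GB of the emitted instruction, and the 8-byte prefixed paddi must not straddle a 64-byte fetch boundary, which the optional padding nop plus 8-byte alignment guarantees. A minimal sketch of that check, assuming hypothetical helper and parameter names:

#include <linux/align.h>
#include <linux/sizes.h>
#include <linux/types.h>

/* Illustrative only: mirrors the range/alignment test used by the JIT above. */
static bool pcrel_target_reachable(unsigned long insn_addr, unsigned long target,
				   bool *needs_pad_nop)
{
	long reladdr;

	/* An 8-byte aligned prefixed instruction cannot cross a 64-byte boundary. */
	*needs_pad_nop = !IS_ALIGNED(insn_addr, 8);
	if (*needs_pad_nop)
		insn_addr += 4;	/* account for the padding nop */

	reladdr = (long)(target - insn_addr);
	return reladdr < (long)SZ_8G && reladdr >= -(long)SZ_8G;
}
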
diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c
index 8f75e9574c27..8c1f3b629fc7 100644
--- a/arch/powerpc/platforms/512x/mpc512x_shared.c
+++ b/arch/powerpc/platforms/512x/mpc512x_shared.c
@@ -279,7 +279,7 @@ static void __init mpc512x_setup_diu(void)
* and so negatively affect boot time. Instead we reserve the
* already configured frame buffer area so that it won't be
* destroyed. The starting address of the area to reserve and
- * also it's length is passed to memblock_reserve(). It will be
+ * also its length is passed to memblock_reserve(). It will be
* freed later on first open of fbdev, when splash image is not
* needed any more.
*/
diff --git a/arch/powerpc/platforms/52xx/lite5200_sleep.S b/arch/powerpc/platforms/52xx/lite5200_sleep.S
index 0b12647e7b42..0ec2522ee4ad 100644
--- a/arch/powerpc/platforms/52xx/lite5200_sleep.S
+++ b/arch/powerpc/platforms/52xx/lite5200_sleep.S
@@ -203,7 +203,8 @@ lite5200_wakeup:
/* HIDs, MSR */
LOAD_SPRN(HID1, 0x19)
- LOAD_SPRN(HID2, 0x1a)
+ /* FIXME: Should this use HID2_G2_LE? */
+ LOAD_SPRN(HID2_750FX, 0x1a)
/* address translation is tricky (see turn_on_mmu) */
@@ -283,7 +284,8 @@ SYM_FUNC_START_LOCAL(save_regs)
SAVE_SPRN(HID0, 0x18)
SAVE_SPRN(HID1, 0x19)
- SAVE_SPRN(HID2, 0x1a)
+ /* FIXME: Should this use HID2_G2_LE? */
+ SAVE_SPRN(HID2_750FX, 0x1a)
mfmsr r10
stw r10, (4*0x1b)(r4)
/*SAVE_SPRN(LR, 0x1c) have to save it before the call */
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_common.c b/arch/powerpc/platforms/52xx/mpc52xx_common.c
index b4938e344f71..253421ffb4e5 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_common.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_common.c
@@ -12,12 +12,10 @@
#undef DEBUG
-#include <linux/gpio.h>
#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/of_address.h>
#include <linux/of_platform.h>
-#include <linux/of_gpio.h>
#include <linux/export.h>
#include <asm/io.h>
#include <asm/mpc52xx.h>
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
index 581059527c36..2bd6abcdc113 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
@@ -48,6 +48,7 @@
* the output mode. This driver does not change the output mode setting.
*/
+#include <linux/gpio/driver.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/io.h>
@@ -56,7 +57,6 @@
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
-#include <linux/of_gpio.h>
#include <linux/platform_device.h>
#include <linux/kernel.h>
#include <linux/property.h>
diff --git a/arch/powerpc/platforms/83xx/suspend-asm.S b/arch/powerpc/platforms/83xx/suspend-asm.S
index bc6bd4d0ae96..6a62ed6082c9 100644
--- a/arch/powerpc/platforms/83xx/suspend-asm.S
+++ b/arch/powerpc/platforms/83xx/suspend-asm.S
@@ -68,7 +68,8 @@ _GLOBAL(mpc83xx_enter_deep_sleep)
mfspr r5, SPRN_HID0
mfspr r6, SPRN_HID1
- mfspr r7, SPRN_HID2
+ /* FIXME: Should this use SPRN_HID2_G2_LE? */
+ mfspr r7, SPRN_HID2_750FX
stw r5, SS_HID+0(r3)
stw r6, SS_HID+4(r3)
@@ -396,7 +397,8 @@ mpc83xx_deep_resume:
mtspr SPRN_HID0, r5
mtspr SPRN_HID1, r6
- mtspr SPRN_HID2, r7
+ /* FIXME: Should this use SPRN_HID2_G2_LE? */
+ mtspr SPRN_HID2_750FX, r7
lwz r4, SS_IABR+0(r3)
lwz r5, SS_IABR+4(r3)
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index 40aa58206888..e52b848b64b7 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -398,6 +398,7 @@ static void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary)
hard_irq_disable();
mpic_teardown_this_cpu(secondary);
+#ifdef CONFIG_CRASH_DUMP
if (cpu == crashing_cpu && cpu_thread_in_core(cpu) != 0) {
/*
* We enter the crash kernel on whatever cpu crashed,
@@ -406,9 +407,11 @@ static void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary)
*/
disable_threadbit = 1;
disable_cpu = cpu_first_thread_sibling(cpu);
- } else if (sibling != crashing_cpu &&
- cpu_thread_in_core(cpu) == 0 &&
- cpu_thread_in_core(sibling) != 0) {
+ } else if (sibling == crashing_cpu) {
+ return;
+ }
+#endif
+ if (cpu_thread_in_core(cpu) == 0 && cpu_thread_in_core(sibling) != 0) {
disable_threadbit = 2;
disable_cpu = sibling;
}
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 1202a69b0a20..4cd9c0de22c2 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -424,23 +424,6 @@ static void __init cell_iommu_setup_hardware(struct cbe_iommu *iommu,
cell_iommu_enable_hardware(iommu);
}
-#if 0/* Unused for now */
-static struct iommu_window *find_window(struct cbe_iommu *iommu,
- unsigned long offset, unsigned long size)
-{
- struct iommu_window *window;
-
- /* todo: check for overlapping (but not equal) windows) */
-
- list_for_each_entry(window, &(iommu->windows), list) {
- if (window->offset == offset && window->size == size)
- return window;
- }
-
- return NULL;
-}
-#endif
-
static inline u32 cell_iommu_get_ioid(struct device_node *np)
{
const u32 *ioid;
diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c
index 30394c6f8894..fee638fd8970 100644
--- a/arch/powerpc/platforms/cell/smp.c
+++ b/arch/powerpc/platforms/cell/smp.c
@@ -54,6 +54,7 @@ static cpumask_t of_spin_map;
/**
* smp_startup_cpu() - start the given cpu
+ * @lcpu: Logical CPU ID of the CPU to be started.
*
* At boot time, there is nothing to do for primary threads which were
* started from Open Firmware. For anything else, call RTAS with the
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index 02a8158c469d..7f4e0db8eb08 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -1704,23 +1704,11 @@ static int spufs_mfc_flush(struct file *file, fl_owner_t id)
ret = spu_acquire(ctx);
if (ret)
- goto out;
-#if 0
-/* this currently hangs */
- ret = spufs_wait(ctx->mfc_wq,
- ctx->ops->set_mfc_query(ctx, ctx->tagwait, 2));
- if (ret)
- goto out;
- ret = spufs_wait(ctx->mfc_wq,
- ctx->ops->read_mfc_tagstatus(ctx) == ctx->tagwait);
- if (ret)
- goto out;
-#else
- ret = 0;
-#endif
+ return ret;
+
spu_release(ctx);
-out:
- return ret;
+
+ return 0;
}
static int spufs_mfc_fsync(struct file *file, loff_t start, loff_t end, int datasync)
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 99bd027a7f7c..610ca8570682 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -868,7 +868,7 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio)
}
/**
- * spu_deactivate - unbind a context from it's physical spu
+ * spu_deactivate - unbind a context from its physical spu
* @ctx: spu context to unbind
*
* Unbind @ctx from the physical spu it is running on and schedule
diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c
index b911b31717cc..b9ff37c7f6f0 100644
--- a/arch/powerpc/platforms/maple/pci.c
+++ b/arch/powerpc/platforms/maple/pci.c
@@ -595,7 +595,7 @@ void __init maple_pci_init(void)
/* Probe root PCI hosts, that is on U3 the AGP host and the
* HyperTransport host. That one is actually "kept" around
- * and actually added last as it's resource management relies
+ * and actually added last as its resource management relies
* on the AGP resources to have been setup first
*/
root = of_find_node_by_path("/");
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index 7135ea1d7db6..2202bf77c7a3 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -2,7 +2,7 @@
/*
* Support for the interrupt controllers found on Power Macintosh,
* currently Apple's "Grand Central" interrupt controller in all
- * it's incarnations. OpenPIC support used on newer machines is
+ * its incarnations. OpenPIC support used on newer machines is
* in a separate file
*
* Copyright (C) 1997 Paul Mackerras (paulus@samba.org)
diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S
index d497a60003d2..822ed70cdcbf 100644
--- a/arch/powerpc/platforms/powermac/sleep.S
+++ b/arch/powerpc/platforms/powermac/sleep.S
@@ -176,7 +176,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
* memory location containing the PC to resume from
* at address 0.
* - On Core99, we must store the wakeup vector at
- * address 0x80 and eventually it's parameters
+ * address 0x80 and eventually its parameters
* at address 0x84. I've have some trouble with those
* parameters however and I no longer use them.
*/
diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c b/arch/powerpc/platforms/powernv/opal-fadump.c
index 964f464b1b0e..c9c1dfb35464 100644
--- a/arch/powerpc/platforms/powernv/opal-fadump.c
+++ b/arch/powerpc/platforms/powernv/opal-fadump.c
@@ -513,8 +513,8 @@ out:
final_note(note_buf);
pr_debug("Updating elfcore header (%llx) with cpu notes\n",
- fdh->elfcorehdr_addr);
- fadump_update_elfcore_header(__va(fdh->elfcorehdr_addr));
+ fadump_conf->elfcorehdr_addr);
+ fadump_update_elfcore_header((char *)fadump_conf->elfcorehdr_addr);
return 0;
}
@@ -526,12 +526,7 @@ static int __init opal_fadump_process(struct fw_dump *fadump_conf)
if (!opal_fdm_active || !fadump_conf->fadumphdr_addr)
return rc;
- /* Validate the fadump crash info header */
fdh = __va(fadump_conf->fadumphdr_addr);
- if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
- pr_err("Crash info header is not valid.\n");
- return rc;
- }
#ifdef CONFIG_OPAL_CORE
/*
@@ -545,18 +540,7 @@ static int __init opal_fadump_process(struct fw_dump *fadump_conf)
kernel_initiated = true;
#endif
- rc = opal_fadump_build_cpu_notes(fadump_conf, fdh);
- if (rc)
- return rc;
-
- /*
- * We are done validating dump info and elfcore header is now ready
- * to be exported. set elfcorehdr_addr so that vmcore module will
- * export the elfcore header through '/proc/vmcore'.
- */
- elfcorehdr_addr = fdh->elfcorehdr_addr;
-
- return rc;
+ return opal_fadump_build_cpu_notes(fadump_conf, fdh);
}
static void opal_fadump_region_show(struct fw_dump *fadump_conf,
@@ -615,6 +599,12 @@ static void opal_fadump_trigger(struct fadump_crash_info_header *fdh,
pr_emerg("No backend support for MPIPL!\n");
}
+/* FADUMP_MAX_MEM_REGS or lower */
+static int opal_fadump_max_boot_mem_rgns(void)
+{
+ return FADUMP_MAX_MEM_REGS;
+}
+
static struct fadump_ops opal_fadump_ops = {
.fadump_init_mem_struct = opal_fadump_init_mem_struct,
.fadump_get_metadata_size = opal_fadump_get_metadata_size,
@@ -627,6 +617,7 @@ static struct fadump_ops opal_fadump_ops = {
.fadump_process = opal_fadump_process,
.fadump_region_show = opal_fadump_region_show,
.fadump_trigger = opal_fadump_trigger,
+ .fadump_max_boot_mem_rgns = opal_fadump_max_boot_mem_rgns,
};
void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node)
@@ -674,8 +665,10 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node)
}
}
- fadump_conf->ops = &opal_fadump_ops;
- fadump_conf->fadump_supported = 1;
+ fadump_conf->ops = &opal_fadump_ops;
+ fadump_conf->fadump_supported = 1;
+ /* TODO: Add support to pass additional parameters */
+ fadump_conf->param_area_supported = 0;
/*
* Firmware supports 32-bit field for size. Align it to PAGE_SIZE
diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
index 59882da3e742..cc7b1dd54ac6 100644
--- a/arch/powerpc/platforms/powernv/pci-sriov.c
+++ b/arch/powerpc/platforms/powernv/pci-sriov.c
@@ -238,7 +238,7 @@ void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev)
} else if (pdev->is_physfn) {
/*
* For PFs adjust their allocated IOV resources to match what
- * the PHB can support using it's M64 BAR table.
+ * the PHB can support using its M64 BAR table.
*/
pnv_pci_ioda_fixup_iov_resources(pdev);
}
@@ -658,7 +658,7 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
list_add_tail(&pe->list, &phb->ioda.pe_list);
mutex_unlock(&phb->ioda.pe_list_mutex);
- /* associate this pe to it's pdn */
+ /* associate this pe to its pdn */
list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) {
if (vf_pdn->busno == vf_bus &&
vf_pdn->devfn == vf_devfn) {
diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c
index b664838008c1..5147df3a18ac 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -1059,7 +1059,7 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
}
} else {
/*
- * Interrupt hanlder or fault window setup failed. Means
+ * Interrupt handler or fault window setup failed. Means
* NX can not generate fault for page fault. So not
* opening for user space tx window.
*/
diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c
index 878bc160246e..b18e1c92e554 100644
--- a/arch/powerpc/platforms/ps3/device-init.c
+++ b/arch/powerpc/platforms/ps3/device-init.c
@@ -770,49 +770,51 @@ static struct task_struct *probe_task;
static int ps3_probe_thread(void *data)
{
- struct ps3_notification_device dev;
+ struct {
+ struct ps3_notification_device dev;
+ u8 buf[512];
+ } *local;
+ struct ps3_notify_cmd *notify_cmd;
+ struct ps3_notify_event *notify_event;
int res;
unsigned int irq;
u64 lpar;
- void *buf;
- struct ps3_notify_cmd *notify_cmd;
- struct ps3_notify_event *notify_event;
pr_debug(" -> %s:%u: kthread started\n", __func__, __LINE__);
- buf = kzalloc(512, GFP_KERNEL);
- if (!buf)
+ local = kzalloc(sizeof(*local), GFP_KERNEL);
+ if (!local)
return -ENOMEM;
- lpar = ps3_mm_phys_to_lpar(__pa(buf));
- notify_cmd = buf;
- notify_event = buf;
+ lpar = ps3_mm_phys_to_lpar(__pa(&local->buf));
+ notify_cmd = (struct ps3_notify_cmd *)&local->buf;
+ notify_event = (struct ps3_notify_event *)&local->buf;
/* dummy system bus device */
- dev.sbd.bus_id = (u64)data;
- dev.sbd.dev_id = PS3_NOTIFICATION_DEV_ID;
- dev.sbd.interrupt_id = PS3_NOTIFICATION_INTERRUPT_ID;
+ local->dev.sbd.bus_id = (u64)data;
+ local->dev.sbd.dev_id = PS3_NOTIFICATION_DEV_ID;
+ local->dev.sbd.interrupt_id = PS3_NOTIFICATION_INTERRUPT_ID;
- res = lv1_open_device(dev.sbd.bus_id, dev.sbd.dev_id, 0);
+ res = lv1_open_device(local->dev.sbd.bus_id, local->dev.sbd.dev_id, 0);
if (res) {
pr_err("%s:%u: lv1_open_device failed %s\n", __func__,
__LINE__, ps3_result(res));
goto fail_free;
}
- res = ps3_sb_event_receive_port_setup(&dev.sbd, PS3_BINDING_CPU_ANY,
- &irq);
+ res = ps3_sb_event_receive_port_setup(&local->dev.sbd,
+ PS3_BINDING_CPU_ANY, &irq);
if (res) {
pr_err("%s:%u: ps3_sb_event_receive_port_setup failed %d\n",
__func__, __LINE__, res);
goto fail_close_device;
}
- spin_lock_init(&dev.lock);
- rcuwait_init(&dev.wait);
+ spin_lock_init(&local->dev.lock);
+ rcuwait_init(&local->dev.wait);
res = request_irq(irq, ps3_notification_interrupt, 0,
- "ps3_notification", &dev);
+ "ps3_notification", &local->dev);
if (res) {
pr_err("%s:%u: request_irq failed %d\n", __func__, __LINE__,
res);
@@ -823,7 +825,7 @@ static int ps3_probe_thread(void *data)
notify_cmd->operation_code = 0; /* must be zero */
notify_cmd->event_mask = 1UL << notify_region_probe;
- res = ps3_notification_read_write(&dev, lpar, 1);
+ res = ps3_notification_read_write(&local->dev, lpar, 1);
if (res)
goto fail_free_irq;
@@ -834,36 +836,37 @@ static int ps3_probe_thread(void *data)
memset(notify_event, 0, sizeof(*notify_event));
- res = ps3_notification_read_write(&dev, lpar, 0);
+ res = ps3_notification_read_write(&local->dev, lpar, 0);
if (res)
break;
pr_debug("%s:%u: notify event type 0x%llx bus id %llu dev id %llu"
" type %llu port %llu\n", __func__, __LINE__,
- notify_event->event_type, notify_event->bus_id,
- notify_event->dev_id, notify_event->dev_type,
- notify_event->dev_port);
+ notify_event->event_type, notify_event->bus_id,
+ notify_event->dev_id, notify_event->dev_type,
+ notify_event->dev_port);
if (notify_event->event_type != notify_region_probe ||
- notify_event->bus_id != dev.sbd.bus_id) {
+ notify_event->bus_id != local->dev.sbd.bus_id) {
pr_warn("%s:%u: bad notify_event: event %llu, dev_id %llu, dev_type %llu\n",
__func__, __LINE__, notify_event->event_type,
notify_event->dev_id, notify_event->dev_type);
continue;
}
- ps3_find_and_add_device(dev.sbd.bus_id, notify_event->dev_id);
+ ps3_find_and_add_device(local->dev.sbd.bus_id,
+ notify_event->dev_id);
} while (!kthread_should_stop());
fail_free_irq:
- free_irq(irq, &dev);
+ free_irq(irq, &local->dev);
fail_sb_event_receive_port_destroy:
- ps3_sb_event_receive_port_destroy(&dev.sbd, irq);
+ ps3_sb_event_receive_port_destroy(&local->dev.sbd, irq);
fail_close_device:
- lv1_close_device(dev.sbd.bus_id, dev.sbd.dev_id);
+ lv1_close_device(local->dev.sbd.bus_id, local->dev.sbd.dev_id);
fail_free:
- kfree(buf);
+ kfree(local);
probe_task = NULL;
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index f936962a2946..7bf506f6b8c8 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -1,5 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
-ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
ccflags-$(CONFIG_PPC_PSERIES_DEBUG) += -DDEBUG
obj-y := lpar.o hvCall.o nvram.o reconfig.o \
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 4e9916bb03d7..c1d8bee8f701 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -1886,10 +1886,10 @@ out:
* h_get_mpp
* H_GET_MPP hcall returns info in 7 parms
*/
-int h_get_mpp(struct hvcall_mpp_data *mpp_data)
+long h_get_mpp(struct hvcall_mpp_data *mpp_data)
{
- int rc;
- unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+ unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
+ long rc;
rc = plpar_hcall9(H_GET_MPP, retbuf);
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index f73c4d1c26af..6e7029640c0c 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -113,8 +113,8 @@ struct hvcall_ppp_data {
*/
static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data)
{
- unsigned long rc;
- unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+ unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
+ long rc;
rc = plpar_hcall9(H_GET_PPP, retbuf);
@@ -170,20 +170,24 @@ out:
kfree(buf);
}
-static unsigned h_pic(unsigned long *pool_idle_time,
- unsigned long *num_procs)
+static long h_pic(unsigned long *pool_idle_time,
+ unsigned long *num_procs)
{
- unsigned long rc;
- unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+ long rc;
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = {0};
rc = plpar_hcall(H_PIC, retbuf);
- *pool_idle_time = retbuf[0];
- *num_procs = retbuf[1];
+ if (pool_idle_time)
+ *pool_idle_time = retbuf[0];
+ if (num_procs)
+ *num_procs = retbuf[1];
return rc;
}
+unsigned long boot_pool_idle_time;
+
/*
* parse_ppp_data
* Parse out the data returned from h_get_ppp and h_pic
@@ -193,7 +197,7 @@ static void parse_ppp_data(struct seq_file *m)
struct hvcall_ppp_data ppp_data;
struct device_node *root;
const __be32 *perf_level;
- int rc;
+ long rc;
rc = h_get_ppp(&ppp_data);
if (rc)
@@ -215,9 +219,15 @@ static void parse_ppp_data(struct seq_file *m)
seq_printf(m, "pool_capacity=%d\n",
ppp_data.active_procs_in_pool * 100);
- h_pic(&pool_idle_time, &pool_procs);
- seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
- seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
+ /* If the h_pic call is not successful, the APP values reported by
+ * tools like lparstat would be wrong.
+ */
+
+ if (h_pic(&pool_idle_time, &pool_procs) == H_SUCCESS) {
+ seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
+ seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
+ seq_printf(m, "boot_pool_idle_time=%ld\n", boot_pool_idle_time);
+ }
}
seq_printf(m, "unallocated_capacity_weight=%d\n",
@@ -792,6 +802,7 @@ static const struct proc_ops lparcfg_proc_ops = {
static int __init lparcfg_init(void)
{
umode_t mode = 0444;
+ long retval;
/* Allow writing if we have FW_FEATURE_SPLPAR */
if (firmware_has_feature(FW_FEATURE_SPLPAR))
@@ -801,6 +812,16 @@ static int __init lparcfg_init(void)
printk(KERN_ERR "Failed to create powerpc/lparcfg\n");
return -EIO;
}
+
+ /* If this call fails, the since-boot APP values reported by
+ * lparstat would be wrong.
+ */
+ retval = h_pic(&boot_pool_idle_time, NULL);
+
+ if (retval != H_SUCCESS)
+ pr_debug("H_PIC failed during lparcfg init retval: %ld\n",
+ retval);
+
return 0;
}
machine_device_initcall(pseries, lparcfg_init);
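
The boot_pool_idle_time sample taken in lparcfg_init() gives consumers a baseline, so pool idleness accumulated since boot can be derived as a delta against the current H_PIC reading. A hedged sketch of that idea, in the context of the file above (the helper name is illustrative, not part of the patch):

/* Illustrative only: since-boot pool idle time relative to the baseline. */
static unsigned long pool_idle_since_boot(void)
{
	unsigned long now_idle;

	if (h_pic(&now_idle, NULL) != H_SUCCESS)
		return 0;

	return now_idle - boot_pool_idle_time;
}
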
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index c233f9db039b..9b6420eb3567 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -16,7 +16,8 @@
#include <linux/nd.h>
#include <asm/plpar_wrappers.h>
-#include <asm/papr_pdsm.h>
+#include <uapi/linux/papr_pdsm.h>
+#include <linux/papr_scm.h>
#include <asm/mce.h>
#include <asm/unaligned.h>
#include <linux/perf_event.h>
@@ -29,46 +30,6 @@
(1ul << ND_CMD_SET_CONFIG_DATA) | \
(1ul << ND_CMD_CALL))
-/* DIMM health bitmap indicators */
-/* SCM device is unable to persist memory contents */
-#define PAPR_PMEM_UNARMED (1ULL << (63 - 0))
-/* SCM device failed to persist memory contents */
-#define PAPR_PMEM_SHUTDOWN_DIRTY (1ULL << (63 - 1))
-/* SCM device contents are persisted from previous IPL */
-#define PAPR_PMEM_SHUTDOWN_CLEAN (1ULL << (63 - 2))
-/* SCM device contents are not persisted from previous IPL */
-#define PAPR_PMEM_EMPTY (1ULL << (63 - 3))
-/* SCM device memory life remaining is critically low */
-#define PAPR_PMEM_HEALTH_CRITICAL (1ULL << (63 - 4))
-/* SCM device will be garded off next IPL due to failure */
-#define PAPR_PMEM_HEALTH_FATAL (1ULL << (63 - 5))
-/* SCM contents cannot persist due to current platform health status */
-#define PAPR_PMEM_HEALTH_UNHEALTHY (1ULL << (63 - 6))
-/* SCM device is unable to persist memory contents in certain conditions */
-#define PAPR_PMEM_HEALTH_NON_CRITICAL (1ULL << (63 - 7))
-/* SCM device is encrypted */
-#define PAPR_PMEM_ENCRYPTED (1ULL << (63 - 8))
-/* SCM device has been scrubbed and locked */
-#define PAPR_PMEM_SCRUBBED_AND_LOCKED (1ULL << (63 - 9))
-
-/* Bits status indicators for health bitmap indicating unarmed dimm */
-#define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED | \
- PAPR_PMEM_HEALTH_UNHEALTHY)
-
-/* Bits status indicators for health bitmap indicating unflushed dimm */
-#define PAPR_PMEM_BAD_SHUTDOWN_MASK (PAPR_PMEM_SHUTDOWN_DIRTY)
-
-/* Bits status indicators for health bitmap indicating unrestored dimm */
-#define PAPR_PMEM_BAD_RESTORE_MASK (PAPR_PMEM_EMPTY)
-
-/* Bit status indicators for smart event notification */
-#define PAPR_PMEM_SMART_EVENT_MASK (PAPR_PMEM_HEALTH_CRITICAL | \
- PAPR_PMEM_HEALTH_FATAL | \
- PAPR_PMEM_HEALTH_UNHEALTHY)
-
-#define PAPR_SCM_PERF_STATS_EYECATCHER __stringify(SCMSTATS)
-#define PAPR_SCM_PERF_STATS_VERSION 0x1
-
/* Struct holding a single performance metric */
struct papr_scm_perf_stat {
u8 stat_id[8];
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index 1772ae3d193d..6dbc73eb2ca2 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -18,33 +18,6 @@
#include <asm/pci.h>
#include "pseries.h"
-#if 0
-void pcibios_name_device(struct pci_dev *dev)
-{
- struct device_node *dn;
-
- /*
- * Add IBM loc code (slot) as a prefix to the device names for service
- */
- dn = pci_device_to_OF_node(dev);
- if (dn) {
- const char *loc_code = of_get_property(dn, "ibm,loc-code",
- NULL);
- if (loc_code) {
- int loc_len = strlen(loc_code);
- if (loc_len < sizeof(dev->dev.name)) {
- memmove(dev->dev.name+loc_len+1, dev->dev.name,
- sizeof(dev->dev.name)-loc_len-1);
- memcpy(dev->dev.name, loc_code, loc_len);
- dev->dev.name[loc_len] = ' ';
- dev->dev.name[sizeof(dev->dev.name)-1] = '\0';
- }
- }
- }
-}
-DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_name_device);
-#endif
-
#ifdef CONFIG_PCI_IOV
#define MAX_VFS_FOR_MAP_PE 256
struct pe_map_bar_entry {
diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.c b/arch/powerpc/platforms/pseries/rtas-fadump.c
index b5853e9fcc3c..eceb3289383e 100644
--- a/arch/powerpc/platforms/pseries/rtas-fadump.c
+++ b/arch/powerpc/platforms/pseries/rtas-fadump.c
@@ -18,6 +18,7 @@
#include <asm/page.h>
#include <asm/rtas.h>
+#include <asm/setup.h>
#include <asm/fadump.h>
#include <asm/fadump-internal.h>
@@ -29,9 +30,6 @@ static const struct rtas_fadump_mem_struct *fdm_active;
static void rtas_fadump_update_config(struct fw_dump *fadump_conf,
const struct rtas_fadump_mem_struct *fdm)
{
- fadump_conf->boot_mem_dest_addr =
- be64_to_cpu(fdm->rmr_region.destination_address);
-
fadump_conf->fadumphdr_addr = (fadump_conf->boot_mem_dest_addr +
fadump_conf->boot_memory_size);
}
@@ -43,20 +41,56 @@ static void rtas_fadump_update_config(struct fw_dump *fadump_conf,
static void __init rtas_fadump_get_config(struct fw_dump *fadump_conf,
const struct rtas_fadump_mem_struct *fdm)
{
- fadump_conf->boot_mem_addr[0] =
- be64_to_cpu(fdm->rmr_region.source_address);
- fadump_conf->boot_mem_sz[0] = be64_to_cpu(fdm->rmr_region.source_len);
- fadump_conf->boot_memory_size = fadump_conf->boot_mem_sz[0];
+ unsigned long base, size, last_end, hole_size;
- fadump_conf->boot_mem_top = fadump_conf->boot_memory_size;
- fadump_conf->boot_mem_regs_cnt = 1;
+ last_end = 0;
+ hole_size = 0;
+ fadump_conf->boot_memory_size = 0;
+ fadump_conf->boot_mem_regs_cnt = 0;
+ pr_debug("Boot memory regions:\n");
+ for (int i = 0; i < be16_to_cpu(fdm->header.dump_num_sections); i++) {
+ int type = be16_to_cpu(fdm->rgn[i].source_data_type);
+ u64 addr;
- /*
- * Start address of reserve dump area (permanent reservation) for
- * re-registering FADump after dump capture.
- */
- fadump_conf->reserve_dump_area_start =
- be64_to_cpu(fdm->cpu_state_data.destination_address);
+ switch (type) {
+ case RTAS_FADUMP_CPU_STATE_DATA:
+ addr = be64_to_cpu(fdm->rgn[i].destination_address);
+
+ fadump_conf->cpu_state_dest_vaddr = (u64)__va(addr);
+ /*
+ * Start address of reserve dump area (permanent reservation) for
+ * re-registering FADump after dump capture.
+ */
+ fadump_conf->reserve_dump_area_start = addr;
+ break;
+ case RTAS_FADUMP_HPTE_REGION:
+ /* Not processed currently. */
+ break;
+ case RTAS_FADUMP_REAL_MODE_REGION:
+ base = be64_to_cpu(fdm->rgn[i].source_address);
+ size = be64_to_cpu(fdm->rgn[i].source_len);
+ pr_debug("\t[%03d] base: 0x%lx, size: 0x%lx\n", i, base, size);
+ if (!base) {
+ fadump_conf->boot_mem_dest_addr =
+ be64_to_cpu(fdm->rgn[i].destination_address);
+ }
+
+ fadump_conf->boot_mem_addr[fadump_conf->boot_mem_regs_cnt] = base;
+ fadump_conf->boot_mem_sz[fadump_conf->boot_mem_regs_cnt] = size;
+ fadump_conf->boot_memory_size += size;
+ hole_size += (base - last_end);
+ last_end = base + size;
+ fadump_conf->boot_mem_regs_cnt++;
+ break;
+ case RTAS_FADUMP_PARAM_AREA:
+ fadump_conf->param_area = be64_to_cpu(fdm->rgn[i].destination_address);
+ break;
+ default:
+ pr_warn("Section type %d unsupported on this kernel. Ignoring!\n", type);
+ break;
+ }
+ }
+ fadump_conf->boot_mem_top = fadump_conf->boot_memory_size + hole_size;
rtas_fadump_update_config(fadump_conf, fdm);
}
@@ -64,16 +98,15 @@ static void __init rtas_fadump_get_config(struct fw_dump *fadump_conf,
static u64 rtas_fadump_init_mem_struct(struct fw_dump *fadump_conf)
{
u64 addr = fadump_conf->reserve_dump_area_start;
+ u16 sec_cnt = 0;
memset(&fdm, 0, sizeof(struct rtas_fadump_mem_struct));
addr = addr & PAGE_MASK;
fdm.header.dump_format_version = cpu_to_be32(0x00000001);
- fdm.header.dump_num_sections = cpu_to_be16(3);
fdm.header.dump_status_flag = 0;
fdm.header.offset_first_dump_section =
- cpu_to_be32((u32)offsetof(struct rtas_fadump_mem_struct,
- cpu_state_data));
+ cpu_to_be32((u32)offsetof(struct rtas_fadump_mem_struct, rgn));
/*
* Fields for disk dump option.
@@ -89,25 +122,22 @@ static u64 rtas_fadump_init_mem_struct(struct fw_dump *fadump_conf)
/* Kernel dump sections */
/* cpu state data section. */
- fdm.cpu_state_data.request_flag =
- cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG);
- fdm.cpu_state_data.source_data_type =
- cpu_to_be16(RTAS_FADUMP_CPU_STATE_DATA);
- fdm.cpu_state_data.source_address = 0;
- fdm.cpu_state_data.source_len =
- cpu_to_be64(fadump_conf->cpu_state_data_size);
- fdm.cpu_state_data.destination_address = cpu_to_be64(addr);
+ fdm.rgn[sec_cnt].request_flag = cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG);
+ fdm.rgn[sec_cnt].source_data_type = cpu_to_be16(RTAS_FADUMP_CPU_STATE_DATA);
+ fdm.rgn[sec_cnt].source_address = 0;
+ fdm.rgn[sec_cnt].source_len = cpu_to_be64(fadump_conf->cpu_state_data_size);
+ fdm.rgn[sec_cnt].destination_address = cpu_to_be64(addr);
addr += fadump_conf->cpu_state_data_size;
+ sec_cnt++;
/* hpte region section */
- fdm.hpte_region.request_flag = cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG);
- fdm.hpte_region.source_data_type =
- cpu_to_be16(RTAS_FADUMP_HPTE_REGION);
- fdm.hpte_region.source_address = 0;
- fdm.hpte_region.source_len =
- cpu_to_be64(fadump_conf->hpte_region_size);
- fdm.hpte_region.destination_address = cpu_to_be64(addr);
+ fdm.rgn[sec_cnt].request_flag = cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG);
+ fdm.rgn[sec_cnt].source_data_type = cpu_to_be16(RTAS_FADUMP_HPTE_REGION);
+ fdm.rgn[sec_cnt].source_address = 0;
+ fdm.rgn[sec_cnt].source_len = cpu_to_be64(fadump_conf->hpte_region_size);
+ fdm.rgn[sec_cnt].destination_address = cpu_to_be64(addr);
addr += fadump_conf->hpte_region_size;
+ sec_cnt++;
/*
* Align boot memory area destination address to page boundary to
@@ -115,14 +145,29 @@ static u64 rtas_fadump_init_mem_struct(struct fw_dump *fadump_conf)
*/
addr = PAGE_ALIGN(addr);
- /* RMA region section */
- fdm.rmr_region.request_flag = cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG);
- fdm.rmr_region.source_data_type =
- cpu_to_be16(RTAS_FADUMP_REAL_MODE_REGION);
- fdm.rmr_region.source_address = cpu_to_be64(0);
- fdm.rmr_region.source_len = cpu_to_be64(fadump_conf->boot_memory_size);
- fdm.rmr_region.destination_address = cpu_to_be64(addr);
- addr += fadump_conf->boot_memory_size;
+ /* First boot memory region destination address */
+ fadump_conf->boot_mem_dest_addr = addr;
+ for (int i = 0; i < fadump_conf->boot_mem_regs_cnt; i++) {
+ /* Boot memory regions */
+ fdm.rgn[sec_cnt].request_flag = cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG);
+ fdm.rgn[sec_cnt].source_data_type = cpu_to_be16(RTAS_FADUMP_REAL_MODE_REGION);
+ fdm.rgn[sec_cnt].source_address = cpu_to_be64(fadump_conf->boot_mem_addr[i]);
+ fdm.rgn[sec_cnt].source_len = cpu_to_be64(fadump_conf->boot_mem_sz[i]);
+ fdm.rgn[sec_cnt].destination_address = cpu_to_be64(addr);
+ addr += fadump_conf->boot_mem_sz[i];
+ sec_cnt++;
+ }
+
+ /* Parameters area */
+ if (fadump_conf->param_area) {
+ fdm.rgn[sec_cnt].request_flag = cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG);
+ fdm.rgn[sec_cnt].source_data_type = cpu_to_be16(RTAS_FADUMP_PARAM_AREA);
+ fdm.rgn[sec_cnt].source_address = cpu_to_be64(fadump_conf->param_area);
+ fdm.rgn[sec_cnt].source_len = cpu_to_be64(COMMAND_LINE_SIZE);
+ fdm.rgn[sec_cnt].destination_address = cpu_to_be64(fadump_conf->param_area);
+ sec_cnt++;
+ }
+ fdm.header.dump_num_sections = cpu_to_be16(sec_cnt);
rtas_fadump_update_config(fadump_conf, &fdm);
@@ -136,14 +181,21 @@ static u64 rtas_fadump_get_bootmem_min(void)
static int rtas_fadump_register(struct fw_dump *fadump_conf)
{
- unsigned int wait_time;
+ unsigned int wait_time, fdm_size;
int rc, err = -EIO;
+ /*
+ * Platform requires the exact size of the Dump Memory Structure.
+ * Avoid including any unused rgns in the calculation, as this
+ * could result in a parameter error (-3) from the platform.
+ */
+ fdm_size = sizeof(struct rtas_fadump_section_header);
+ fdm_size += be16_to_cpu(fdm.header.dump_num_sections) * sizeof(struct rtas_fadump_section);
+
/* TODO: Add upper time limit for the delay */
do {
rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
- NULL, FADUMP_REGISTER, &fdm,
- sizeof(struct rtas_fadump_mem_struct));
+ NULL, FADUMP_REGISTER, &fdm, fdm_size);
wait_time = rtas_busy_delay_time(rc);
if (wait_time)
@@ -161,9 +213,7 @@ static int rtas_fadump_register(struct fw_dump *fadump_conf)
pr_err("Failed to register. Hardware Error(%d).\n", rc);
break;
case -3:
- if (!is_fadump_boot_mem_contiguous())
- pr_err("Can't have holes in boot memory area.\n");
- else if (!is_fadump_reserved_mem_contiguous())
+ if (!is_fadump_reserved_mem_contiguous())
pr_err("Can't have holes in reserved memory area.\n");
pr_err("Failed to register. Parameter Error(%d).\n", rc);
@@ -316,11 +366,9 @@ static int __init rtas_fadump_build_cpu_notes(struct fw_dump *fadump_conf)
u32 num_cpus, *note_buf;
int i, rc = 0, cpu = 0;
struct pt_regs regs;
- unsigned long addr;
void *vaddr;
- addr = be64_to_cpu(fdm_active->cpu_state_data.destination_address);
- vaddr = __va(addr);
+ vaddr = (void *)fadump_conf->cpu_state_dest_vaddr;
reg_header = vaddr;
if (be64_to_cpu(reg_header->magic_number) !=
@@ -375,11 +423,8 @@ static int __init rtas_fadump_build_cpu_notes(struct fw_dump *fadump_conf)
}
final_note(note_buf);
- if (fdh) {
- pr_debug("Updating elfcore header (%llx) with cpu notes\n",
- fdh->elfcorehdr_addr);
- fadump_update_elfcore_header(__va(fdh->elfcorehdr_addr));
- }
+ pr_debug("Updating elfcore header (%llx) with cpu notes\n", fadump_conf->elfcorehdr_addr);
+ fadump_update_elfcore_header((char *)fadump_conf->elfcorehdr_addr);
return 0;
error_out:
@@ -389,57 +434,66 @@ error_out:
}
/*
- * Validate and process the dump data stored by firmware before exporting
- * it through '/proc/vmcore'.
+ * Validate and process the dump data stored by the firmware, and update
+ * the CPU notes of elfcorehdr.
*/
static int __init rtas_fadump_process(struct fw_dump *fadump_conf)
{
- struct fadump_crash_info_header *fdh;
- int rc = 0;
-
if (!fdm_active || !fadump_conf->fadumphdr_addr)
return -EINVAL;
/* Check if the dump data is valid. */
- if ((be16_to_cpu(fdm_active->header.dump_status_flag) ==
- RTAS_FADUMP_ERROR_FLAG) ||
- (fdm_active->cpu_state_data.error_flags != 0) ||
- (fdm_active->rmr_region.error_flags != 0)) {
- pr_err("Dump taken by platform is not valid\n");
- return -EINVAL;
- }
- if ((fdm_active->rmr_region.bytes_dumped !=
- fdm_active->rmr_region.source_len) ||
- !fdm_active->cpu_state_data.bytes_dumped) {
- pr_err("Dump taken by platform is incomplete\n");
- return -EINVAL;
- }
+ for (int i = 0; i < be16_to_cpu(fdm_active->header.dump_num_sections); i++) {
+ int type = be16_to_cpu(fdm_active->rgn[i].source_data_type);
+ int rc = 0;
- /* Validate the fadump crash info header */
- fdh = __va(fadump_conf->fadumphdr_addr);
- if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
- pr_err("Crash info header is not valid.\n");
- return -EINVAL;
+ switch (type) {
+ case RTAS_FADUMP_CPU_STATE_DATA:
+ case RTAS_FADUMP_HPTE_REGION:
+ case RTAS_FADUMP_REAL_MODE_REGION:
+ if (fdm_active->rgn[i].error_flags != 0) {
+ pr_err("Dump taken by platform is not valid (%d)\n", i);
+ rc = -EINVAL;
+ }
+ if (fdm_active->rgn[i].bytes_dumped != fdm_active->rgn[i].source_len) {
+ pr_err("Dump taken by platform is incomplete (%d)\n", i);
+ rc = -EINVAL;
+ }
+ if (rc) {
+ pr_warn("Region type: %u src addr: 0x%llx dest addr: 0x%llx\n",
+ be16_to_cpu(fdm_active->rgn[i].source_data_type),
+ be64_to_cpu(fdm_active->rgn[i].source_address),
+ be64_to_cpu(fdm_active->rgn[i].destination_address));
+ return rc;
+ }
+ break;
+ case RTAS_FADUMP_PARAM_AREA:
+ if (fdm_active->rgn[i].bytes_dumped != fdm_active->rgn[i].source_len ||
+ fdm_active->rgn[i].error_flags != 0) {
+ pr_warn("Failed to process additional parameters! Proceeding anyway..\n");
+ fadump_conf->param_area = 0;
+ }
+ break;
+ default:
+ /*
+ * If the first/crashed kernel added a new region type that the
+ * second/fadump kernel doesn't recognize, skip it and process
+ * assuming backward compatibility.
+ */
+ pr_warn("Unknown region found: type: %u src addr: 0x%llx dest addr: 0x%llx\n",
+ be16_to_cpu(fdm_active->rgn[i].source_data_type),
+ be64_to_cpu(fdm_active->rgn[i].source_address),
+ be64_to_cpu(fdm_active->rgn[i].destination_address));
+ break;
+ }
}
- rc = rtas_fadump_build_cpu_notes(fadump_conf);
- if (rc)
- return rc;
-
- /*
- * We are done validating dump info and elfcore header is now ready
- * to be exported. set elfcorehdr_addr so that vmcore module will
- * export the elfcore header through '/proc/vmcore'.
- */
- elfcorehdr_addr = fdh->elfcorehdr_addr;
-
- return 0;
+ return rtas_fadump_build_cpu_notes(fadump_conf);
}
static void rtas_fadump_region_show(struct fw_dump *fadump_conf,
struct seq_file *m)
{
- const struct rtas_fadump_section *cpu_data_section;
const struct rtas_fadump_mem_struct *fdm_ptr;
if (fdm_active)
@@ -447,27 +501,49 @@ static void rtas_fadump_region_show(struct fw_dump *fadump_conf,
else
fdm_ptr = &fdm;
- cpu_data_section = &(fdm_ptr->cpu_state_data);
- seq_printf(m, "CPU :[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n",
- be64_to_cpu(cpu_data_section->destination_address),
- be64_to_cpu(cpu_data_section->destination_address) +
- be64_to_cpu(cpu_data_section->source_len) - 1,
- be64_to_cpu(cpu_data_section->source_len),
- be64_to_cpu(cpu_data_section->bytes_dumped));
-
- seq_printf(m, "HPTE:[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n",
- be64_to_cpu(fdm_ptr->hpte_region.destination_address),
- be64_to_cpu(fdm_ptr->hpte_region.destination_address) +
- be64_to_cpu(fdm_ptr->hpte_region.source_len) - 1,
- be64_to_cpu(fdm_ptr->hpte_region.source_len),
- be64_to_cpu(fdm_ptr->hpte_region.bytes_dumped));
-
- seq_printf(m, "DUMP: Src: %#016llx, Dest: %#016llx, ",
- be64_to_cpu(fdm_ptr->rmr_region.source_address),
- be64_to_cpu(fdm_ptr->rmr_region.destination_address));
- seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n",
- be64_to_cpu(fdm_ptr->rmr_region.source_len),
- be64_to_cpu(fdm_ptr->rmr_region.bytes_dumped));
+
+ for (int i = 0; i < be16_to_cpu(fdm_ptr->header.dump_num_sections); i++) {
+ int type = be16_to_cpu(fdm_ptr->rgn[i].source_data_type);
+
+ switch (type) {
+ case RTAS_FADUMP_CPU_STATE_DATA:
+ seq_printf(m, "CPU :[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n",
+ be64_to_cpu(fdm_ptr->rgn[i].destination_address),
+ be64_to_cpu(fdm_ptr->rgn[i].destination_address) +
+ be64_to_cpu(fdm_ptr->rgn[i].source_len) - 1,
+ be64_to_cpu(fdm_ptr->rgn[i].source_len),
+ be64_to_cpu(fdm_ptr->rgn[i].bytes_dumped));
+ break;
+ case RTAS_FADUMP_HPTE_REGION:
+ seq_printf(m, "HPTE:[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n",
+ be64_to_cpu(fdm_ptr->rgn[i].destination_address),
+ be64_to_cpu(fdm_ptr->rgn[i].destination_address) +
+ be64_to_cpu(fdm_ptr->rgn[i].source_len) - 1,
+ be64_to_cpu(fdm_ptr->rgn[i].source_len),
+ be64_to_cpu(fdm_ptr->rgn[i].bytes_dumped));
+ break;
+ case RTAS_FADUMP_REAL_MODE_REGION:
+ seq_printf(m, "DUMP: Src: %#016llx, Dest: %#016llx, ",
+ be64_to_cpu(fdm_ptr->rgn[i].source_address),
+ be64_to_cpu(fdm_ptr->rgn[i].destination_address));
+ seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n",
+ be64_to_cpu(fdm_ptr->rgn[i].source_len),
+ be64_to_cpu(fdm_ptr->rgn[i].bytes_dumped));
+ break;
+ case RTAS_FADUMP_PARAM_AREA:
+ seq_printf(m, "\n[%#016llx-%#016llx]: cmdline append: '%s'\n",
+ be64_to_cpu(fdm_ptr->rgn[i].destination_address),
+ be64_to_cpu(fdm_ptr->rgn[i].destination_address) +
+ be64_to_cpu(fdm_ptr->rgn[i].source_len) - 1,
+ (char *)__va(be64_to_cpu(fdm_ptr->rgn[i].destination_address)));
+ break;
+ default:
+ seq_printf(m, "Unknown region type %d : Src: %#016llx, Dest: %#016llx, ",
+ type, be64_to_cpu(fdm_ptr->rgn[i].source_address),
+ be64_to_cpu(fdm_ptr->rgn[i].destination_address));
+ break;
+ }
+ }
/* Dump is active. Show preserved area start address. */
if (fdm_active) {
@@ -483,6 +559,20 @@ static void rtas_fadump_trigger(struct fadump_crash_info_header *fdh,
rtas_os_term((char *)msg);
}
+/* FADUMP_MAX_MEM_REGS or lower */
+static int rtas_fadump_max_boot_mem_rgns(void)
+{
+ /*
+ * Version 1 of Kernel Assisted Dump Memory Structure (PAPR) supports 10 sections.
+ * With one section each taken for CPU state data and HPTE, 8 sections
+ * can be used for boot memory regions.
+ *
+ * If new region types are defined, the maximum number of boot memory
+ * regions decreases proportionally.
+ */
+ return RTAS_FADUMP_MAX_BOOT_MEM_REGS;
+}
+
static struct fadump_ops rtas_fadump_ops = {
.fadump_init_mem_struct = rtas_fadump_init_mem_struct,
.fadump_get_bootmem_min = rtas_fadump_get_bootmem_min,
@@ -492,6 +582,7 @@ static struct fadump_ops rtas_fadump_ops = {
.fadump_process = rtas_fadump_process,
.fadump_region_show = rtas_fadump_region_show,
.fadump_trigger = rtas_fadump_trigger,
+ .fadump_max_boot_mem_rgns = rtas_fadump_max_boot_mem_rgns,
};
void __init rtas_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node)
@@ -508,9 +599,10 @@ void __init rtas_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node)
if (!token)
return;
- fadump_conf->ibm_configure_kernel_dump = be32_to_cpu(*token);
- fadump_conf->ops = &rtas_fadump_ops;
- fadump_conf->fadump_supported = 1;
+ fadump_conf->ibm_configure_kernel_dump = be32_to_cpu(*token);
+ fadump_conf->ops = &rtas_fadump_ops;
+ fadump_conf->fadump_supported = 1;
+ fadump_conf->param_area_supported = 1;
/* Firmware supports 64-bit value for size, align it to pagesize. */
fadump_conf->max_copy_size = ALIGN_DOWN(U64_MAX, PAGE_SIZE);
diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.h b/arch/powerpc/platforms/pseries/rtas-fadump.h
index fd59bd7ca9c3..c109abf6befd 100644
--- a/arch/powerpc/platforms/pseries/rtas-fadump.h
+++ b/arch/powerpc/platforms/pseries/rtas-fadump.h
@@ -23,12 +23,24 @@
#define RTAS_FADUMP_HPTE_REGION 0x0002
#define RTAS_FADUMP_REAL_MODE_REGION 0x0011
+/* OS defined sections */
+#define RTAS_FADUMP_PARAM_AREA 0x0100
+
/* Dump request flag */
#define RTAS_FADUMP_REQUEST_FLAG 0x00000001
/* Dump status flag */
#define RTAS_FADUMP_ERROR_FLAG 0x2000
+/*
+ * The Firmware Assisted Dump Memory structure supports a maximum of 10 sections
+ * in the dump memory structure. Presently, three sections are used for
+ * CPU state data, HPTE & Parameters area, while the remaining seven sections
+ * can be used for boot memory regions.
+ */
+#define MAX_SECTIONS 10
+#define RTAS_FADUMP_MAX_BOOT_MEM_REGS 7
+
/* Kernel Dump section info */
struct rtas_fadump_section {
__be32 request_flag;
@@ -61,20 +73,15 @@ struct rtas_fadump_section_header {
* Firmware Assisted dump memory structure. This structure is required for
* registering future kernel dump with power firmware through rtas call.
*
- * No disk dump option. Hence disk dump path string section is not included.
+ * In version 1, the platform permits one section header, dump-disk path
+ * and ten sections.
+ *
+ * Note: No disk dump option. Hence disk dump path string section is not
+ * included.
*/
struct rtas_fadump_mem_struct {
struct rtas_fadump_section_header header;
-
- /* Kernel dump sections */
- struct rtas_fadump_section cpu_state_data;
- struct rtas_fadump_section hpte_region;
-
- /*
- * TODO: Extend multiple boot memory regions support in the kernel
- * for this platform.
- */
- struct rtas_fadump_section rmr_region;
+ struct rtas_fadump_section rgn[MAX_SECTIONS];
};
/*
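
With rgn[] now a fixed array of MAX_SECTIONS entries, the registration path computes the size passed to RTAS from the populated section count instead of sizeof(struct rtas_fadump_mem_struct). Restated as a standalone sketch (the helper name is illustrative):

/* Header plus only the sections actually filled in, per dump_num_sections. */
static size_t rtas_fadump_reg_size(const struct rtas_fadump_mem_struct *fdm)
{
	return sizeof(struct rtas_fadump_section_header) +
	       be16_to_cpu(fdm->header.dump_num_sections) *
	       sizeof(struct rtas_fadump_section);
}
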
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
index 71d52a670d95..ba3fb7a7f2ea 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -228,7 +228,7 @@ static irqreturn_t pseries_vas_irq_handler(int irq, void *data)
struct pseries_vas_window *txwin = data;
/*
- * The thread hanlder will process this interrupt if it is
+ * The thread handler will process this interrupt if it is
* already running.
*/
atomic_inc(&txwin->pending_faults);
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index 90ff85c879bf..b2babfdbc40b 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -1592,13 +1592,9 @@ static int vio_hotplug(const struct device *dev, struct kobj_uevent_env *env)
const char *cp;
dn = dev->of_node;
- if (!dn)
- return -ENODEV;
- cp = of_get_property(dn, "compatible", NULL);
- if (!cp)
- return -ENODEV;
+ if (dn && (cp = of_get_property(dn, "compatible", NULL)))
+ add_uevent_var(env, "MODALIAS=vio:T%sS%s", vio_dev->type, cp);
- add_uevent_var(env, "MODALIAS=vio:T%sS%s", vio_dev->type, cp);
return 0;
}
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index 9cb1d029511a..24a177d164f1 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -1,7 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
-
mpic-msi-obj-$(CONFIG_PCI_MSI) += mpic_msi.o mpic_u3msi.o
obj-$(CONFIG_MPIC) += mpic.o $(mpic-msi-obj-y)
obj-$(CONFIG_MPIC_TIMER) += mpic_timer.o
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index 98096bbfd62e..c0d10c149661 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -24,7 +24,6 @@
#include <linux/suspend.h>
#include <linux/memblock.h>
#include <linux/gfp.h>
-#include <linux/kmemleak.h>
#include <linux/of_address.h>
#include <asm/io.h>
#include <asm/iommu.h>
@@ -243,9 +242,6 @@ static void __init allocate_dart(void)
if (!dart_tablebase)
panic("Failed to allocate 16MB below 2GB for DART table\n");
- /* There is no point scanning the DART space for leaks*/
- kmemleak_no_scan((void *)dart_tablebase);
-
/* Allocate a spare page to map all invalid DART pages. We need to do
* that to work around what looks like a problem with the HT bridge
* prefetching into invalid pages and corrupting data
diff --git a/arch/powerpc/sysdev/fsl_gtm.c b/arch/powerpc/sysdev/fsl_gtm.c
index 39186ad6b3c3..3dabc9621810 100644
--- a/arch/powerpc/sysdev/fsl_gtm.c
+++ b/arch/powerpc/sysdev/fsl_gtm.c
@@ -77,7 +77,7 @@ struct gtm {
static LIST_HEAD(gtms);
/**
- * gtm_get_timer - request GTM timer to use it with the rest of GTM API
+ * gtm_get_timer16 - request GTM timer to use it with the rest of GTM API
* Context: non-IRQ
*
* This function reserves GTM timer for later use. It returns gtm_timer
@@ -110,7 +110,7 @@ struct gtm_timer *gtm_get_timer16(void)
EXPORT_SYMBOL(gtm_get_timer16);
/**
- * gtm_get_specific_timer - request specific GTM timer
+ * gtm_get_specific_timer16 - request specific GTM timer
* @gtm: specific GTM, pass here GTM's device_node->data
* @timer: specific timer number, Timer1 is 0.
* Context: non-IRQ
@@ -260,7 +260,7 @@ int gtm_set_timer16(struct gtm_timer *tmr, unsigned long usec, bool reload)
EXPORT_SYMBOL(gtm_set_timer16);
/**
- * gtm_set_exact_utimer16 - (re)set 16 bits timer
+ * gtm_set_exact_timer16 - (re)set 16 bits timer
* @tmr: pointer to the gtm_timer structure obtained from gtm_get_timer
* @usec: timer interval in microseconds
* @reload: if set, the timer will reset upon expiry rather than
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index 8e6c84df4ca1..e205135ae1fe 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -564,10 +564,12 @@ static const struct fsl_msi_feature ipic_msi_feature = {
.msiir_offset = 0x38,
};
+#ifdef CONFIG_EPAPR_PARAVIRT
static const struct fsl_msi_feature vmpic_msi_feature = {
.fsl_pic_ip = FSL_PIC_IP_VMPIC,
.msiir_offset = 0,
};
+#endif
static const struct of_device_id fsl_of_msi_ids[] = {
{
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index a289cb97c1d7..fa01818c1972 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -383,7 +383,7 @@ static unsigned int xive_get_irq(void)
* CPU.
*
* If we find that there is indeed more in there, we call
- * force_external_irq_replay() to make Linux synthetize an
+ * force_external_irq_replay() to make Linux synthesize an
* external interrupt on the next call to local_irq_restore().
*/
static void xive_do_queue_eoi(struct xive_cpu *xc)
@@ -874,7 +874,7 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
*
* This also tells us that it's in flight to a host queue
* or has already been fetched but hasn't been EOIed yet
- * by the host. This it's potentially using up a host
+ * by the host. Thus it's potentially using up a host
* queue slot. This is important to know because as long
* as this is the case, we must not hard-unmask it when
* "returning" that interrupt to the host.
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index f1c0fa6ece21..517b963e3e6a 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -415,7 +415,7 @@ static void xive_native_setup_cpu(unsigned int cpu, struct xive_cpu *xc)
return;
}
- /* Grab it's CAM value */
+ /* Grab its CAM value */
rc = opal_xive_get_vp_info(vp, NULL, &vp_cam_be, NULL, NULL);
if (rc) {
pr_err("Failed to get pool VP info CPU %d\n", cpu);
diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
index 682c7c0a6f77..d778011060a8 100644
--- a/arch/powerpc/xmon/Makefile
+++ b/arch/powerpc/xmon/Makefile
@@ -10,8 +10,6 @@ KCSAN_SANITIZE := n
# Disable ftrace for the entire directory
ccflags-remove-$(CONFIG_FUNCTION_TRACER) += $(CC_FLAGS_FTRACE)
-ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
-
# Clang stores addresses on the stack causing the frame size to blow
# out. See https://github.com/ClangBuiltLinux/linux/issues/252
ccflags-$(CONFIG_CC_IS_CLANG) += -Wframe-larger-than=4096
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index d79d6633f333..bd4813bad317 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1350,7 +1350,7 @@ static int cpu_cmd(void)
}
termch = cpu;
- if (!scanhex(&cpu)) {
+ if (!scanhex(&cpu) || cpu >= num_possible_cpus()) {
/* print cpus waiting or in xmon */
printf("cpus stopped:");
last_cpu = first_cpu = NR_CPUS;
@@ -2772,7 +2772,7 @@ static void dump_pacas(void)
termch = c; /* Put c back, it wasn't 'a' */
- if (scanhex(&num))
+ if (scanhex(&num) && num < num_possible_cpus())
dump_one_paca(num);
else
dump_one_paca(xmon_owner);
@@ -2845,7 +2845,7 @@ static void dump_xives(void)
termch = c; /* Put c back, it wasn't 'a' */
- if (scanhex(&num))
+ if (scanhex(&num) && num < num_possible_cpus())
dump_one_xive(num);
else
dump_one_xive(xmon_owner);
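
The xmon changes share one pattern: a CPU number parsed from the command line is trusted only after a num_possible_cpus() bound check, so a bogus index can no longer select a non-existent paca or XIVE state. A minimal sketch of that pattern, with a hypothetical helper name:

/* Illustrative only: validate a user-supplied CPU index before dumping its state. */
static bool xmon_parse_cpu(unsigned long *cpu)
{
	return scanhex(cpu) && *cpu < num_possible_cpus();
}
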
diff --git a/arch/riscv/configs/nommu_k210_defconfig b/arch/riscv/configs/nommu_k210_defconfig
index 2552e78074a3..af9601da4643 100644
--- a/arch/riscv/configs/nommu_k210_defconfig
+++ b/arch/riscv/configs/nommu_k210_defconfig
@@ -11,7 +11,7 @@ CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
# CONFIG_SYSFS_SYSCALL is not set
# CONFIG_FHANDLE is not set
-# CONFIG_BASE_FULL is not set
+CONFIG_BASE_SMALL=y
# CONFIG_FUTEX is not set
# CONFIG_EPOLL is not set
# CONFIG_SIGNALFD is not set
diff --git a/arch/riscv/configs/nommu_k210_sdcard_defconfig b/arch/riscv/configs/nommu_k210_sdcard_defconfig
index 8f67fb830585..dd460c649152 100644
--- a/arch/riscv/configs/nommu_k210_sdcard_defconfig
+++ b/arch/riscv/configs/nommu_k210_sdcard_defconfig
@@ -3,7 +3,7 @@ CONFIG_LOG_BUF_SHIFT=13
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
# CONFIG_SYSFS_SYSCALL is not set
# CONFIG_FHANDLE is not set
-# CONFIG_BASE_FULL is not set
+CONFIG_BASE_SMALL=y
# CONFIG_FUTEX is not set
# CONFIG_EPOLL is not set
# CONFIG_SIGNALFD is not set
diff --git a/arch/riscv/configs/nommu_virt_defconfig b/arch/riscv/configs/nommu_virt_defconfig
index de8143d1f738..d4b03dc3c2c0 100644
--- a/arch/riscv/configs/nommu_virt_defconfig
+++ b/arch/riscv/configs/nommu_virt_defconfig
@@ -10,7 +10,7 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_EXPERT=y
# CONFIG_SYSFS_SYSCALL is not set
# CONFIG_FHANDLE is not set
-# CONFIG_BASE_FULL is not set
+CONFIG_BASE_SMALL=y
# CONFIG_EPOLL is not set
# CONFIG_SIGNALFD is not set
# CONFIG_TIMERFD is not set
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 2468c55933cd..25966995da04 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -168,7 +168,8 @@
#define VSIP_TO_HVIP_SHIFT (IRQ_VS_SOFT - IRQ_S_SOFT)
#define VSIP_VALID_MASK ((_AC(1, UL) << IRQ_S_SOFT) | \
(_AC(1, UL) << IRQ_S_TIMER) | \
- (_AC(1, UL) << IRQ_S_EXT))
+ (_AC(1, UL) << IRQ_S_EXT) | \
+ (_AC(1, UL) << IRQ_PMU_OVF))
/* AIA CSR bits */
#define TOPI_IID_SHIFT 16
@@ -281,7 +282,7 @@
#define CSR_HPMCOUNTER30H 0xc9e
#define CSR_HPMCOUNTER31H 0xc9f
-#define CSR_SSCOUNTOVF 0xda0
+#define CSR_SCOUNTOVF 0xda0
#define CSR_SSTATUS 0x100
#define CSR_SIE 0x104
diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index 484d04a92fa6..d96281278586 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -43,6 +43,17 @@
KVM_ARCH_REQ_FLAGS(5, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(6)
+#define KVM_HEDELEG_DEFAULT (BIT(EXC_INST_MISALIGNED) | \
+ BIT(EXC_BREAKPOINT) | \
+ BIT(EXC_SYSCALL) | \
+ BIT(EXC_INST_PAGE_FAULT) | \
+ BIT(EXC_LOAD_PAGE_FAULT) | \
+ BIT(EXC_STORE_PAGE_FAULT))
+
+#define KVM_HIDELEG_DEFAULT (BIT(IRQ_VS_SOFT) | \
+ BIT(IRQ_VS_TIMER) | \
+ BIT(IRQ_VS_EXT))
+
enum kvm_riscv_hfence_type {
KVM_RISCV_HFENCE_UNKNOWN = 0,
KVM_RISCV_HFENCE_GVMA_VMID_GPA,
@@ -169,6 +180,7 @@ struct kvm_vcpu_csr {
struct kvm_vcpu_config {
u64 henvcfg;
u64 hstateen0;
+ unsigned long hedeleg;
};
struct kvm_vcpu_smstateen_csr {
@@ -211,6 +223,7 @@ struct kvm_vcpu_arch {
/* CPU context upon Guest VCPU reset */
struct kvm_cpu_context guest_reset_context;
+ spinlock_t reset_cntx_lock;
/* CPU CSR context upon Guest VCPU reset */
struct kvm_vcpu_csr guest_reset_csr;
@@ -252,8 +265,9 @@ struct kvm_vcpu_arch {
/* Cache pages needed to program page tables with spinlock held */
struct kvm_mmu_memory_cache mmu_page_cache;
- /* VCPU power-off state */
- bool power_off;
+ /* VCPU power state */
+ struct kvm_mp_state mp_state;
+ spinlock_t mp_state_lock;
/* Don't run the VCPU (blocked) */
bool pause;
@@ -374,8 +388,11 @@ int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu);
bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask);
+void __kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu);
+void __kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu);
+bool kvm_riscv_vcpu_stopped(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_sbi_sta_reset(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu);
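
Replacing the power_off flag with a kvm_mp_state plus spinlock implies locked accessors; the declarations above (__kvm_riscv_vcpu_power_off(), kvm_riscv_vcpu_stopped(), ...) presumably wrap something along these lines. A hedged sketch under that assumption, not the patch's actual bodies:

static void vcpu_set_mp_state(struct kvm_vcpu *vcpu, u32 state)
{
	spin_lock(&vcpu->arch.mp_state_lock);
	WRITE_ONCE(vcpu->arch.mp_state.mp_state, state);
	spin_unlock(&vcpu->arch.mp_state_lock);
}

static bool vcpu_is_stopped(struct kvm_vcpu *vcpu)
{
	return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_STOPPED;
}
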
diff --git a/arch/riscv/include/asm/kvm_vcpu_pmu.h b/arch/riscv/include/asm/kvm_vcpu_pmu.h
index 395518a1664e..fa0f535bbbf0 100644
--- a/arch/riscv/include/asm/kvm_vcpu_pmu.h
+++ b/arch/riscv/include/asm/kvm_vcpu_pmu.h
@@ -20,7 +20,7 @@ static_assert(RISCV_KVM_MAX_COUNTERS <= 64);
struct kvm_fw_event {
/* Current value of the event */
- unsigned long value;
+ u64 value;
/* Event monitoring status */
bool started;
@@ -36,6 +36,7 @@ struct kvm_pmc {
bool started;
/* Monitoring event ID */
unsigned long event_idx;
+ struct kvm_vcpu *vcpu;
};
/* PMU data structure per vcpu */
@@ -50,6 +51,12 @@ struct kvm_pmu {
bool init_done;
/* Bit map of all the virtual counter used */
DECLARE_BITMAP(pmc_in_use, RISCV_KVM_MAX_COUNTERS);
+ /* Bit map of all the virtual counters that have overflowed */
+ DECLARE_BITMAP(pmc_overflown, RISCV_KVM_MAX_COUNTERS);
+ /* The address of the counter snapshot area (guest physical address) */
+ gpa_t snapshot_addr;
+ /* The actual data of the snapshot */
+ struct riscv_pmu_snapshot_data *sdata;
};
#define vcpu_to_pmu(vcpu) (&(vcpu)->arch.pmu_context)
@@ -82,9 +89,14 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
unsigned long ctr_mask, unsigned long flags,
unsigned long eidx, u64 evtdata,
struct kvm_vcpu_sbi_return *retdata);
-int kvm_riscv_vcpu_pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
+int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
struct kvm_vcpu_sbi_return *retdata);
+int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
+ struct kvm_vcpu_sbi_return *retdata);
void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu);
+int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low,
+ unsigned long saddr_high, unsigned long flags,
+ struct kvm_vcpu_sbi_return *retdata);
void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu);
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 6afd6bb4882e..381137ce70d4 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -55,6 +55,9 @@
#define MODULES_LOWEST_VADDR (KERNEL_LINK_ADDR - SZ_2G)
#define MODULES_VADDR (PFN_ALIGN((unsigned long)&_end) - SZ_2G)
#define MODULES_END (PFN_ALIGN((unsigned long)&_start))
+#else
+#define MODULES_VADDR VMALLOC_START
+#define MODULES_END VMALLOC_END
#endif
/*
diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index 6e68f8dff76b..112a0a0d9f46 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -131,6 +131,8 @@ enum sbi_ext_pmu_fid {
SBI_EXT_PMU_COUNTER_START,
SBI_EXT_PMU_COUNTER_STOP,
SBI_EXT_PMU_COUNTER_FW_READ,
+ SBI_EXT_PMU_COUNTER_FW_READ_HI,
+ SBI_EXT_PMU_SNAPSHOT_SET_SHMEM,
};
union sbi_pmu_ctr_info {
@@ -147,6 +149,13 @@ union sbi_pmu_ctr_info {
};
};
+/* Data structure to contain the pmu snapshot data */
+struct riscv_pmu_snapshot_data {
+ u64 ctr_overflow_mask;
+ u64 ctr_values[64];
+ u64 reserved[447];
+};
+
#define RISCV_PMU_RAW_EVENT_MASK GENMASK_ULL(47, 0)
#define RISCV_PMU_RAW_EVENT_IDX 0x20000
@@ -232,20 +241,22 @@ enum sbi_pmu_ctr_type {
#define SBI_PMU_EVENT_IDX_INVALID 0xFFFFFFFF
/* Flags defined for config matching function */
-#define SBI_PMU_CFG_FLAG_SKIP_MATCH (1 << 0)
-#define SBI_PMU_CFG_FLAG_CLEAR_VALUE (1 << 1)
-#define SBI_PMU_CFG_FLAG_AUTO_START (1 << 2)
-#define SBI_PMU_CFG_FLAG_SET_VUINH (1 << 3)
-#define SBI_PMU_CFG_FLAG_SET_VSINH (1 << 4)
-#define SBI_PMU_CFG_FLAG_SET_UINH (1 << 5)
-#define SBI_PMU_CFG_FLAG_SET_SINH (1 << 6)
-#define SBI_PMU_CFG_FLAG_SET_MINH (1 << 7)
+#define SBI_PMU_CFG_FLAG_SKIP_MATCH BIT(0)
+#define SBI_PMU_CFG_FLAG_CLEAR_VALUE BIT(1)
+#define SBI_PMU_CFG_FLAG_AUTO_START BIT(2)
+#define SBI_PMU_CFG_FLAG_SET_VUINH BIT(3)
+#define SBI_PMU_CFG_FLAG_SET_VSINH BIT(4)
+#define SBI_PMU_CFG_FLAG_SET_UINH BIT(5)
+#define SBI_PMU_CFG_FLAG_SET_SINH BIT(6)
+#define SBI_PMU_CFG_FLAG_SET_MINH BIT(7)
/* Flags defined for counter start function */
-#define SBI_PMU_START_FLAG_SET_INIT_VALUE (1 << 0)
+#define SBI_PMU_START_FLAG_SET_INIT_VALUE BIT(0)
+#define SBI_PMU_START_FLAG_INIT_SNAPSHOT BIT(1)
/* Flags defined for counter stop function */
-#define SBI_PMU_STOP_FLAG_RESET (1 << 0)
+#define SBI_PMU_STOP_FLAG_RESET BIT(0)
+#define SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT BIT(1)
enum sbi_ext_dbcn_fid {
SBI_EXT_DBCN_CONSOLE_WRITE = 0,
@@ -266,7 +277,7 @@ struct sbi_sta_struct {
u8 pad[47];
} __packed;
-#define SBI_STA_SHMEM_DISABLE -1
+#define SBI_SHMEM_DISABLE -1
/* SBI spec version fields */
#define SBI_SPEC_VERSION_DEFAULT 0x1
@@ -284,6 +295,7 @@ struct sbi_sta_struct {
#define SBI_ERR_ALREADY_AVAILABLE -6
#define SBI_ERR_ALREADY_STARTED -7
#define SBI_ERR_ALREADY_STOPPED -8
+#define SBI_ERR_NO_SHMEM -9
extern unsigned long sbi_spec_version;
struct sbiret {
@@ -355,8 +367,8 @@ static inline unsigned long sbi_minor_version(void)
static inline unsigned long sbi_mk_version(unsigned long major,
unsigned long minor)
{
- return ((major & SBI_SPEC_VERSION_MAJOR_MASK) <<
- SBI_SPEC_VERSION_MAJOR_SHIFT) | minor;
+ return ((major & SBI_SPEC_VERSION_MAJOR_MASK) << SBI_SPEC_VERSION_MAJOR_SHIFT)
+ | (minor & SBI_SPEC_VERSION_MINOR_MASK);
}
int sbi_err_map_linux_errno(int err);
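The riscv_pmu_snapshot_data layout defined above is sized to fill exactly one 4 KiB page: 8 bytes of overflow mask, 64 eight-byte counter values (512 bytes) and 447 reserved eight-byte words (3576 bytes). A compile-time check along these lines (an illustrative sketch, not part of this diff) makes that assumption explicit:

/* Sketch: the SBI PMU snapshot shared-memory area spans one full page. */
static_assert(sizeof(struct riscv_pmu_snapshot_data) == 4096,
	      "riscv_pmu_snapshot_data is defined to be exactly 4 KiB");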
diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
index b1c503c2959c..e878e7cc3978 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -167,6 +167,7 @@ enum KVM_RISCV_ISA_EXT_ID {
KVM_RISCV_ISA_EXT_ZFA,
KVM_RISCV_ISA_EXT_ZTSO,
KVM_RISCV_ISA_EXT_ZACAS,
+ KVM_RISCV_ISA_EXT_SSCOFPMF,
KVM_RISCV_ISA_EXT_MAX,
};
diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index 5e5a82644451..906f9a3a5d65 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -11,7 +11,6 @@
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/moduleloader.h>
-#include <linux/vmalloc.h>
#include <linux/sizes.h>
#include <linux/pgtable.h>
#include <asm/alternative.h>
@@ -905,17 +904,6 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
return 0;
}
-#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
-void *module_alloc(unsigned long size)
-{
- return __vmalloc_node_range(size, 1, MODULES_VADDR,
- MODULES_END, GFP_KERNEL,
- PAGE_KERNEL, VM_FLUSH_RESET_PERMS,
- NUMA_NO_NODE,
- __builtin_return_address(0));
-}
-#endif
-
int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *me)
diff --git a/arch/riscv/kernel/paravirt.c b/arch/riscv/kernel/paravirt.c
index 0d6225fd3194..fa6b0339a65d 100644
--- a/arch/riscv/kernel/paravirt.c
+++ b/arch/riscv/kernel/paravirt.c
@@ -62,7 +62,7 @@ static int sbi_sta_steal_time_set_shmem(unsigned long lo, unsigned long hi,
ret = sbi_ecall(SBI_EXT_STA, SBI_EXT_STA_STEAL_TIME_SET_SHMEM,
lo, hi, flags, 0, 0, 0);
if (ret.error) {
- if (lo == SBI_STA_SHMEM_DISABLE && hi == SBI_STA_SHMEM_DISABLE)
+ if (lo == SBI_SHMEM_DISABLE && hi == SBI_SHMEM_DISABLE)
pr_warn("Failed to disable steal-time shmem");
else
pr_warn("Failed to set steal-time shmem");
@@ -84,8 +84,8 @@ static int pv_time_cpu_online(unsigned int cpu)
static int pv_time_cpu_down_prepare(unsigned int cpu)
{
- return sbi_sta_steal_time_set_shmem(SBI_STA_SHMEM_DISABLE,
- SBI_STA_SHMEM_DISABLE, 0);
+ return sbi_sta_steal_time_set_shmem(SBI_SHMEM_DISABLE,
+ SBI_SHMEM_DISABLE, 0);
}
static u64 pv_time_steal_clock(int cpu)
diff --git a/arch/riscv/kernel/probes/ftrace.c b/arch/riscv/kernel/probes/ftrace.c
index 7142ec42e889..a69dfa610aa8 100644
--- a/arch/riscv/kernel/probes/ftrace.c
+++ b/arch/riscv/kernel/probes/ftrace.c
@@ -11,6 +11,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct kprobe_ctlblk *kcb;
int bit;
+ if (unlikely(kprobe_ftrace_disabled))
+ return;
+
bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0)
return;
diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c
index 2f08c14a933d..e64f2f3064eb 100644
--- a/arch/riscv/kernel/probes/kprobes.c
+++ b/arch/riscv/kernel/probes/kprobes.c
@@ -104,16 +104,6 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
return 0;
}
-#ifdef CONFIG_MMU
-void *alloc_insn_page(void)
-{
- return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
- GFP_KERNEL, PAGE_KERNEL_READ_EXEC,
- VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
- __builtin_return_address(0));
-}
-#endif
-
/* install breakpoint in text */
void __kprobes arch_arm_kprobe(struct kprobe *p)
{
diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c
index a944294f6f23..0f0a9d11bb5f 100644
--- a/arch/riscv/kvm/aia.c
+++ b/arch/riscv/kvm/aia.c
@@ -545,6 +545,9 @@ void kvm_riscv_aia_enable(void)
enable_percpu_irq(hgei_parent_irq,
irq_get_trigger_type(hgei_parent_irq));
csr_set(CSR_HIE, BIT(IRQ_S_GEXT));
+ /* Enable IRQ filtering for overflow interrupt only if sscofpmf is present */
+ if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF))
+ csr_write(CSR_HVIEN, BIT(IRQ_PMU_OVF));
}
void kvm_riscv_aia_disable(void)
@@ -558,6 +561,8 @@ void kvm_riscv_aia_disable(void)
return;
hgctrl = get_cpu_ptr(&aia_hgei);
+ if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF))
+ csr_clear(CSR_HVIEN, BIT(IRQ_PMU_OVF));
/* Disable per-CPU SGEI interrupt */
csr_clear(CSR_HIE, BIT(IRQ_S_GEXT));
disable_percpu_irq(hgei_parent_irq);
diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
index 225a435d9c9a..bab2ec34cd87 100644
--- a/arch/riscv/kvm/main.c
+++ b/arch/riscv/kvm/main.c
@@ -22,22 +22,8 @@ long kvm_arch_dev_ioctl(struct file *filp,
int kvm_arch_hardware_enable(void)
{
- unsigned long hideleg, hedeleg;
-
- hedeleg = 0;
- hedeleg |= (1UL << EXC_INST_MISALIGNED);
- hedeleg |= (1UL << EXC_BREAKPOINT);
- hedeleg |= (1UL << EXC_SYSCALL);
- hedeleg |= (1UL << EXC_INST_PAGE_FAULT);
- hedeleg |= (1UL << EXC_LOAD_PAGE_FAULT);
- hedeleg |= (1UL << EXC_STORE_PAGE_FAULT);
- csr_write(CSR_HEDELEG, hedeleg);
-
- hideleg = 0;
- hideleg |= (1UL << IRQ_VS_SOFT);
- hideleg |= (1UL << IRQ_VS_TIMER);
- hideleg |= (1UL << IRQ_VS_EXT);
- csr_write(CSR_HIDELEG, hideleg);
+ csr_write(CSR_HEDELEG, KVM_HEDELEG_DEFAULT);
+ csr_write(CSR_HIDELEG, KVM_HIDELEG_DEFAULT);
/* VS should access only the time counter directly. Everything else should trap */
csr_write(CSR_HCOUNTEREN, 0x02);
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index a9e2fd7245e1..b63650f9b966 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -550,26 +550,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
return false;
}
-bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
-{
- int ret;
- kvm_pfn_t pfn = pte_pfn(range->arg.pte);
-
- if (!kvm->arch.pgd)
- return false;
-
- WARN_ON(range->end - range->start != 1);
-
- ret = gstage_map_page(kvm, NULL, range->start << PAGE_SHIFT,
- __pfn_to_phys(pfn), PAGE_SIZE, true, true);
- if (ret) {
- kvm_debug("Failed to map G-stage page (error %d)\n", ret);
- return true;
- }
-
- return false;
-}
-
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
pte_t *ptep;
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index b5ca9f2e98ac..17e21df36cc1 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -64,7 +64,9 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
memcpy(csr, reset_csr, sizeof(*csr));
+ spin_lock(&vcpu->arch.reset_cntx_lock);
memcpy(cntx, reset_cntx, sizeof(*cntx));
+ spin_unlock(&vcpu->arch.reset_cntx_lock);
kvm_riscv_vcpu_fp_reset(vcpu);
@@ -102,6 +104,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
struct kvm_cpu_context *cntx;
struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
+ spin_lock_init(&vcpu->arch.mp_state_lock);
+
/* Mark this VCPU never ran */
vcpu->arch.ran_atleast_once = false;
vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
@@ -119,12 +123,16 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
spin_lock_init(&vcpu->arch.hfence_lock);
/* Setup reset state of shadow SSTATUS and HSTATUS CSRs */
+ spin_lock_init(&vcpu->arch.reset_cntx_lock);
+
+ spin_lock(&vcpu->arch.reset_cntx_lock);
cntx = &vcpu->arch.guest_reset_context;
cntx->sstatus = SR_SPP | SR_SPIE;
cntx->hstatus = 0;
cntx->hstatus |= HSTATUS_VTW;
cntx->hstatus |= HSTATUS_SPVP;
cntx->hstatus |= HSTATUS_SPV;
+ spin_unlock(&vcpu->arch.reset_cntx_lock);
if (kvm_riscv_vcpu_alloc_vector_context(vcpu, cntx))
return -ENOMEM;
@@ -201,7 +209,7 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) &&
- !vcpu->arch.power_off && !vcpu->arch.pause);
+ !kvm_riscv_vcpu_stopped(vcpu) && !vcpu->arch.pause);
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
@@ -365,6 +373,13 @@ void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu)
}
}
+ /* Sync up the HVIP.LCOFIP bit changes (clear only) made by the guest */
+ if ((csr->hvip ^ hvip) & (1UL << IRQ_PMU_OVF)) {
+ if (!(hvip & (1UL << IRQ_PMU_OVF)) &&
+ !test_and_set_bit(IRQ_PMU_OVF, v->irqs_pending_mask))
+ clear_bit(IRQ_PMU_OVF, v->irqs_pending);
+ }
+
/* Sync-up AIA high interrupts */
kvm_riscv_vcpu_aia_sync_interrupts(vcpu);
@@ -382,7 +397,8 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
if (irq < IRQ_LOCAL_MAX &&
irq != IRQ_VS_SOFT &&
irq != IRQ_VS_TIMER &&
- irq != IRQ_VS_EXT)
+ irq != IRQ_VS_EXT &&
+ irq != IRQ_PMU_OVF)
return -EINVAL;
set_bit(irq, vcpu->arch.irqs_pending);
@@ -397,14 +413,15 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
{
/*
- * We only allow VS-mode software, timer, and external
+ * We only allow VS-mode software, timer, counter overflow and external
* interrupts when irq is one of the local interrupts
* defined by RISC-V privilege specification.
*/
if (irq < IRQ_LOCAL_MAX &&
irq != IRQ_VS_SOFT &&
irq != IRQ_VS_TIMER &&
- irq != IRQ_VS_EXT)
+ irq != IRQ_VS_EXT &&
+ irq != IRQ_PMU_OVF)
return -EINVAL;
clear_bit(irq, vcpu->arch.irqs_pending);
@@ -429,26 +446,42 @@ bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
return kvm_riscv_vcpu_aia_has_interrupts(vcpu, mask);
}
-void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
+void __kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
{
- vcpu->arch.power_off = true;
+ WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED);
kvm_make_request(KVM_REQ_SLEEP, vcpu);
kvm_vcpu_kick(vcpu);
}
-void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
+void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
+{
+ spin_lock(&vcpu->arch.mp_state_lock);
+ __kvm_riscv_vcpu_power_off(vcpu);
+ spin_unlock(&vcpu->arch.mp_state_lock);
+}
+
+void __kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
{
- vcpu->arch.power_off = false;
+ WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE);
kvm_vcpu_wake_up(vcpu);
}
+void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
+{
+ spin_lock(&vcpu->arch.mp_state_lock);
+ __kvm_riscv_vcpu_power_on(vcpu);
+ spin_unlock(&vcpu->arch.mp_state_lock);
+}
+
+bool kvm_riscv_vcpu_stopped(struct kvm_vcpu *vcpu)
+{
+ return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_STOPPED;
+}
+
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
- if (vcpu->arch.power_off)
- mp_state->mp_state = KVM_MP_STATE_STOPPED;
- else
- mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
+ *mp_state = READ_ONCE(vcpu->arch.mp_state);
return 0;
}
@@ -458,25 +491,36 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
{
int ret = 0;
+ spin_lock(&vcpu->arch.mp_state_lock);
+
switch (mp_state->mp_state) {
case KVM_MP_STATE_RUNNABLE:
- vcpu->arch.power_off = false;
+ WRITE_ONCE(vcpu->arch.mp_state, *mp_state);
break;
case KVM_MP_STATE_STOPPED:
- kvm_riscv_vcpu_power_off(vcpu);
+ __kvm_riscv_vcpu_power_off(vcpu);
break;
default:
ret = -EINVAL;
}
+ spin_unlock(&vcpu->arch.mp_state_lock);
+
return ret;
}
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
- /* TODO; To be implemented later. */
- return -EINVAL;
+ if (dbg->control & KVM_GUESTDBG_ENABLE) {
+ vcpu->guest_debug = dbg->control;
+ vcpu->arch.cfg.hedeleg &= ~BIT(EXC_BREAKPOINT);
+ } else {
+ vcpu->guest_debug = 0;
+ vcpu->arch.cfg.hedeleg |= BIT(EXC_BREAKPOINT);
+ }
+
+ return 0;
}
static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu)
@@ -505,6 +549,10 @@ static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu)
if (riscv_isa_extension_available(isa, SMSTATEEN))
cfg->hstateen0 |= SMSTATEEN0_SSTATEEN0;
}
+
+ cfg->hedeleg = KVM_HEDELEG_DEFAULT;
+ if (vcpu->guest_debug)
+ cfg->hedeleg &= ~BIT(EXC_BREAKPOINT);
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -519,6 +567,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
csr_write(CSR_VSEPC, csr->vsepc);
csr_write(CSR_VSCAUSE, csr->vscause);
csr_write(CSR_VSTVAL, csr->vstval);
+ csr_write(CSR_HEDELEG, cfg->hedeleg);
csr_write(CSR_HVIP, csr->hvip);
csr_write(CSR_VSATP, csr->vsatp);
csr_write(CSR_HENVCFG, cfg->henvcfg);
@@ -584,11 +633,11 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) {
kvm_vcpu_srcu_read_unlock(vcpu);
rcuwait_wait_event(wait,
- (!vcpu->arch.power_off) && (!vcpu->arch.pause),
+ (!kvm_riscv_vcpu_stopped(vcpu)) && (!vcpu->arch.pause),
TASK_INTERRUPTIBLE);
kvm_vcpu_srcu_read_lock(vcpu);
- if (vcpu->arch.power_off || vcpu->arch.pause) {
+ if (kvm_riscv_vcpu_stopped(vcpu) || vcpu->arch.pause) {
/*
* Awaken to handle a signal, request to
* sleep again later.
diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c
index 2415722c01b8..5761f95abb60 100644
--- a/arch/riscv/kvm/vcpu_exit.c
+++ b/arch/riscv/kvm/vcpu_exit.c
@@ -204,6 +204,10 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV)
ret = kvm_riscv_vcpu_sbi_ecall(vcpu, run);
break;
+ case EXC_BREAKPOINT:
+ run->exit_reason = KVM_EXIT_DEBUG;
+ ret = 0;
+ break;
default:
break;
}
diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c
index 994adc26db4b..c676275ea0a0 100644
--- a/arch/riscv/kvm/vcpu_onereg.c
+++ b/arch/riscv/kvm/vcpu_onereg.c
@@ -36,6 +36,7 @@ static const unsigned long kvm_isa_ext_arr[] = {
/* Multi letter extensions (alphabetically sorted) */
KVM_ISA_EXT_ARR(SMSTATEEN),
KVM_ISA_EXT_ARR(SSAIA),
+ KVM_ISA_EXT_ARR(SSCOFPMF),
KVM_ISA_EXT_ARR(SSTC),
KVM_ISA_EXT_ARR(SVINVAL),
KVM_ISA_EXT_ARR(SVNAPOT),
@@ -99,6 +100,9 @@ static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext)
switch (ext) {
case KVM_RISCV_ISA_EXT_H:
return false;
+ case KVM_RISCV_ISA_EXT_SSCOFPMF:
+ /* Sscofpmf depends on interrupt filtering defined in ssaia */
+ return __riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSAIA);
case KVM_RISCV_ISA_EXT_V:
return riscv_v_vstate_ctrl_user_allowed();
default:
@@ -116,6 +120,8 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
case KVM_RISCV_ISA_EXT_C:
case KVM_RISCV_ISA_EXT_I:
case KVM_RISCV_ISA_EXT_M:
+ /* There is no architectural config bit to disable sscofpmf completely */
+ case KVM_RISCV_ISA_EXT_SSCOFPMF:
case KVM_RISCV_ISA_EXT_SSTC:
case KVM_RISCV_ISA_EXT_SVINVAL:
case KVM_RISCV_ISA_EXT_SVNAPOT:
diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c
index 86391a5061dd..04db1f993c47 100644
--- a/arch/riscv/kvm/vcpu_pmu.c
+++ b/arch/riscv/kvm/vcpu_pmu.c
@@ -14,6 +14,7 @@
#include <asm/csr.h>
#include <asm/kvm_vcpu_sbi.h>
#include <asm/kvm_vcpu_pmu.h>
+#include <asm/sbi.h>
#include <linux/bitops.h>
#define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs)
@@ -39,7 +40,7 @@ static u64 kvm_pmu_get_sample_period(struct kvm_pmc *pmc)
u64 sample_period;
if (!pmc->counter_val)
- sample_period = counter_val_mask + 1;
+ sample_period = counter_val_mask;
else
sample_period = (-pmc->counter_val) & counter_val_mask;
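The sample period derived here is the number of counter increments left before the guest-programmed value wraps, so the perf event fires exactly when the virtual counter would overflow. A small worked example (illustrative only; counter_val_mask is assumed to cover a full 64-bit counter) follows:

/*
 * Sketch: with counter_val = 0xFFFFFFFFFFFFFFF0 and mask = ~0ULL,
 * (-counter_val) & mask == 0x10, i.e. the event fires after 16 increments.
 * For counter_val == 0 the period is clamped to the mask itself, since
 * mask + 1 would wrap to zero for a full-width counter and a zero
 * sample period is not usable.
 */
u64 counter_val = 0xFFFFFFFFFFFFFFF0ULL;
u64 mask = ~0ULL;
u64 period = (-counter_val) & mask;	/* 0x10 */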
@@ -196,6 +197,36 @@ static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx,
return kvm_pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask);
}
+static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
+ unsigned long *out_val)
+{
+ struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
+ struct kvm_pmc *pmc;
+ int fevent_code;
+
+ if (!IS_ENABLED(CONFIG_32BIT)) {
+ pr_warn("%s: should be invoked for only RV32\n", __func__);
+ return -EINVAL;
+ }
+
+ if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
+ pr_warn("Invalid counter id [%ld]during read\n", cidx);
+ return -EINVAL;
+ }
+
+ pmc = &kvpmu->pmc[cidx];
+
+ if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW)
+ return -EINVAL;
+
+ fevent_code = get_event_code(pmc->event_idx);
+ pmc->counter_val = kvpmu->fw_event[fevent_code].value;
+
+ *out_val = pmc->counter_val >> 32;
+
+ return 0;
+}
+
static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
unsigned long *out_val)
{
@@ -204,6 +235,11 @@ static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
u64 enabled, running;
int fevent_code;
+ if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
+ pr_warn("Invalid counter id [%ld] during read\n", cidx);
+ return -EINVAL;
+ }
+
pmc = &kvpmu->pmc[cidx];
if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
@@ -229,8 +265,50 @@ static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ct
return 0;
}
-static int kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr,
- unsigned long flags, unsigned long eidx, unsigned long evtdata)
+static void kvm_riscv_pmu_overflow(struct perf_event *perf_event,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ struct kvm_pmc *pmc = perf_event->overflow_handler_context;
+ struct kvm_vcpu *vcpu = pmc->vcpu;
+ struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
+ struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu);
+ u64 period;
+
+ /*
+ * Stop the event counting by directly accessing the perf_event.
+ * Otherwise, this needs to be deferred via a workqueue.
+ * That will introduce skew in the counter value because the actual
+ * physical counter would start after returning from this function.
+ * It will be stopped again once the workqueue is scheduled.
+ */
+ rpmu->pmu.stop(perf_event, PERF_EF_UPDATE);
+
+ /*
+ * The hw counter would start automatically when this function returns.
+ * Thus, the host may continue to take overflow interrupts and inject them
+ * into the guest even without the guest configuring the next event.
+ * Depending on the hardware, the host may see some sluggishness only if
+ * privilege mode filtering is not available. In an ideal world, where qemu
+ * is not the only capable hardware, this can be removed.
+ * FYI: ARM64 does it this way while x86 doesn't do anything as such.
+ * TODO: Should we keep it for RISC-V ?
+ */
+ period = -(local64_read(&perf_event->count));
+
+ local64_set(&perf_event->hw.period_left, 0);
+ perf_event->attr.sample_period = period;
+ perf_event->hw.sample_period = period;
+
+ set_bit(pmc->idx, kvpmu->pmc_overflown);
+ kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF);
+
+ rpmu->pmu.start(perf_event, PERF_EF_RELOAD);
+}
+
+static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr,
+ unsigned long flags, unsigned long eidx,
+ unsigned long evtdata)
{
struct perf_event *event;
@@ -247,7 +325,7 @@ static int kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr
*/
attr->sample_period = kvm_pmu_get_sample_period(pmc);
- event = perf_event_create_kernel_counter(attr, -1, current, NULL, pmc);
+ event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc);
if (IS_ERR(event)) {
pr_err("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
return PTR_ERR(event);
@@ -310,6 +388,80 @@ int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num,
return ret;
}
+static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
+ int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data);
+
+ if (kvpmu->sdata) {
+ if (kvpmu->snapshot_addr != INVALID_GPA) {
+ memset(kvpmu->sdata, 0, snapshot_area_size);
+ kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr,
+ kvpmu->sdata, snapshot_area_size);
+ } else {
+ pr_warn("snapshot address invalid\n");
+ }
+ kfree(kvpmu->sdata);
+ kvpmu->sdata = NULL;
+ }
+ kvpmu->snapshot_addr = INVALID_GPA;
+}
+
+int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low,
+ unsigned long saddr_high, unsigned long flags,
+ struct kvm_vcpu_sbi_return *retdata)
+{
+ struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
+ int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data);
+ int sbiret = 0;
+ gpa_t saddr;
+ unsigned long hva;
+ bool writable;
+
+ if (!kvpmu || flags) {
+ sbiret = SBI_ERR_INVALID_PARAM;
+ goto out;
+ }
+
+ if (saddr_low == SBI_SHMEM_DISABLE && saddr_high == SBI_SHMEM_DISABLE) {
+ kvm_pmu_clear_snapshot_area(vcpu);
+ return 0;
+ }
+
+ saddr = saddr_low;
+
+ if (saddr_high != 0) {
+ if (IS_ENABLED(CONFIG_32BIT))
+ saddr |= ((gpa_t)saddr_high << 32);
+ else
+ sbiret = SBI_ERR_INVALID_ADDRESS;
+ goto out;
+ }
+
+ hva = kvm_vcpu_gfn_to_hva_prot(vcpu, saddr >> PAGE_SHIFT, &writable);
+ if (kvm_is_error_hva(hva) || !writable) {
+ sbiret = SBI_ERR_INVALID_ADDRESS;
+ goto out;
+ }
+
+ kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC);
+ if (!kvpmu->sdata)
+ return -ENOMEM;
+
+ if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) {
+ kfree(kvpmu->sdata);
+ sbiret = SBI_ERR_FAILURE;
+ goto out;
+ }
+
+ kvpmu->snapshot_addr = saddr;
+
+out:
+ retdata->err_val = sbiret;
+
+ return 0;
+}
+
int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu,
struct kvm_vcpu_sbi_return *retdata)
{
@@ -343,20 +495,40 @@ int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
int i, pmc_index, sbiret = 0;
struct kvm_pmc *pmc;
int fevent_code;
+ bool snap_flag_set = flags & SBI_PMU_START_FLAG_INIT_SNAPSHOT;
if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
sbiret = SBI_ERR_INVALID_PARAM;
goto out;
}
+ if (snap_flag_set) {
+ if (kvpmu->snapshot_addr == INVALID_GPA) {
+ sbiret = SBI_ERR_NO_SHMEM;
+ goto out;
+ }
+ if (kvm_vcpu_read_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
+ sizeof(struct riscv_pmu_snapshot_data))) {
+ pr_warn("Unable to read snapshot shared memory while starting counters\n");
+ sbiret = SBI_ERR_FAILURE;
+ goto out;
+ }
+ }
/* Start the counters that have been configured and requested by the guest */
for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
pmc_index = i + ctr_base;
if (!test_bit(pmc_index, kvpmu->pmc_in_use))
continue;
+ /* The guest started the counter again. Reset the overflow status */
+ clear_bit(pmc_index, kvpmu->pmc_overflown);
pmc = &kvpmu->pmc[pmc_index];
- if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE)
+ if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) {
pmc->counter_val = ival;
+ } else if (snap_flag_set) {
+ /* The counter indices in the snapshot are relative to the counter base */
+ pmc->counter_val = kvpmu->sdata->ctr_values[i];
+ }
+
if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
fevent_code = get_event_code(pmc->event_idx);
if (fevent_code >= SBI_PMU_FW_MAX) {
@@ -400,12 +572,19 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
u64 enabled, running;
struct kvm_pmc *pmc;
int fevent_code;
+ bool snap_flag_set = flags & SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
+ bool shmem_needs_update = false;
if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
sbiret = SBI_ERR_INVALID_PARAM;
goto out;
}
+ if (snap_flag_set && kvpmu->snapshot_addr == INVALID_GPA) {
+ sbiret = SBI_ERR_NO_SHMEM;
+ goto out;
+ }
+
/* Stop the counters that have been configured and requested by the guest */
for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
pmc_index = i + ctr_base;
@@ -432,21 +611,49 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
sbiret = SBI_ERR_ALREADY_STOPPED;
}
- if (flags & SBI_PMU_STOP_FLAG_RESET) {
- /* Relase the counter if this is a reset request */
- pmc->counter_val += perf_event_read_value(pmc->perf_event,
- &enabled, &running);
+ if (flags & SBI_PMU_STOP_FLAG_RESET)
+ /* Release the counter if this is a reset request */
kvm_pmu_release_perf_event(pmc);
- }
} else {
sbiret = SBI_ERR_INVALID_PARAM;
}
+
+ if (snap_flag_set && !sbiret) {
+ if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW)
+ pmc->counter_val = kvpmu->fw_event[fevent_code].value;
+ else if (pmc->perf_event)
+ pmc->counter_val += perf_event_read_value(pmc->perf_event,
+ &enabled, &running);
+ /*
+ * The counter and overflow indices in the snapshot region are relative to
+ * cbase. Modify the set bit in the counter mask instead of the pmc_index,
+ * which indicates the absolute counter index.
+ */
+ if (test_bit(pmc_index, kvpmu->pmc_overflown))
+ kvpmu->sdata->ctr_overflow_mask |= BIT(i);
+ kvpmu->sdata->ctr_values[i] = pmc->counter_val;
+ shmem_needs_update = true;
+ }
+
if (flags & SBI_PMU_STOP_FLAG_RESET) {
pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
clear_bit(pmc_index, kvpmu->pmc_in_use);
+ clear_bit(pmc_index, kvpmu->pmc_overflown);
+ if (snap_flag_set) {
+ /*
+ * Only clear the given counter, as the caller is responsible for
+ * validating both the overflow mask and the configured counters.
+ */
+ kvpmu->sdata->ctr_overflow_mask &= ~BIT(i);
+ shmem_needs_update = true;
+ }
}
}
+ if (shmem_needs_update)
+ kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
+ sizeof(struct riscv_pmu_snapshot_data));
+
out:
retdata->err_val = sbiret;
@@ -458,7 +665,8 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
unsigned long eidx, u64 evtdata,
struct kvm_vcpu_sbi_return *retdata)
{
- int ctr_idx, ret, sbiret = 0;
+ int ctr_idx, sbiret = 0;
+ long ret;
bool is_fevent;
unsigned long event_code;
u32 etype = kvm_pmu_get_perf_event_type(eidx);
@@ -517,8 +725,10 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
kvpmu->fw_event[event_code].started = true;
} else {
ret = kvm_pmu_create_perf_event(pmc, &attr, flags, eidx, evtdata);
- if (ret)
- return ret;
+ if (ret) {
+ sbiret = SBI_ERR_NOT_SUPPORTED;
+ goto out;
+ }
}
set_bit(ctr_idx, kvpmu->pmc_in_use);
@@ -530,7 +740,19 @@ out:
return 0;
}
-int kvm_riscv_vcpu_pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
+int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
+ struct kvm_vcpu_sbi_return *retdata)
+{
+ int ret;
+
+ ret = pmu_fw_ctr_read_hi(vcpu, cidx, &retdata->out_val);
+ if (ret == -EINVAL)
+ retdata->err_val = SBI_ERR_INVALID_PARAM;
+
+ return 0;
+}
+
+int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
struct kvm_vcpu_sbi_return *retdata)
{
int ret;
@@ -566,6 +788,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
kvpmu->num_hw_ctrs = num_hw_ctrs + 1;
kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX;
memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
+ kvpmu->snapshot_addr = INVALID_GPA;
if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) {
pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA");
@@ -585,6 +808,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
pmc = &kvpmu->pmc[i];
pmc->idx = i;
pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
+ pmc->vcpu = vcpu;
if (i < kvpmu->num_hw_ctrs) {
pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW;
if (i < 3)
@@ -601,7 +825,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
pmc->cinfo.csr = CSR_CYCLE + i;
} else {
pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW;
- pmc->cinfo.width = BITS_PER_LONG - 1;
+ pmc->cinfo.width = 63;
}
}
@@ -617,14 +841,16 @@ void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu)
if (!kvpmu)
return;
- for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_MAX_COUNTERS) {
+ for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) {
pmc = &kvpmu->pmc[i];
pmc->counter_val = 0;
kvm_pmu_release_perf_event(pmc);
pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
}
- bitmap_zero(kvpmu->pmc_in_use, RISCV_MAX_COUNTERS);
+ bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS);
+ bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS);
memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
+ kvm_pmu_clear_snapshot_area(vcpu);
}
void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu)
diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
index 72a2ffb8dcd1..62f409d4176e 100644
--- a/arch/riscv/kvm/vcpu_sbi.c
+++ b/arch/riscv/kvm/vcpu_sbi.c
@@ -138,8 +138,11 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,
unsigned long i;
struct kvm_vcpu *tmp;
- kvm_for_each_vcpu(i, tmp, vcpu->kvm)
- tmp->arch.power_off = true;
+ kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+ spin_lock(&vcpu->arch.mp_state_lock);
+ WRITE_ONCE(tmp->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED);
+ spin_unlock(&vcpu->arch.mp_state_lock);
+ }
kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
memset(&run->system_event, 0, sizeof(run->system_event));
diff --git a/arch/riscv/kvm/vcpu_sbi_hsm.c b/arch/riscv/kvm/vcpu_sbi_hsm.c
index 7dca0e9381d9..dce667f4b6ab 100644
--- a/arch/riscv/kvm/vcpu_sbi_hsm.c
+++ b/arch/riscv/kvm/vcpu_sbi_hsm.c
@@ -18,13 +18,20 @@ static int kvm_sbi_hsm_vcpu_start(struct kvm_vcpu *vcpu)
struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
struct kvm_vcpu *target_vcpu;
unsigned long target_vcpuid = cp->a0;
+ int ret = 0;
target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid);
if (!target_vcpu)
return SBI_ERR_INVALID_PARAM;
- if (!target_vcpu->arch.power_off)
- return SBI_ERR_ALREADY_AVAILABLE;
+ spin_lock(&target_vcpu->arch.mp_state_lock);
+
+ if (!kvm_riscv_vcpu_stopped(target_vcpu)) {
+ ret = SBI_ERR_ALREADY_AVAILABLE;
+ goto out;
+ }
+
+ spin_lock(&target_vcpu->arch.reset_cntx_lock);
reset_cntx = &target_vcpu->arch.guest_reset_context;
/* start address */
reset_cntx->sepc = cp->a1;
@@ -32,21 +39,35 @@ static int kvm_sbi_hsm_vcpu_start(struct kvm_vcpu *vcpu)
reset_cntx->a0 = target_vcpuid;
/* private data passed from kernel */
reset_cntx->a1 = cp->a2;
+ spin_unlock(&target_vcpu->arch.reset_cntx_lock);
+
kvm_make_request(KVM_REQ_VCPU_RESET, target_vcpu);
- kvm_riscv_vcpu_power_on(target_vcpu);
+ __kvm_riscv_vcpu_power_on(target_vcpu);
- return 0;
+out:
+ spin_unlock(&target_vcpu->arch.mp_state_lock);
+
+ return ret;
}
static int kvm_sbi_hsm_vcpu_stop(struct kvm_vcpu *vcpu)
{
- if (vcpu->arch.power_off)
- return SBI_ERR_FAILURE;
+ int ret = 0;
- kvm_riscv_vcpu_power_off(vcpu);
+ spin_lock(&vcpu->arch.mp_state_lock);
- return 0;
+ if (kvm_riscv_vcpu_stopped(vcpu)) {
+ ret = SBI_ERR_FAILURE;
+ goto out;
+ }
+
+ __kvm_riscv_vcpu_power_off(vcpu);
+
+out:
+ spin_unlock(&vcpu->arch.mp_state_lock);
+
+ return ret;
}
static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu)
@@ -58,7 +79,7 @@ static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu)
target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid);
if (!target_vcpu)
return SBI_ERR_INVALID_PARAM;
- if (!target_vcpu->arch.power_off)
+ if (!kvm_riscv_vcpu_stopped(target_vcpu))
return SBI_HSM_STATE_STARTED;
else if (vcpu->stat.generic.blocking)
return SBI_HSM_STATE_SUSPENDED;
@@ -71,14 +92,11 @@ static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
{
int ret = 0;
struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
- struct kvm *kvm = vcpu->kvm;
unsigned long funcid = cp->a6;
switch (funcid) {
case SBI_EXT_HSM_HART_START:
- mutex_lock(&kvm->lock);
ret = kvm_sbi_hsm_vcpu_start(vcpu);
- mutex_unlock(&kvm->lock);
break;
case SBI_EXT_HSM_HART_STOP:
ret = kvm_sbi_hsm_vcpu_stop(vcpu);
diff --git a/arch/riscv/kvm/vcpu_sbi_pmu.c b/arch/riscv/kvm/vcpu_sbi_pmu.c
index 7eca72df2cbd..e4be34e03e83 100644
--- a/arch/riscv/kvm/vcpu_sbi_pmu.c
+++ b/arch/riscv/kvm/vcpu_sbi_pmu.c
@@ -42,9 +42,9 @@ static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
#endif
/*
* This can fail if perf core framework fails to create an event.
- * Forward the error to userspace because it's an error which
- * happened within the host kernel. The other option would be
- * to convert to an SBI error and forward to the guest.
+ * No need to forward the error to userspace and exit the guest.
+ * The operation can continue without profiling. Forward the
+ * appropriate SBI error to the guest.
*/
ret = kvm_riscv_vcpu_pmu_ctr_cfg_match(vcpu, cp->a0, cp->a1,
cp->a2, cp->a3, temp, retdata);
@@ -62,7 +62,16 @@ static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
ret = kvm_riscv_vcpu_pmu_ctr_stop(vcpu, cp->a0, cp->a1, cp->a2, retdata);
break;
case SBI_EXT_PMU_COUNTER_FW_READ:
- ret = kvm_riscv_vcpu_pmu_ctr_read(vcpu, cp->a0, retdata);
+ ret = kvm_riscv_vcpu_pmu_fw_ctr_read(vcpu, cp->a0, retdata);
+ break;
+ case SBI_EXT_PMU_COUNTER_FW_READ_HI:
+ if (IS_ENABLED(CONFIG_32BIT))
+ ret = kvm_riscv_vcpu_pmu_fw_ctr_read_hi(vcpu, cp->a0, retdata);
+ else
+ retdata->out_val = 0;
+ break;
+ case SBI_EXT_PMU_SNAPSHOT_SET_SHMEM:
+ ret = kvm_riscv_vcpu_pmu_snapshot_set_shmem(vcpu, cp->a0, cp->a1, cp->a2, retdata);
break;
default:
retdata->err_val = SBI_ERR_NOT_SUPPORTED;
diff --git a/arch/riscv/kvm/vcpu_sbi_sta.c b/arch/riscv/kvm/vcpu_sbi_sta.c
index d8cf9ca28c61..5f35427114c1 100644
--- a/arch/riscv/kvm/vcpu_sbi_sta.c
+++ b/arch/riscv/kvm/vcpu_sbi_sta.c
@@ -93,8 +93,8 @@ static int kvm_sbi_sta_steal_time_set_shmem(struct kvm_vcpu *vcpu)
if (flags != 0)
return SBI_ERR_INVALID_PARAM;
- if (shmem_phys_lo == SBI_STA_SHMEM_DISABLE &&
- shmem_phys_hi == SBI_STA_SHMEM_DISABLE) {
+ if (shmem_phys_lo == SBI_SHMEM_DISABLE &&
+ shmem_phys_hi == SBI_SHMEM_DISABLE) {
vcpu->arch.sta.shmem = INVALID_GPA;
return 0;
}
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
index ce58bc48e5b8..7396b8654f45 100644
--- a/arch/riscv/kvm/vm.c
+++ b/arch/riscv/kvm/vm.c
@@ -186,6 +186,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_READONLY_MEM:
case KVM_CAP_MP_STATE:
case KVM_CAP_IMMEDIATE_EXIT:
+ case KVM_CAP_SET_GUEST_DEBUG:
r = 1;
break;
case KVM_CAP_NR_VCPUS:
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 968761843203..9940171c79f0 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -24,6 +24,7 @@
#include <linux/elf.h>
#endif
#include <linux/kfence.h>
+#include <linux/execmem.h>
#include <asm/fixmap.h>
#include <asm/io.h>
@@ -1481,3 +1482,37 @@ void __init pgtable_cache_init(void)
preallocate_pgd_pages_range(MODULES_VADDR, MODULES_END, "bpf/modules");
}
#endif
+
+#ifdef CONFIG_EXECMEM
+#ifdef CONFIG_MMU
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
+{
+ execmem_info = (struct execmem_info){
+ .ranges = {
+ [EXECMEM_DEFAULT] = {
+ .start = MODULES_VADDR,
+ .end = MODULES_END,
+ .pgprot = PAGE_KERNEL,
+ .alignment = 1,
+ },
+ [EXECMEM_KPROBES] = {
+ .start = VMALLOC_START,
+ .end = VMALLOC_END,
+ .pgprot = PAGE_KERNEL_READ_EXEC,
+ .alignment = 1,
+ },
+ [EXECMEM_BPF] = {
+ .start = BPF_JIT_REGION_START,
+ .end = BPF_JIT_REGION_END,
+ .pgprot = PAGE_KERNEL,
+ .alignment = PAGE_SIZE,
+ },
+ },
+ };
+
+ return &execmem_info;
+}
+#endif /* CONFIG_MMU */
+#endif /* CONFIG_EXECMEM */
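With these ranges registered, the arch-specific allocators removed elsewhere in this diff (module_alloc(), alloc_insn_page(), bpf_jit_alloc_exec()) give way to the generic execmem allocator, which picks the range matching the requested type. As a rough usage sketch (mirroring the s390 kprobes hunk later in this diff, not code added by this patch):

void *alloc_insn_page(void)
{
	/* Served from the EXECMEM_KPROBES range declared above:
	 * VMALLOC_START..VMALLOC_END with PAGE_KERNEL_READ_EXEC. */
	return execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
}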
diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c
index 8a69d6d81e32..0a96abdaca65 100644
--- a/arch/riscv/net/bpf_jit_core.c
+++ b/arch/riscv/net/bpf_jit_core.c
@@ -221,19 +221,6 @@ u64 bpf_jit_alloc_exec_limit(void)
return BPF_JIT_REGION_SIZE;
}
-void *bpf_jit_alloc_exec(unsigned long size)
-{
- return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,
- BPF_JIT_REGION_END, GFP_KERNEL,
- PAGE_KERNEL, 0, NUMA_NO_NODE,
- __builtin_return_address(0));
-}
-
-void bpf_jit_free_exec(void *addr)
-{
- return vfree(addr);
-}
-
void *bpf_arch_text_copy(void *dst, void *src, size_t len)
{
int ret;
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index c46381ea04ec..ddf2ee47cb87 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -7,13 +7,13 @@
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
*/
-#include <linux/moduleloader.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/kprobes.h>
+#include <linux/execmem.h>
#include <trace/syscall.h>
#include <asm/asm-offsets.h>
#include <asm/text-patching.h>
@@ -220,7 +220,7 @@ static int __init ftrace_plt_init(void)
{
const char *start, *end;
- ftrace_plt = module_alloc(PAGE_SIZE);
+ ftrace_plt = execmem_alloc(EXECMEM_FTRACE, PAGE_SIZE);
if (!ftrace_plt)
panic("cannot allocate ftrace plt\n");
@@ -296,6 +296,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct kprobe *p;
int bit;
+ if (unlikely(kprobe_ftrace_disabled))
+ return;
+
bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0)
return;
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index f0cf20d4b3c5..3c1b1be744de 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -9,7 +9,6 @@
#define pr_fmt(fmt) "kprobes: " fmt
-#include <linux/moduleloader.h>
#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/preempt.h>
@@ -21,6 +20,7 @@
#include <linux/slab.h>
#include <linux/hardirq.h>
#include <linux/ftrace.h>
+#include <linux/execmem.h>
#include <asm/set_memory.h>
#include <asm/sections.h>
#include <asm/dis.h>
@@ -38,7 +38,7 @@ void *alloc_insn_page(void)
{
void *page;
- page = module_alloc(PAGE_SIZE);
+ page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
if (!page)
return NULL;
set_memory_rox((unsigned long)page, 1);
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 42215f9404af..91e207b50394 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -21,6 +21,7 @@
#include <linux/moduleloader.h>
#include <linux/bug.h>
#include <linux/memory.h>
+#include <linux/execmem.h>
#include <asm/alternative.h>
#include <asm/nospec-branch.h>
#include <asm/facility.h>
@@ -36,47 +37,10 @@
#define PLT_ENTRY_SIZE 22
-static unsigned long get_module_load_offset(void)
-{
- static DEFINE_MUTEX(module_kaslr_mutex);
- static unsigned long module_load_offset;
-
- if (!kaslr_enabled())
- return 0;
- /*
- * Calculate the module_load_offset the first time this code
- * is called. Once calculated it stays the same until reboot.
- */
- mutex_lock(&module_kaslr_mutex);
- if (!module_load_offset)
- module_load_offset = get_random_u32_inclusive(1, 1024) * PAGE_SIZE;
- mutex_unlock(&module_kaslr_mutex);
- return module_load_offset;
-}
-
-void *module_alloc(unsigned long size)
-{
- gfp_t gfp_mask = GFP_KERNEL;
- void *p;
-
- if (PAGE_ALIGN(size) > MODULES_LEN)
- return NULL;
- p = __vmalloc_node_range(size, MODULE_ALIGN,
- MODULES_VADDR + get_module_load_offset(),
- MODULES_END, gfp_mask, PAGE_KERNEL,
- VM_FLUSH_RESET_PERMS | VM_DEFER_KMEMLEAK,
- NUMA_NO_NODE, __builtin_return_address(0));
- if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) {
- vfree(p);
- return NULL;
- }
- return p;
-}
-
#ifdef CONFIG_FUNCTION_TRACER
void module_arch_cleanup(struct module *mod)
{
- module_memfree(mod->arch.trampolines_start);
+ execmem_free(mod->arch.trampolines_start);
}
#endif
@@ -510,7 +474,7 @@ static int module_alloc_ftrace_hotpatch_trampolines(struct module *me,
size = FTRACE_HOTPATCH_TRAMPOLINES_SIZE(s->sh_size);
numpages = DIV_ROUND_UP(size, PAGE_SIZE);
- start = module_alloc(numpages * PAGE_SIZE);
+ start = execmem_alloc(EXECMEM_FTRACE, numpages * PAGE_SIZE);
if (!start)
return -ENOMEM;
set_memory_rox((unsigned long)start, numpages);
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index f6391442c0c2..e769d2726f4e 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -49,6 +49,7 @@
#include <asm/uv.h>
#include <linux/virtio_anchor.h>
#include <linux/virtio_config.h>
+#include <linux/execmem.h>
pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(".bss..swapper_pg_dir");
pgd_t invalid_pg_dir[PTRS_PER_PGD] __section(".bss..invalid_pg_dir");
@@ -302,3 +303,32 @@ void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
vmem_remove_mapping(start, size);
}
#endif /* CONFIG_MEMORY_HOTPLUG */
+
+#ifdef CONFIG_EXECMEM
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
+{
+ unsigned long module_load_offset = 0;
+ unsigned long start;
+
+ if (kaslr_enabled())
+ module_load_offset = get_random_u32_inclusive(1, 1024) * PAGE_SIZE;
+
+ start = MODULES_VADDR + module_load_offset;
+
+ execmem_info = (struct execmem_info){
+ .ranges = {
+ [EXECMEM_DEFAULT] = {
+ .flags = EXECMEM_KASAN_SHADOW,
+ .start = start,
+ .end = MODULES_END,
+ .pgprot = PAGE_KERNEL,
+ .alignment = MODULE_ALIGN,
+ },
+ },
+ };
+
+ return &execmem_info;
+}
+#endif /* CONFIG_EXECMEM */
diff --git a/arch/sh/configs/edosk7705_defconfig b/arch/sh/configs/edosk7705_defconfig
index 9ee35269bee2..ab3bf72264df 100644
--- a/arch/sh/configs/edosk7705_defconfig
+++ b/arch/sh/configs/edosk7705_defconfig
@@ -6,7 +6,7 @@
# CONFIG_PRINTK is not set
# CONFIG_BUG is not set
# CONFIG_ELF_CORE is not set
-# CONFIG_BASE_FULL is not set
+CONFIG_BASE_SMALL=y
# CONFIG_FUTEX is not set
# CONFIG_EPOLL is not set
# CONFIG_SIGNALFD is not set
diff --git a/arch/sh/configs/se7619_defconfig b/arch/sh/configs/se7619_defconfig
index 14d0f5ead502..4765966fec99 100644
--- a/arch/sh/configs/se7619_defconfig
+++ b/arch/sh/configs/se7619_defconfig
@@ -4,7 +4,7 @@ CONFIG_LOG_BUF_SHIFT=14
# CONFIG_KALLSYMS is not set
# CONFIG_HOTPLUG is not set
# CONFIG_ELF_CORE is not set
-# CONFIG_BASE_FULL is not set
+CONFIG_BASE_SMALL=y
# CONFIG_FUTEX is not set
# CONFIG_EPOLL is not set
# CONFIG_VM_EVENT_COUNTERS is not set
diff --git a/arch/sh/configs/se7712_defconfig b/arch/sh/configs/se7712_defconfig
index dc854293da43..20f07aee5bde 100644
--- a/arch/sh/configs/se7712_defconfig
+++ b/arch/sh/configs/se7712_defconfig
@@ -7,7 +7,7 @@ CONFIG_LOG_BUF_SHIFT=14
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_KALLSYMS_ALL=y
# CONFIG_BUG is not set
-# CONFIG_BASE_FULL is not set
+CONFIG_BASE_SMALL=y
# CONFIG_SHMEM is not set
CONFIG_MODULES=y
# CONFIG_BLK_DEV_BSG is not set
diff --git a/arch/sh/configs/se7721_defconfig b/arch/sh/configs/se7721_defconfig
index c891945b8a90..00862d3c030d 100644
--- a/arch/sh/configs/se7721_defconfig
+++ b/arch/sh/configs/se7721_defconfig
@@ -7,7 +7,7 @@ CONFIG_LOG_BUF_SHIFT=14
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_KALLSYMS_ALL=y
# CONFIG_BUG is not set
-# CONFIG_BASE_FULL is not set
+CONFIG_BASE_SMALL=y
# CONFIG_SHMEM is not set
CONFIG_MODULES=y
# CONFIG_BLK_DEV_BSG is not set
diff --git a/arch/sh/configs/shmin_defconfig b/arch/sh/configs/shmin_defconfig
index e078b193a78a..bfeb004f130e 100644
--- a/arch/sh/configs/shmin_defconfig
+++ b/arch/sh/configs/shmin_defconfig
@@ -5,7 +5,7 @@ CONFIG_LOG_BUF_SHIFT=14
# CONFIG_HOTPLUG is not set
# CONFIG_BUG is not set
# CONFIG_ELF_CORE is not set
-# CONFIG_BASE_FULL is not set
+CONFIG_BASE_SMALL=y
# CONFIG_FUTEX is not set
# CONFIG_EPOLL is not set
# CONFIG_SHMEM is not set
diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
index 9e85d57ac3f2..62bcafe38b1f 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -432,6 +432,8 @@ static inline int io_remap_pfn_range(struct vm_area_struct *vma,
#define VMALLOC_START _AC(0xfe600000,UL)
#define VMALLOC_END _AC(0xffc00000,UL)
+#define MODULES_VADDR VMALLOC_START
+#define MODULES_END VMALLOC_END
/* We provide our own get_unmapped_area to cope with VA holes for userland */
#define HAVE_ARCH_UNMAPPED_AREA
diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c
index 66c45a2764bc..b8c51cc23d96 100644
--- a/arch/sparc/kernel/module.c
+++ b/arch/sparc/kernel/module.c
@@ -21,36 +21,6 @@
#include "entry.h"
-#ifdef CONFIG_SPARC64
-
-#include <linux/jump_label.h>
-
-static void *module_map(unsigned long size)
-{
- if (PAGE_ALIGN(size) > MODULES_LEN)
- return NULL;
- return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
- GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
- __builtin_return_address(0));
-}
-#else
-static void *module_map(unsigned long size)
-{
- return vmalloc(size);
-}
-#endif /* CONFIG_SPARC64 */
-
-void *module_alloc(unsigned long size)
-{
- void *ret;
-
- ret = module_map(size);
- if (ret)
- memset(ret, 0, size);
-
- return ret;
-}
-
/* Make generic code ignore STT_REGISTER dummy undefined symbols. */
int module_frob_arch_sections(Elf_Ehdr *hdr,
Elf_Shdr *sechdrs,
diff --git a/arch/sparc/mm/Makefile b/arch/sparc/mm/Makefile
index 809d993f6d88..2d1752108d77 100644
--- a/arch/sparc/mm/Makefile
+++ b/arch/sparc/mm/Makefile
@@ -14,3 +14,5 @@ obj-$(CONFIG_SPARC32) += leon_mm.o
# Only used by sparc64
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
+
+obj-$(CONFIG_EXECMEM) += execmem.o
diff --git a/arch/sparc/mm/execmem.c b/arch/sparc/mm/execmem.c
new file mode 100644
index 000000000000..0fac97dd5728
--- /dev/null
+++ b/arch/sparc/mm/execmem.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/mm.h>
+#include <linux/execmem.h>
+
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
+{
+ execmem_info = (struct execmem_info){
+ .ranges = {
+ [EXECMEM_DEFAULT] = {
+ .start = MODULES_VADDR,
+ .end = MODULES_END,
+ .pgprot = PAGE_KERNEL,
+ .alignment = 1,
+ },
+ },
+ };
+
+ return &execmem_info;
+}
diff --git a/arch/sparc/net/bpf_jit_comp_32.c b/arch/sparc/net/bpf_jit_comp_32.c
index da2df1e84ed4..bda2dbd3f4c5 100644
--- a/arch/sparc/net/bpf_jit_comp_32.c
+++ b/arch/sparc/net/bpf_jit_comp_32.c
@@ -1,10 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/moduleloader.h>
#include <linux/workqueue.h>
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/cache.h>
#include <linux/if_vlan.h>
+#include <linux/execmem.h>
#include <asm/cacheflush.h>
#include <asm/ptrace.h>
@@ -713,7 +713,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf];
if (unlikely(proglen + ilen > oldproglen)) {
pr_err("bpb_jit_compile fatal error\n");
kfree(addrs);
- module_memfree(image);
+ execmem_free(image);
return;
}
memcpy(image + proglen, temp, ilen);
@@ -736,7 +736,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf];
break;
}
if (proglen == oldproglen) {
- image = module_alloc(proglen);
+ image = execmem_alloc(EXECMEM_BPF, proglen);
if (!image)
goto out;
}
@@ -758,7 +758,7 @@ out:
void bpf_jit_free(struct bpf_prog *fp)
{
if (fp->jited)
- module_memfree(fp->bpf_func);
+ execmem_free(fp->bpf_func);
bpf_prog_unlock_free(fp);
}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9d16fee6bdb8..bc47bc9841ff 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -34,6 +34,7 @@ config X86_64
select SWIOTLB
select ARCH_HAS_ELFCORE_COMPAT
select ZONE_DMA32
+ select EXECMEM if DYNAMIC_FTRACE
config FORCE_DYNAMIC_FTRACE
def_bool y
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index a2be3aefff9f..f86ad3335529 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -143,6 +143,9 @@ extern void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfe
extern u64 xstate_get_guest_group_perm(void);
+extern void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
+
+
/* KVM specific functions */
extern bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu);
extern void fpu_free_guest_fpstate(struct fpu_guest *gfpu);
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 91ca9a9ee3a2..ae5482a2f0ca 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -207,18 +207,11 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image);
extern void kdump_nmi_shootdown_cpus(void);
#ifdef CONFIG_CRASH_HOTPLUG
-void arch_crash_handle_hotplug_event(struct kimage *image);
+void arch_crash_handle_hotplug_event(struct kimage *image, void *arg);
#define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event
-#ifdef CONFIG_HOTPLUG_CPU
-int arch_crash_hotplug_cpu_support(void);
-#define crash_hotplug_cpu_support arch_crash_hotplug_cpu_support
-#endif
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-int arch_crash_hotplug_memory_support(void);
-#define crash_hotplug_memory_support arch_crash_hotplug_memory_support
-#endif
+int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags);
+#define arch_crash_hotplug_support arch_crash_hotplug_support
unsigned int arch_crash_get_elfcorehdr_size(void);
#define crash_get_elfcorehdr_size arch_crash_get_elfcorehdr_size
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 110d7f29ca9a..5187fcf4b610 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -121,6 +121,7 @@ KVM_X86_OP(enter_smm)
KVM_X86_OP(leave_smm)
KVM_X86_OP(enable_smi_window)
#endif
+KVM_X86_OP_OPTIONAL(dev_get_attr)
KVM_X86_OP_OPTIONAL(mem_enc_ioctl)
KVM_X86_OP_OPTIONAL(mem_enc_register_region)
KVM_X86_OP_OPTIONAL(mem_enc_unregister_region)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6efd1497b026..ece45b3f6f20 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -254,28 +254,31 @@ enum x86_intercept_stage;
KVM_GUESTDBG_INJECT_DB | \
KVM_GUESTDBG_BLOCKIRQ)
+#define PFERR_PRESENT_MASK BIT(0)
+#define PFERR_WRITE_MASK BIT(1)
+#define PFERR_USER_MASK BIT(2)
+#define PFERR_RSVD_MASK BIT(3)
+#define PFERR_FETCH_MASK BIT(4)
+#define PFERR_PK_MASK BIT(5)
+#define PFERR_SGX_MASK BIT(15)
+#define PFERR_GUEST_RMP_MASK BIT_ULL(31)
+#define PFERR_GUEST_FINAL_MASK BIT_ULL(32)
+#define PFERR_GUEST_PAGE_MASK BIT_ULL(33)
+#define PFERR_GUEST_ENC_MASK BIT_ULL(34)
+#define PFERR_GUEST_SIZEM_MASK BIT_ULL(35)
+#define PFERR_GUEST_VMPL_MASK BIT_ULL(36)
-#define PFERR_PRESENT_BIT 0
-#define PFERR_WRITE_BIT 1
-#define PFERR_USER_BIT 2
-#define PFERR_RSVD_BIT 3
-#define PFERR_FETCH_BIT 4
-#define PFERR_PK_BIT 5
-#define PFERR_SGX_BIT 15
-#define PFERR_GUEST_FINAL_BIT 32
-#define PFERR_GUEST_PAGE_BIT 33
-#define PFERR_IMPLICIT_ACCESS_BIT 48
-
-#define PFERR_PRESENT_MASK BIT(PFERR_PRESENT_BIT)
-#define PFERR_WRITE_MASK BIT(PFERR_WRITE_BIT)
-#define PFERR_USER_MASK BIT(PFERR_USER_BIT)
-#define PFERR_RSVD_MASK BIT(PFERR_RSVD_BIT)
-#define PFERR_FETCH_MASK BIT(PFERR_FETCH_BIT)
-#define PFERR_PK_MASK BIT(PFERR_PK_BIT)
-#define PFERR_SGX_MASK BIT(PFERR_SGX_BIT)
-#define PFERR_GUEST_FINAL_MASK BIT_ULL(PFERR_GUEST_FINAL_BIT)
-#define PFERR_GUEST_PAGE_MASK BIT_ULL(PFERR_GUEST_PAGE_BIT)
-#define PFERR_IMPLICIT_ACCESS BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT)
+/*
+ * IMPLICIT_ACCESS is a KVM-defined flag used to correctly perform SMAP checks
+ * when emulating instructions that trigger implicit access.
+ */
+#define PFERR_IMPLICIT_ACCESS BIT_ULL(48)
+/*
+ * PRIVATE_ACCESS is a KVM-defined flag used to indicate that a fault occurred
+ * when the guest was accessing private memory.
+ */
+#define PFERR_PRIVATE_ACCESS BIT_ULL(49)
+#define PFERR_SYNTHETIC_MASK (PFERR_IMPLICIT_ACCESS | PFERR_PRIVATE_ACCESS)
#define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \
PFERR_WRITE_MASK | \
@@ -994,9 +997,6 @@ struct kvm_vcpu_arch {
u64 msr_kvm_poll_control;
- /* set at EPT violation at this point */
- unsigned long exit_qualification;
-
/* pv related host specific info */
struct {
bool pv_unhalted;
@@ -1280,12 +1280,14 @@ enum kvm_apicv_inhibit {
};
struct kvm_arch {
- unsigned long vm_type;
unsigned long n_used_mmu_pages;
unsigned long n_requested_mmu_pages;
unsigned long n_max_mmu_pages;
unsigned int indirect_shadow_pages;
u8 mmu_valid_gen;
+ u8 vm_type;
+ bool has_private_mem;
+ bool has_protected_state;
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
struct list_head active_mmu_pages;
struct list_head zapped_obsolete_pages;
@@ -1312,6 +1314,8 @@ struct kvm_arch {
*/
spinlock_t mmu_unsync_pages_lock;
+ u64 shadow_mmio_value;
+
struct iommu_domain *iommu_domain;
bool iommu_noncoherent;
#define __KVM_HAVE_ARCH_NONCOHERENT_DMA
@@ -1779,6 +1783,7 @@ struct kvm_x86_ops {
void (*enable_smi_window)(struct kvm_vcpu *vcpu);
#endif
+ int (*dev_get_attr)(u32 group, u64 attr, u64 *val);
int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp);
int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp);
int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp);
@@ -1844,6 +1849,7 @@ struct kvm_arch_async_pf {
gfn_t gfn;
unsigned long cr3;
bool direct_map;
+ u64 error_code;
};
extern u32 __read_mostly kvm_nr_uret_msrs;
@@ -2140,6 +2146,10 @@ static inline void kvm_clear_apicv_inhibit(struct kvm *kvm,
kvm_set_or_clear_apicv_inhibit(kvm, reason, false);
}
+unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
+ unsigned long a0, unsigned long a1,
+ unsigned long a2, unsigned long a3,
+ int op_64_bit, int cpl);
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
@@ -2153,8 +2163,9 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd);
void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level,
int tdp_max_root_level, int tdp_huge_page_level);
+
#ifdef CONFIG_KVM_PRIVATE_MEM
-#define kvm_arch_has_private_mem(kvm) ((kvm)->arch.vm_type != KVM_X86_DEFAULT_VM)
+#define kvm_arch_has_private_mem(kvm) ((kvm)->arch.has_private_mem)
#else
#define kvm_arch_has_private_mem(kvm) false
#endif
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index c72c7ff78fcd..d593e52e6635 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -16,10 +16,10 @@ extern int pic_mode;
* Summit or generic (i.e. installer) kernels need lots of bus entries.
* Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets.
*/
-#if CONFIG_BASE_SMALL == 0
-# define MAX_MP_BUSSES 260
-#else
+#ifdef CONFIG_BASE_SMALL
# define MAX_MP_BUSSES 32
+#else
+# define MAX_MP_BUSSES 260
#endif
#define MAX_IRQ_SOURCES 256
diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index b463fcbd4b90..5a8246dd532f 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -54,8 +54,10 @@
(((unsigned long)fn) << 32))
/* AP Reset Hold */
-#define GHCB_MSR_AP_RESET_HOLD_REQ 0x006
-#define GHCB_MSR_AP_RESET_HOLD_RESP 0x007
+#define GHCB_MSR_AP_RESET_HOLD_REQ 0x006
+#define GHCB_MSR_AP_RESET_HOLD_RESP 0x007
+#define GHCB_MSR_AP_RESET_HOLD_RESULT_POS 12
+#define GHCB_MSR_AP_RESET_HOLD_RESULT_MASK GENMASK_ULL(51, 0)
/* GHCB GPA Register */
#define GHCB_MSR_REG_GPA_REQ 0x012
@@ -99,6 +101,8 @@ enum psc_op {
/* GHCB Hypervisor Feature Request/Response */
#define GHCB_MSR_HV_FT_REQ 0x080
#define GHCB_MSR_HV_FT_RESP 0x081
+#define GHCB_MSR_HV_FT_POS 12
+#define GHCB_MSR_HV_FT_MASK GENMASK_ULL(51, 0)
#define GHCB_MSR_HV_FT_RESP_VAL(v) \
/* GHCBData[63:12] */ \
(((u64)(v) & GENMASK_ULL(63, 12)) >> 12)
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 4dba17363008..d77a31039f24 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -71,6 +71,7 @@
#define SECONDARY_EXEC_ENCLS_EXITING VMCS_CONTROL_BIT(ENCLS_EXITING)
#define SECONDARY_EXEC_RDSEED_EXITING VMCS_CONTROL_BIT(RDSEED_EXITING)
#define SECONDARY_EXEC_ENABLE_PML VMCS_CONTROL_BIT(PAGE_MOD_LOGGING)
+#define SECONDARY_EXEC_EPT_VIOLATION_VE VMCS_CONTROL_BIT(EPT_VIOLATION_VE)
#define SECONDARY_EXEC_PT_CONCEAL_VMX VMCS_CONTROL_BIT(PT_CONCEAL_VMX)
#define SECONDARY_EXEC_ENABLE_XSAVES VMCS_CONTROL_BIT(XSAVES)
#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC)
@@ -226,6 +227,8 @@ enum vmcs_field {
VMREAD_BITMAP_HIGH = 0x00002027,
VMWRITE_BITMAP = 0x00002028,
VMWRITE_BITMAP_HIGH = 0x00002029,
+ VE_INFORMATION_ADDRESS = 0x0000202A,
+ VE_INFORMATION_ADDRESS_HIGH = 0x0000202B,
XSS_EXIT_BITMAP = 0x0000202C,
XSS_EXIT_BITMAP_HIGH = 0x0000202D,
ENCLS_EXITING_BITMAP = 0x0000202E,
@@ -514,6 +517,7 @@ enum vmcs_field {
#define VMX_EPT_IPAT_BIT (1ull << 6)
#define VMX_EPT_ACCESS_BIT (1ull << 8)
#define VMX_EPT_DIRTY_BIT (1ull << 9)
+#define VMX_EPT_SUPPRESS_VE_BIT (1ull << 63)
#define VMX_EPT_RWX_MASK (VMX_EPT_READABLE_MASK | \
VMX_EPT_WRITABLE_MASK | \
VMX_EPT_EXECUTABLE_MASK)
@@ -630,4 +634,13 @@ enum vmx_l1d_flush_state {
extern enum vmx_l1d_flush_state l1tf_vmx_mitigation;
+struct vmx_ve_information {
+ u32 exit_reason;
+ u32 delivery;
+ u64 exit_qualification;
+ u64 guest_linear_address;
+ u64 guest_physical_address;
+ u16 eptp_index;
+};
+
#endif
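struct vmx_ve_information mirrors the #VE information area the CPU fills in when SECONDARY_EXEC_EPT_VIOLATION_VE is enabled. A small sketch, assuming only the layout shown above, of decoding one of these records; the sample values are made up:

#include <stdint.h>
#include <stdio.h>

/* Layout taken from the vmx.h hunk above. */
struct vmx_ve_information {
        uint32_t exit_reason;
        uint32_t delivery;
        uint64_t exit_qualification;
        uint64_t guest_linear_address;
        uint64_t guest_physical_address;
        uint16_t eptp_index;
};

int main(void)
{
        struct vmx_ve_information ve = { .exit_reason = 48 };  /* 48 = EPT violation */

        printf("#VE: reason=%u qual=%#llx gla=%#llx gpa=%#llx eptp=%u\n",
               ve.exit_reason,
               (unsigned long long)ve.exit_qualification,
               (unsigned long long)ve.guest_linear_address,
               (unsigned long long)ve.guest_physical_address,
               ve.eptp_index);
        return 0;
}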
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index ef11aa4cab42..9fae1b73b529 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -457,8 +457,13 @@ struct kvm_sync_regs {
#define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001
-/* attributes for system fd (group 0) */
-#define KVM_X86_XCOMP_GUEST_SUPP 0
+/* vendor-independent attributes for system fd (group 0) */
+#define KVM_X86_GRP_SYSTEM 0
+# define KVM_X86_XCOMP_GUEST_SUPP 0
+
+/* vendor-specific groups and attributes for system fd */
+#define KVM_X86_GRP_SEV 1
+# define KVM_X86_SEV_VMSA_FEATURES 0
struct kvm_vmx_nested_state_data {
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
@@ -689,6 +694,9 @@ enum sev_cmd_id {
/* Guest Migration Extension */
KVM_SEV_SEND_CANCEL,
+ /* Second time is the charm; improved versions of the above ioctls. */
+ KVM_SEV_INIT2,
+
KVM_SEV_NR_MAX,
};
@@ -700,6 +708,14 @@ struct kvm_sev_cmd {
__u32 sev_fd;
};
+struct kvm_sev_init {
+ __u64 vmsa_features;
+ __u32 flags;
+ __u16 ghcb_version;
+ __u16 pad1;
+ __u32 pad2[8];
+};
+
struct kvm_sev_launch_start {
__u32 handle;
__u32 policy;
@@ -856,5 +872,7 @@ struct kvm_hyperv_eventfd {
#define KVM_X86_DEFAULT_VM 0
#define KVM_X86_SW_PROTECTED_VM 1
+#define KVM_X86_SEV_VM 2
+#define KVM_X86_SEV_ES_VM 3
#endif /* _ASM_X86_KVM_H */
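The new KVM_X86_SEV_VM/KVM_X86_SEV_ES_VM types plus KVM_SEV_INIT2 and struct kvm_sev_init supersede the legacy KVM_SEV_INIT/KVM_SEV_ES_INIT flow. A hedged userspace sketch of the intended call sequence, assuming the usual KVM_CREATE_VM(vm_type) and KVM_MEMORY_ENCRYPT_OP conventions rather than anything spelled out in this hunk:

/* Minimal sketch, error handling mostly omitted; /dev/sev access and the
 * kvm_sev_cmd plumbing follow the established SEV convention, which this
 * example assumes rather than guarantees. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
        int kvm = open("/dev/kvm", O_RDWR);
        int sev = open("/dev/sev", O_RDWR);
        int vm  = ioctl(kvm, KVM_CREATE_VM, KVM_X86_SEV_ES_VM);
        struct kvm_sev_init init = { .ghcb_version = 2 };
        struct kvm_sev_cmd cmd = {
                .id     = KVM_SEV_INIT2,
                .data   = (unsigned long)&init,
                .sev_fd = sev,
        };

        if (vm < 0 || ioctl(vm, KVM_MEMORY_ENCRYPT_OP, &cmd))
                perror("KVM_SEV_INIT2");
        else
                printf("SEV-ES VM initialized, fw error %u\n", cmd.error);
        return 0;
}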
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index e74d0c4286c1..f06501445cd9 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -402,20 +402,26 @@ int crash_load_segments(struct kimage *image)
#undef pr_fmt
#define pr_fmt(fmt) "crash hp: " fmt
-/* These functions provide the value for the sysfs crash_hotplug nodes */
-#ifdef CONFIG_HOTPLUG_CPU
-int arch_crash_hotplug_cpu_support(void)
+int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags)
{
- return crash_check_update_elfcorehdr();
-}
-#endif
-#ifdef CONFIG_MEMORY_HOTPLUG
-int arch_crash_hotplug_memory_support(void)
-{
- return crash_check_update_elfcorehdr();
-}
+#ifdef CONFIG_KEXEC_FILE
+ if (image->file_mode)
+ return 1;
#endif
+ /*
+ * Initially, crash hotplug support for kexec_load was added
+ * with the KEXEC_UPDATE_ELFCOREHDR flag. Later, this
+ * functionality was expanded to accommodate multiple kexec
+ * segment updates, leading to the introduction of the
+ * KEXEC_CRASH_HOTPLUG_SUPPORT kexec flag bit. Consequently,
+ * when the kexec tool sends either of these flags, it indicates
+ * that the required kexec segment (elfcorehdr) is excluded from
+ * the SHA calculation.
+ */
+ return (kexec_flags & KEXEC_UPDATE_ELFCOREHDR ||
+ kexec_flags & KEXEC_CRASH_HOTPLUG_SUPPORT);
+}
unsigned int arch_crash_get_elfcorehdr_size(void)
{
@@ -432,10 +438,12 @@ unsigned int arch_crash_get_elfcorehdr_size(void)
/**
* arch_crash_handle_hotplug_event() - Handle hotplug elfcorehdr changes
* @image: a pointer to kexec_crash_image
+ * @arg: pointer to a struct memory_notify for the memory hotplug case,
+ * NULL for the CPU hotplug case.
*
* Prepare the new elfcorehdr and replace the existing elfcorehdr.
*/
-void arch_crash_handle_hotplug_event(struct kimage *image)
+void arch_crash_handle_hotplug_event(struct kimage *image, void *arg)
{
void *elfbuf = NULL, *old_elfcorehdr;
unsigned long nr_mem_ranges;
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 6276329f5e66..c5a026fee5e0 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -991,6 +991,7 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
return __raw_xsave_addr(xsave, xfeature_nr);
}
+EXPORT_SYMBOL_GPL(get_xsave_addr);
#ifdef CONFIG_ARCH_HAS_PKEYS
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index 19ca623ffa2a..05df04f39628 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -54,8 +54,6 @@ extern int copy_sigframe_from_user_to_xstate(struct task_struct *tsk, const void
extern void fpu__init_cpu_xstate(void);
extern void fpu__init_system_xstate(unsigned int legacy_size);
-extern void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
-
static inline u64 xfeatures_mask_supervisor(void)
{
return fpu_kernel_cfg.max_features & XFEATURE_MASK_SUPERVISOR_SUPPORTED;
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 70139d9d2e01..8da0e66ca22d 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -25,6 +25,7 @@
#include <linux/memory.h>
#include <linux/vmalloc.h>
#include <linux/set_memory.h>
+#include <linux/execmem.h>
#include <trace/syscall.h>
@@ -260,25 +261,14 @@ void arch_ftrace_update_code(int command)
/* Currently only x86_64 supports dynamic trampolines */
#ifdef CONFIG_X86_64
-#ifdef CONFIG_MODULES
-#include <linux/moduleloader.h>
-/* Module allocation simplifies allocating memory for code */
static inline void *alloc_tramp(unsigned long size)
{
- return module_alloc(size);
+ return execmem_alloc(EXECMEM_FTRACE, size);
}
static inline void tramp_free(void *tramp)
{
- module_memfree(tramp);
+ execmem_free(tramp);
}
-#else
-/* Trampolines can only be created if modules are supported */
-static inline void *alloc_tramp(unsigned long size)
-{
- return NULL;
-}
-static inline void tramp_free(void *tramp) { }
-#endif
/* Defined as markers to the end of the ftrace default trampolines */
extern void ftrace_regs_caller_end(void);
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index d0e49bd7c6f3..72e6a45e7ec2 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -40,12 +40,12 @@
#include <linux/kgdb.h>
#include <linux/ftrace.h>
#include <linux/kasan.h>
-#include <linux/moduleloader.h>
#include <linux/objtool.h>
#include <linux/vmalloc.h>
#include <linux/pgtable.h>
#include <linux/set_memory.h>
#include <linux/cfi.h>
+#include <linux/execmem.h>
#include <asm/text-patching.h>
#include <asm/cacheflush.h>
@@ -495,7 +495,7 @@ void *alloc_insn_page(void)
{
void *page;
- page = module_alloc(PAGE_SIZE);
+ page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
if (!page)
return NULL;
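Both the ftrace and kprobes hunks swap module_alloc()/module_memfree() for the generic execmem API, so executable scratch memory no longer depends on the module loader. A kernel-style sketch of the resulting allocate/patch/free pattern, illustrative only and not part of the patch:

#include <linux/execmem.h>

/* Illustrative: grab one executable page for an out-of-line instruction
 * slot and release it again, mirroring what alloc_insn_page()/alloc_tramp()
 * do after this change. */
static void *example_alloc_slot(void)
{
        void *slot = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);

        if (!slot)
                return NULL;
        /* ... copy instructions in and patch them here ... */
        return slot;
}

static void example_free_slot(void *slot)
{
        execmem_free(slot);
}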
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index dd2ec14adb77..15af7e98e161 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -21,6 +21,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct kprobe_ctlblk *kcb;
int bit;
+ if (unlikely(kprobe_ftrace_disabled))
+ return;
+
bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0)
return;
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index e18914c0e38a..837450b6e882 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -36,57 +36,6 @@ do { \
} while (0)
#endif
-#ifdef CONFIG_RANDOMIZE_BASE
-static unsigned long module_load_offset;
-
-/* Mutex protects the module_load_offset. */
-static DEFINE_MUTEX(module_kaslr_mutex);
-
-static unsigned long int get_module_load_offset(void)
-{
- if (kaslr_enabled()) {
- mutex_lock(&module_kaslr_mutex);
- /*
- * Calculate the module_load_offset the first time this
- * code is called. Once calculated it stays the same until
- * reboot.
- */
- if (module_load_offset == 0)
- module_load_offset =
- get_random_u32_inclusive(1, 1024) * PAGE_SIZE;
- mutex_unlock(&module_kaslr_mutex);
- }
- return module_load_offset;
-}
-#else
-static unsigned long int get_module_load_offset(void)
-{
- return 0;
-}
-#endif
-
-void *module_alloc(unsigned long size)
-{
- gfp_t gfp_mask = GFP_KERNEL;
- void *p;
-
- if (PAGE_ALIGN(size) > MODULES_LEN)
- return NULL;
-
- p = __vmalloc_node_range(size, MODULE_ALIGN,
- MODULES_VADDR + get_module_load_offset(),
- MODULES_END, gfp_mask, PAGE_KERNEL,
- VM_FLUSH_RESET_PERMS | VM_DEFER_KMEMLEAK,
- NUMA_NO_NODE, __builtin_return_address(0));
-
- if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) {
- vfree(p);
- return NULL;
- }
-
- return p;
-}
-
#ifdef CONFIG_X86_32
int apply_relocate(Elf32_Shdr *sechdrs,
const char *strtab,
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 0ebdd088f28b..d64fb2b3eb69 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -95,6 +95,19 @@ config KVM_INTEL
To compile this as a module, choose M here: the module
will be called kvm-intel.
+config KVM_INTEL_PROVE_VE
+ bool "Check that guests do not receive #VE exceptions"
+ default KVM_PROVE_MMU || DEBUG_KERNEL
+ depends on KVM_INTEL
+ help
+ Checks that KVM's page table management code will not incorrectly
+ let guests receive a virtualization exception. Virtualization
+ exceptions will be trapped by the hypervisor rather than injected
+ in the guest.
+
+ If unsure, say N.
+
config X86_SGX_KVM
bool "Software Guard eXtensions (SGX) Virtualization"
depends on X86_SGX && KVM_INTEL
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index addc44fc7187..5494669a055a 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -16,14 +16,15 @@ kvm-$(CONFIG_KVM_XEN) += xen.o
kvm-$(CONFIG_KVM_SMM) += smm.o
kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
- vmx/nested.o vmx/posted_intr.o
+ vmx/nested.o vmx/posted_intr.o vmx/main.o
kvm-intel-$(CONFIG_X86_SGX_KVM) += vmx/sgx.o
kvm-intel-$(CONFIG_KVM_HYPERV) += vmx/hyperv.o vmx/hyperv_evmcs.o
-kvm-amd-y += svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o \
- svm/sev.o
-kvm-amd-$(CONFIG_KVM_HYPERV) += svm/hyperv.o
+kvm-amd-y += svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o
+
+kvm-amd-$(CONFIG_KVM_AMD_SEV) += svm/sev.o
+kvm-amd-$(CONFIG_KVM_HYPERV) += svm/hyperv.o
ifdef CONFIG_HYPERV
kvm-y += kvm_onhyperv.o
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 77352a4abd87..f2f2be5d1141 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -772,7 +772,7 @@ void kvm_set_cpu_caps(void)
kvm_cpu_cap_mask(CPUID_8000_000A_EDX, 0);
kvm_cpu_cap_mask(CPUID_8000_001F_EAX,
- 0 /* SME */ | F(SEV) | 0 /* VM_PAGE_FLUSH */ | F(SEV_ES) |
+ 0 /* SME */ | 0 /* SEV */ | 0 /* VM_PAGE_FLUSH */ | 0 /* SEV_ES */ |
F(SME_COHERENT));
kvm_cpu_cap_mask(CPUID_8000_0021_EAX,
@@ -1232,9 +1232,22 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->eax = entry->ebx = entry->ecx = 0;
break;
case 0x80000008: {
- unsigned g_phys_as = (entry->eax >> 16) & 0xff;
- unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
- unsigned phys_as = entry->eax & 0xff;
+ /*
+ * GuestPhysAddrSize (EAX[23:16]) is intended for software
+ * use.
+ *
+ * KVM's ABI is to report the effective MAXPHYADDR for the
+ * guest in PhysAddrSize (phys_as), and the maximum
+ * *addressable* GPA in GuestPhysAddrSize (g_phys_as).
+ *
+ * GuestPhysAddrSize is valid if and only if TDP is enabled,
+ * in which case the max GPA that can be addressed by KVM may
+ * be less than the max GPA that can be legally generated by
+ * the guest, e.g. if MAXPHYADDR>48 but the CPU doesn't
+ * support 5-level TDP.
+ */
+ unsigned int virt_as = max((entry->eax >> 8) & 0xff, 48U);
+ unsigned int phys_as, g_phys_as;
/*
* If TDP (NPT) is disabled use the adjusted host MAXPHYADDR as
@@ -1242,16 +1255,24 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
* reductions in MAXPHYADDR for memory encryption affect shadow
* paging, too.
*
- * If TDP is enabled but an explicit guest MAXPHYADDR is not
- * provided, use the raw bare metal MAXPHYADDR as reductions to
- * the HPAs do not affect GPAs.
+ * If TDP is enabled, use the raw bare metal MAXPHYADDR as
+ * reductions to the HPAs do not affect GPAs. The max
+ * addressable GPA is the same as the max effective GPA, except
+ * that it's capped at 48 bits if 5-level TDP isn't supported
+ * (hardware processes bits 51:48 only when walking the fifth
+ * level page table).
*/
- if (!tdp_enabled)
- g_phys_as = boot_cpu_data.x86_phys_bits;
- else if (!g_phys_as)
+ if (!tdp_enabled) {
+ phys_as = boot_cpu_data.x86_phys_bits;
+ g_phys_as = 0;
+ } else {
+ phys_as = entry->eax & 0xff;
g_phys_as = phys_as;
+ if (kvm_mmu_get_max_tdp_level() < 5)
+ g_phys_as = min(g_phys_as, 48);
+ }
- entry->eax = g_phys_as | (virt_as << 8);
+ entry->eax = phys_as | (virt_as << 8) | (g_phys_as << 16);
entry->ecx &= ~(GENMASK(31, 16) | GENMASK(11, 8));
entry->edx = 0;
cpuid_entry_override(entry, CPUID_8000_0008_EBX);
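After this hunk, CPUID.0x80000008.EAX packs three fields: PhysAddrSize in bits 7:0, the virtual address size in bits 15:8, and GuestPhysAddrSize in bits 23:16. A self-contained sketch of that packing and unpacking, with made-up field values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        unsigned int phys_as = 52, virt_as = 57, g_phys_as = 48;
        uint32_t eax = phys_as | (virt_as << 8) | (g_phys_as << 16);

        printf("EAX=0x%08x PhysAddrSize=%u LinAddrSize=%u GuestPhysAddrSize=%u\n",
               eax, eax & 0xff, (eax >> 8) & 0xff, (eax >> 16) & 0xff);
        return 0;
}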
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 5382646162a3..29ea4313e1bb 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -26,6 +26,7 @@ struct x86_exception {
bool nested_page_fault;
u64 address; /* cr2 or nested page fault gpa */
u8 async_page_fault;
+ unsigned long exit_qualification;
};
/*
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 60f21bb4c27b..2e454316f2a2 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -100,6 +100,8 @@ static inline u8 kvm_get_shadow_phys_bits(void)
return boot_cpu_data.x86_phys_bits;
}
+u8 kvm_mmu_get_max_tdp_level(void);
+
void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask);
void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask);
void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only);
@@ -213,7 +215,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
*/
u64 implicit_access = access & PFERR_IMPLICIT_ACCESS;
bool not_smap = ((rflags & X86_EFLAGS_AC) | implicit_access) == X86_EFLAGS_AC;
- int index = (pfec + (not_smap << PFERR_RSVD_BIT)) >> 1;
+ int index = (pfec | (not_smap ? PFERR_RSVD_MASK : 0)) >> 1;
u32 errcode = PFERR_PRESENT_MASK;
bool fault;
@@ -234,8 +236,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
pkru_bits = (vcpu->arch.pkru >> (pte_pkey * 2)) & 3;
/* clear present bit, replace PFEC.RSVD with ACC_USER_MASK. */
- offset = (pfec & ~1) +
- ((pte_access & PT_USER_MASK) << (PFERR_RSVD_BIT - PT_USER_SHIFT));
+ offset = (pfec & ~1) | ((pte_access & PT_USER_MASK) ? PFERR_RSVD_MASK : 0);
pkru_bits &= mmu->pkru_mask >> offset;
errcode |= -pkru_bits & PFERR_PK_MASK;
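The permission_fault() hunk replaces the shift-and-add trick with a plain OR of PFERR_RSVD_MASK, which yields the same index as long as the RSVD bit is never already set in pfec. A worked example of the index computation using the standard #PF error-code bit values; this is a sketch, not kernel code:

#include <stdio.h>

#define PFERR_WRITE_MASK 0x2u
#define PFERR_USER_MASK  0x4u
#define PFERR_RSVD_MASK  0x8u   /* reused here as the "not SMAP" flag */

int main(void)
{
        unsigned int pfec = PFERR_USER_MASK | PFERR_WRITE_MASK;
        int not_smap = 1;
        unsigned int index = (pfec | (not_smap ? PFERR_RSVD_MASK : 0)) >> 1;

        printf("permission bitmap index = %u\n", index);   /* prints 7 */
        return 0;
}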
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index db007a4dffa2..662f62dfb2aa 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -432,8 +432,8 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
* The idea using the light way get the spte on x86_32 guest is from
* gup_get_pte (mm/gup.c).
*
- * An spte tlb flush may be pending, because kvm_set_pte_rmap
- * coalesces them and we are running out of the MMU lock. Therefore
+ * An SPTE TLB flush may be pending, because such flushes are coalesced
+ * and we are running outside of the MMU lock.  Therefore
* we need to protect against in-progress updates of the spte.
*
* Reading the spte while an update is in progress may get the old value
@@ -567,9 +567,9 @@ static u64 mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep)
if (!is_shadow_present_pte(old_spte) ||
!spte_has_volatile_bits(old_spte))
- __update_clear_spte_fast(sptep, 0ull);
+ __update_clear_spte_fast(sptep, SHADOW_NONPRESENT_VALUE);
else
- old_spte = __update_clear_spte_slow(sptep, 0ull);
+ old_spte = __update_clear_spte_slow(sptep, SHADOW_NONPRESENT_VALUE);
if (!is_shadow_present_pte(old_spte))
return old_spte;
@@ -603,7 +603,7 @@ static u64 mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep)
*/
static void mmu_spte_clear_no_track(u64 *sptep)
{
- __update_clear_spte_fast(sptep, 0ull);
+ __update_clear_spte_fast(sptep, SHADOW_NONPRESENT_VALUE);
}
static u64 mmu_spte_get_lockless(u64 *sptep)
@@ -831,6 +831,15 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
gfn_t gfn;
kvm->arch.indirect_shadow_pages++;
+ /*
+ * Ensure indirect_shadow_pages is elevated prior to re-reading guest
+ * child PTEs in FNAME(gpte_changed), i.e. guarantee either in-flight
+ * emulated writes are visible before re-reading guest PTEs, or that
+ * an emulated write will see the elevated count and acquire mmu_lock
+ * to update SPTEs. Pairs with the smp_mb() in kvm_mmu_track_write().
+ */
+ smp_mb();
+
gfn = sp->gfn;
slots = kvm_memslots_for_spte_role(kvm, sp->role);
slot = __gfn_to_memslot(slots, gfn);
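The smp_mb() added here pairs with the one added in kvm_mmu_track_write() further down: either the fault path observes the emulated PTE write, or the write path observes the elevated indirect_shadow_pages count. A self-contained userspace analogue of that store/load pairing, using C11 atomics in place of the kernel's smp_mb(), illustrative only:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int shadow_pages;  /* stands in for indirect_shadow_pages */
static atomic_int guest_pte;     /* stands in for the emulated guest PTE write */

static void *fault_path(void *arg)
{
        (void)arg;
        atomic_fetch_add(&shadow_pages, 1);
        atomic_thread_fence(memory_order_seq_cst);  /* the smp_mb() in account_shadowed() */
        printf("fault path sees pte=%d\n", atomic_load(&guest_pte));
        return NULL;
}

static void *write_path(void *arg)
{
        (void)arg;
        atomic_store(&guest_pte, 1);
        atomic_thread_fence(memory_order_seq_cst);  /* the smp_mb() in kvm_mmu_track_write() */
        printf("write path sees shadow_pages=%d\n", atomic_load(&shadow_pages));
        return NULL;
}

int main(void)
{
        pthread_t a, b;

        pthread_create(&a, NULL, fault_path, NULL);
        pthread_create(&b, NULL, write_path, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        return 0;  /* at least one thread must see the other's store */
}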
@@ -1448,49 +1457,11 @@ static bool __kvm_zap_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
}
static bool kvm_zap_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
- struct kvm_memory_slot *slot, gfn_t gfn, int level,
- pte_t unused)
+ struct kvm_memory_slot *slot, gfn_t gfn, int level)
{
return __kvm_zap_rmap(kvm, rmap_head, slot);
}
-static bool kvm_set_pte_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
- struct kvm_memory_slot *slot, gfn_t gfn, int level,
- pte_t pte)
-{
- u64 *sptep;
- struct rmap_iterator iter;
- bool need_flush = false;
- u64 new_spte;
- kvm_pfn_t new_pfn;
-
- WARN_ON_ONCE(pte_huge(pte));
- new_pfn = pte_pfn(pte);
-
-restart:
- for_each_rmap_spte(rmap_head, &iter, sptep) {
- need_flush = true;
-
- if (pte_write(pte)) {
- kvm_zap_one_rmap_spte(kvm, rmap_head, sptep);
- goto restart;
- } else {
- new_spte = kvm_mmu_changed_pte_notifier_make_spte(
- *sptep, new_pfn);
-
- mmu_spte_clear_track_bits(kvm, sptep);
- mmu_spte_set(sptep, new_spte);
- }
- }
-
- if (need_flush && kvm_available_flush_remote_tlbs_range()) {
- kvm_flush_remote_tlbs_gfn(kvm, gfn, level);
- return false;
- }
-
- return need_flush;
-}
-
struct slot_rmap_walk_iterator {
/* input fields. */
const struct kvm_memory_slot *slot;
@@ -1562,7 +1533,7 @@ static void slot_rmap_walk_next(struct slot_rmap_walk_iterator *iterator)
typedef bool (*rmap_handler_t)(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
struct kvm_memory_slot *slot, gfn_t gfn,
- int level, pte_t pte);
+ int level);
static __always_inline bool kvm_handle_gfn_range(struct kvm *kvm,
struct kvm_gfn_range *range,
@@ -1574,7 +1545,7 @@ static __always_inline bool kvm_handle_gfn_range(struct kvm *kvm,
for_each_slot_rmap_range(range->slot, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL,
range->start, range->end - 1, &iterator)
ret |= handler(kvm, iterator.rmap, range->slot, iterator.gfn,
- iterator.level, range->arg.pte);
+ iterator.level);
return ret;
}
@@ -1596,22 +1567,8 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
return flush;
}
-bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
-{
- bool flush = false;
-
- if (kvm_memslots_have_rmaps(kvm))
- flush = kvm_handle_gfn_range(kvm, range, kvm_set_pte_rmap);
-
- if (tdp_mmu_enabled)
- flush |= kvm_tdp_mmu_set_spte_gfn(kvm, range);
-
- return flush;
-}
-
static bool kvm_age_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
- struct kvm_memory_slot *slot, gfn_t gfn, int level,
- pte_t unused)
+ struct kvm_memory_slot *slot, gfn_t gfn, int level)
{
u64 *sptep;
struct rmap_iterator iter;
@@ -1624,8 +1581,7 @@ static bool kvm_age_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
}
static bool kvm_test_age_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
- struct kvm_memory_slot *slot, gfn_t gfn,
- int level, pte_t unused)
+ struct kvm_memory_slot *slot, gfn_t gfn, int level)
{
u64 *sptep;
struct rmap_iterator iter;
@@ -1950,7 +1906,8 @@ static bool kvm_sync_page_check(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
static int kvm_sync_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, int i)
{
- if (!sp->spt[i])
+ /* sp->spt[i] still holds the initial value from shadow page table allocation */
+ if (sp->spt[i] == SHADOW_NONPRESENT_VALUE)
return 0;
return vcpu->arch.mmu->sync_spte(vcpu, sp, i);
@@ -2514,7 +2471,7 @@ static int mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
return kvm_mmu_prepare_zap_page(kvm, child,
invalid_list);
}
- } else if (is_mmio_spte(pte)) {
+ } else if (is_mmio_spte(kvm, pte)) {
mmu_spte_clear_no_track(spte);
}
return 0;
@@ -3314,9 +3271,19 @@ static int kvm_handle_noslot_fault(struct kvm_vcpu *vcpu,
{
gva_t gva = fault->is_tdp ? 0 : fault->addr;
+ if (fault->is_private) {
+ kvm_mmu_prepare_memory_fault_exit(vcpu, fault);
+ return -EFAULT;
+ }
+
vcpu_cache_mmio_info(vcpu, gva, fault->gfn,
access & shadow_mmio_access_mask);
+ fault->slot = NULL;
+ fault->pfn = KVM_PFN_NOSLOT;
+ fault->map_writable = false;
+ fault->hva = KVM_HVA_ERR_BAD;
+
/*
* If MMIO caching is disabled, emulate immediately without
* touching the shadow page tables as attempting to install an
@@ -4196,7 +4163,7 @@ static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
if (WARN_ON_ONCE(reserved))
return -EINVAL;
- if (is_mmio_spte(spte)) {
+ if (is_mmio_spte(vcpu->kvm, spte)) {
gfn_t gfn = get_mmio_spte_gfn(spte);
unsigned int access = get_mmio_spte_access(spte);
@@ -4259,24 +4226,28 @@ static u32 alloc_apf_token(struct kvm_vcpu *vcpu)
return (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
}
-static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
- gfn_t gfn)
+static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu,
+ struct kvm_page_fault *fault)
{
struct kvm_arch_async_pf arch;
arch.token = alloc_apf_token(vcpu);
- arch.gfn = gfn;
+ arch.gfn = fault->gfn;
+ arch.error_code = fault->error_code;
arch.direct_map = vcpu->arch.mmu->root_role.direct;
arch.cr3 = kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu);
- return kvm_setup_async_pf(vcpu, cr2_or_gpa,
- kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
+ return kvm_setup_async_pf(vcpu, fault->addr,
+ kvm_vcpu_gfn_to_hva(vcpu, fault->gfn), &arch);
}
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
{
int r;
+ if (WARN_ON_ONCE(work->arch.error_code & PFERR_PRIVATE_ACCESS))
+ return;
+
if ((vcpu->arch.mmu->root_role.direct != work->arch.direct_map) ||
work->wakeup_all)
return;
@@ -4289,7 +4260,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
work->arch.cr3 != kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu))
return;
- kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true, NULL);
+ kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, work->arch.error_code, true, NULL);
}
static inline u8 kvm_max_level_for_order(int order)
@@ -4309,14 +4280,6 @@ static inline u8 kvm_max_level_for_order(int order)
return PG_LEVEL_4K;
}
-static void kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu,
- struct kvm_page_fault *fault)
-{
- kvm_prepare_memory_fault_exit(vcpu, fault->gfn << PAGE_SHIFT,
- PAGE_SIZE, fault->write, fault->exec,
- fault->is_private);
-}
-
static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu,
struct kvm_page_fault *fault)
{
@@ -4343,48 +4306,15 @@ static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu,
static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
{
- struct kvm_memory_slot *slot = fault->slot;
bool async;
- /*
- * Retry the page fault if the gfn hit a memslot that is being deleted
- * or moved. This ensures any existing SPTEs for the old memslot will
- * be zapped before KVM inserts a new MMIO SPTE for the gfn.
- */
- if (slot && (slot->flags & KVM_MEMSLOT_INVALID))
- return RET_PF_RETRY;
-
- if (!kvm_is_visible_memslot(slot)) {
- /* Don't expose private memslots to L2. */
- if (is_guest_mode(vcpu)) {
- fault->slot = NULL;
- fault->pfn = KVM_PFN_NOSLOT;
- fault->map_writable = false;
- return RET_PF_CONTINUE;
- }
- /*
- * If the APIC access page exists but is disabled, go directly
- * to emulation without caching the MMIO access or creating a
- * MMIO SPTE. That way the cache doesn't need to be purged
- * when the AVIC is re-enabled.
- */
- if (slot && slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT &&
- !kvm_apicv_activated(vcpu->kvm))
- return RET_PF_EMULATE;
- }
-
- if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) {
- kvm_mmu_prepare_memory_fault_exit(vcpu, fault);
- return -EFAULT;
- }
-
if (fault->is_private)
return kvm_faultin_pfn_private(vcpu, fault);
async = false;
- fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, false, &async,
- fault->write, &fault->map_writable,
- &fault->hva);
+ fault->pfn = __gfn_to_pfn_memslot(fault->slot, fault->gfn, false, false,
+ &async, fault->write,
+ &fault->map_writable, &fault->hva);
if (!async)
return RET_PF_CONTINUE; /* *pfn has correct page already */
@@ -4394,7 +4324,7 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
trace_kvm_async_pf_repeated_fault(fault->addr, fault->gfn);
kvm_make_request(KVM_REQ_APF_HALT, vcpu);
return RET_PF_RETRY;
- } else if (kvm_arch_setup_async_pf(vcpu, fault->addr, fault->gfn)) {
+ } else if (kvm_arch_setup_async_pf(vcpu, fault)) {
return RET_PF_RETRY;
}
}
@@ -4404,17 +4334,72 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
* to wait for IO. Note, gup always bails if it is unable to quickly
* get a page and a fatal signal, i.e. SIGKILL, is pending.
*/
- fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, true, NULL,
- fault->write, &fault->map_writable,
- &fault->hva);
+ fault->pfn = __gfn_to_pfn_memslot(fault->slot, fault->gfn, false, true,
+ NULL, fault->write,
+ &fault->map_writable, &fault->hva);
return RET_PF_CONTINUE;
}
static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
unsigned int access)
{
+ struct kvm_memory_slot *slot = fault->slot;
int ret;
+ /*
+ * Note that the mmu_invalidate_seq also serves to detect a concurrent
+ * change in attributes. is_page_fault_stale() will detect an
+ * invalidation related to fault->gfn and resume the guest without
+ * installing a mapping in the page tables.
+ */
+ fault->mmu_seq = vcpu->kvm->mmu_invalidate_seq;
+ smp_rmb();
+
+ /*
+ * Now that we have a snapshot of mmu_invalidate_seq we can check for a
+ * private vs. shared mismatch.
+ */
+ if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) {
+ kvm_mmu_prepare_memory_fault_exit(vcpu, fault);
+ return -EFAULT;
+ }
+
+ if (unlikely(!slot))
+ return kvm_handle_noslot_fault(vcpu, fault, access);
+
+ /*
+ * Retry the page fault if the gfn hit a memslot that is being deleted
+ * or moved. This ensures any existing SPTEs for the old memslot will
+ * be zapped before KVM inserts a new MMIO SPTE for the gfn.
+ */
+ if (slot->flags & KVM_MEMSLOT_INVALID)
+ return RET_PF_RETRY;
+
+ if (slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT) {
+ /*
+ * Don't map L1's APIC access page into L2, KVM doesn't support
+ * using APICv/AVIC to accelerate L2 accesses to L1's APIC,
+ * i.e. the access needs to be emulated. Emulating access to
+ * L1's APIC is also correct if L1 is accelerating L2's own
+ * virtual APIC, but for some reason L1 also maps _L1's_ APIC
+ * into L2. Note, vcpu_is_mmio_gpa() always treats access to
+ * the APIC as MMIO. Allow an MMIO SPTE to be created, as KVM
+ * uses different roots for L1 vs. L2, i.e. there is no danger
+ * of breaking APICv/AVIC for L1.
+ */
+ if (is_guest_mode(vcpu))
+ return kvm_handle_noslot_fault(vcpu, fault, access);
+
+ /*
+ * If the APIC access page exists but is disabled, go directly
+ * to emulation without caching the MMIO access or creating a
+ * MMIO SPTE. That way the cache doesn't need to be purged
+ * when the AVIC is re-enabled.
+ */
+ if (!kvm_apicv_activated(vcpu->kvm))
+ return RET_PF_EMULATE;
+ }
+
fault->mmu_seq = vcpu->kvm->mmu_invalidate_seq;
smp_rmb();
@@ -4439,8 +4424,7 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
* *guaranteed* to need to retry, i.e. waiting until mmu_lock is held
* to detect retry guarantees the worst case latency for the vCPU.
*/
- if (fault->slot &&
- mmu_invalidate_retry_gfn_unsafe(vcpu->kvm, fault->mmu_seq, fault->gfn))
+ if (mmu_invalidate_retry_gfn_unsafe(vcpu->kvm, fault->mmu_seq, fault->gfn))
return RET_PF_RETRY;
ret = __kvm_faultin_pfn(vcpu, fault);
@@ -4450,7 +4434,7 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
if (unlikely(is_error_pfn(fault->pfn)))
return kvm_handle_error_pfn(vcpu, fault);
- if (unlikely(!fault->slot))
+ if (WARN_ON_ONCE(!fault->slot || is_noslot_pfn(fault->pfn)))
return kvm_handle_noslot_fault(vcpu, fault, access);
/*
@@ -4561,6 +4545,16 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
if (WARN_ON_ONCE(fault_address >> 32))
return -EFAULT;
#endif
+ /*
+ * Legacy #PF exceptions only have a 32-bit error code. Simply drop the
+ * upper bits as KVM doesn't use them for #PF (because they are never
+ * set), and to ensure there are no collisions with KVM-defined bits.
+ */
+ if (WARN_ON_ONCE(error_code >> 32))
+ error_code = lower_32_bits(error_code);
+
+ /* Ensure the above sanity check also covers KVM-defined flags. */
+ BUILD_BUG_ON(lower_32_bits(PFERR_SYNTHETIC_MASK));
vcpu->arch.l1tf_flush_l1d = true;
if (!flags) {
@@ -4812,7 +4806,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_new_pgd);
static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
unsigned int access)
{
- if (unlikely(is_mmio_spte(*sptep))) {
+ if (unlikely(is_mmio_spte(vcpu->kvm, *sptep))) {
if (gfn != get_mmio_spte_gfn(*sptep)) {
mmu_spte_clear_no_track(sptep);
return true;
@@ -5322,6 +5316,11 @@ static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu)
return max_tdp_level;
}
+u8 kvm_mmu_get_max_tdp_level(void)
+{
+ return tdp_root_level ? tdp_root_level : max_tdp_level;
+}
+
static union kvm_mmu_page_role
kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu,
union kvm_cpu_role cpu_role)
@@ -5802,10 +5801,15 @@ void kvm_mmu_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
bool flush = false;
/*
- * If we don't have indirect shadow pages, it means no page is
- * write-protected, so we can exit simply.
+ * When emulating guest writes, ensure the written value is visible to
+ * any task that is handling page faults before checking whether or not
+ * KVM is shadowing a guest PTE. This ensures either KVM will create
+ * the correct SPTE in the page fault handler, or this task will see
+ * a non-zero indirect_shadow_pages. Pairs with the smp_mb() in
+ * account_shadowed().
*/
- if (!READ_ONCE(vcpu->kvm->arch.indirect_shadow_pages))
+ smp_mb();
+ if (!vcpu->kvm->arch.indirect_shadow_pages)
return;
write_lock(&vcpu->kvm->mmu_lock);
@@ -5846,30 +5850,35 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err
int r, emulation_type = EMULTYPE_PF;
bool direct = vcpu->arch.mmu->root_role.direct;
- /*
- * IMPLICIT_ACCESS is a KVM-defined flag used to correctly perform SMAP
- * checks when emulating instructions that triggers implicit access.
- * WARN if hardware generates a fault with an error code that collides
- * with the KVM-defined value. Clear the flag and continue on, i.e.
- * don't terminate the VM, as KVM can't possibly be relying on a flag
- * that KVM doesn't know about.
- */
- if (WARN_ON_ONCE(error_code & PFERR_IMPLICIT_ACCESS))
- error_code &= ~PFERR_IMPLICIT_ACCESS;
-
if (WARN_ON_ONCE(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
return RET_PF_RETRY;
+ /*
+ * Except for reserved faults (emulated MMIO is shared-only), set the
+ * PFERR_PRIVATE_ACCESS flag for software-protected VMs based on the gfn's
+ * current attributes, which are the source of truth for such VMs. Note,
+ * this is wrong for nested MMUs as the GPA is an L2 GPA, but KVM doesn't
+ * currently support nested virtualization (among many other things)
+ * for software-protected VMs.
+ */
+ if (IS_ENABLED(CONFIG_KVM_SW_PROTECTED_VM) &&
+ !(error_code & PFERR_RSVD_MASK) &&
+ vcpu->kvm->arch.vm_type == KVM_X86_SW_PROTECTED_VM &&
+ kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(cr2_or_gpa)))
+ error_code |= PFERR_PRIVATE_ACCESS;
+
r = RET_PF_INVALID;
if (unlikely(error_code & PFERR_RSVD_MASK)) {
+ if (WARN_ON_ONCE(error_code & PFERR_PRIVATE_ACCESS))
+ return -EFAULT;
+
r = handle_mmio_page_fault(vcpu, cr2_or_gpa, direct);
if (r == RET_PF_EMULATE)
goto emulate;
}
if (r == RET_PF_INVALID) {
- r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa,
- lower_32_bits(error_code), false,
+ r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa, error_code, false,
&emulation_type);
if (KVM_BUG_ON(r == RET_PF_INVALID, vcpu->kvm))
return -EIO;
@@ -6173,7 +6182,10 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
vcpu->arch.mmu_page_header_cache.kmem_cache = mmu_page_header_cache;
vcpu->arch.mmu_page_header_cache.gfp_zero = __GFP_ZERO;
- vcpu->arch.mmu_shadow_page_cache.gfp_zero = __GFP_ZERO;
+ vcpu->arch.mmu_shadow_page_cache.init_value =
+ SHADOW_NONPRESENT_VALUE;
+ if (!vcpu->arch.mmu_shadow_page_cache.init_value)
+ vcpu->arch.mmu_shadow_page_cache.gfp_zero = __GFP_ZERO;
vcpu->arch.mmu = &vcpu->arch.root_mmu;
vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
@@ -6316,6 +6328,7 @@ static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
void kvm_mmu_init_vm(struct kvm *kvm)
{
+ kvm->arch.shadow_mmio_value = shadow_mmio_value;
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages);
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 5390a591a571..ce2fcd19ba6b 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -190,7 +190,7 @@ static inline bool is_nx_huge_page_enabled(struct kvm *kvm)
struct kvm_page_fault {
/* arguments to kvm_mmu_do_page_fault. */
const gpa_t addr;
- const u32 error_code;
+ const u64 error_code;
const bool prefetch;
/* Derived from error_code. */
@@ -279,8 +279,16 @@ enum {
RET_PF_SPURIOUS,
};
+static inline void kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu,
+ struct kvm_page_fault *fault)
+{
+ kvm_prepare_memory_fault_exit(vcpu, fault->gfn << PAGE_SHIFT,
+ PAGE_SIZE, fault->write, fault->exec,
+ fault->is_private);
+}
+
static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
- u32 err, bool prefetch, int *emulation_type)
+ u64 err, bool prefetch, int *emulation_type)
{
struct kvm_page_fault fault = {
.addr = cr2_or_gpa,
@@ -298,7 +306,10 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
.max_level = KVM_MAX_HUGEPAGE_LEVEL,
.req_level = PG_LEVEL_4K,
.goal_level = PG_LEVEL_4K,
- .is_private = kvm_mem_is_private(vcpu->kvm, cr2_or_gpa >> PAGE_SHIFT),
+ .is_private = err & PFERR_PRIVATE_ACCESS,
+
+ .pfn = KVM_PFN_ERR_FAULT,
+ .hva = KVM_HVA_ERR_BAD,
};
int r;
@@ -320,6 +331,17 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
else
r = vcpu->arch.mmu->page_fault(vcpu, &fault);
+ /*
+ * Not sure what's happening, but punt to userspace and hope that
+ * they can fix it by changing memory to shared, or they can
+ * provide a better error.
+ */
+ if (r == RET_PF_EMULATE && fault.is_private) {
+ pr_warn_ratelimited("kvm: unexpected emulation request on private memory\n");
+ kvm_mmu_prepare_memory_fault_exit(vcpu, &fault);
+ return -EFAULT;
+ }
+
if (fault.write_fault_to_shadow_pgtable && emulation_type)
*emulation_type |= EMULTYPE_WRITE_PF_TO_SP;
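kvm_mmu_prepare_memory_fault_exit(), moved into mmu_internal.h above, is what turns a private/shared mismatch into a KVM_EXIT_MEMORY_FAULT userspace exit. A hedged sketch of the VMM side of that contract, assuming the standard memory-attributes uAPI; the names come from <linux/kvm.h>, not from this patch:

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Illustrative VMM-side handler: on KVM_EXIT_MEMORY_FAULT, convert the
 * faulting range to the attribute the guest needs and let KVM_RUN retry. */
static int handle_memory_fault(int vm_fd, struct kvm_run *run)
{
        struct kvm_memory_attributes attr = {
                .address = run->memory_fault.gpa,
                .size    = run->memory_fault.size,
        };

        if (run->memory_fault.flags & KVM_MEMORY_EXIT_FLAG_PRIVATE)
                attr.attributes = KVM_MEMORY_ATTRIBUTE_PRIVATE; /* shared -> private */
        else
                attr.attributes = 0;                            /* private -> shared */

        if (ioctl(vm_fd, KVM_SET_MEMORY_ATTRIBUTES, &attr)) {
                perror("KVM_SET_MEMORY_ATTRIBUTES");
                return -1;
        }
        return 0;
}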
diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h
index ae86820cef69..195d98bc8de8 100644
--- a/arch/x86/kvm/mmu/mmutrace.h
+++ b/arch/x86/kvm/mmu/mmutrace.h
@@ -260,7 +260,7 @@ TRACE_EVENT(
TP_STRUCT__entry(
__field(int, vcpu_id)
__field(gpa_t, cr2_or_gpa)
- __field(u32, error_code)
+ __field(u64, error_code)
__field(u64 *, sptep)
__field(u64, old_spte)
__field(u64, new_spte)
diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c
index f6448284c18e..561c331fd6ec 100644
--- a/arch/x86/kvm/mmu/page_track.c
+++ b/arch/x86/kvm/mmu/page_track.c
@@ -41,7 +41,7 @@ bool kvm_page_track_write_tracking_enabled(struct kvm *kvm)
void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
{
- kvfree(slot->arch.gfn_write_track);
+ vfree(slot->arch.gfn_write_track);
slot->arch.gfn_write_track = NULL;
}
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 4d4e98fe4f35..d3dbcf382ed2 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -497,21 +497,21 @@ error:
* The other bits are set to 0.
*/
if (!(errcode & PFERR_RSVD_MASK)) {
- vcpu->arch.exit_qualification &= (EPT_VIOLATION_GVA_IS_VALID |
- EPT_VIOLATION_GVA_TRANSLATED);
+ walker->fault.exit_qualification = 0;
+
if (write_fault)
- vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_WRITE;
+ walker->fault.exit_qualification |= EPT_VIOLATION_ACC_WRITE;
if (user_fault)
- vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_READ;
+ walker->fault.exit_qualification |= EPT_VIOLATION_ACC_READ;
if (fetch_fault)
- vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_INSTR;
+ walker->fault.exit_qualification |= EPT_VIOLATION_ACC_INSTR;
/*
* Note, pte_access holds the raw RWX bits from the EPTE, not
* ACC_*_MASK flags!
*/
- vcpu->arch.exit_qualification |= (pte_access & VMX_EPT_RWX_MASK) <<
- EPT_VIOLATION_RWX_SHIFT;
+ walker->fault.exit_qualification |= (pte_access & VMX_EPT_RWX_MASK) <<
+ EPT_VIOLATION_RWX_SHIFT;
}
#endif
walker->fault.address = addr;
@@ -911,7 +911,7 @@ static int FNAME(sync_spte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, int
gpa_t pte_gpa;
gfn_t gfn;
- if (WARN_ON_ONCE(!sp->spt[i]))
+ if (WARN_ON_ONCE(sp->spt[i] == SHADOW_NONPRESENT_VALUE))
return 0;
first_pte_gpa = FNAME(get_level1_sp_gpa)(sp);
@@ -933,13 +933,13 @@ static int FNAME(sync_spte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, int
return 0;
/*
- * Drop the SPTE if the new protections would result in a RWX=0
- * SPTE or if the gfn is changing. The RWX=0 case only affects
- * EPT with execute-only support, i.e. EPT without an effective
- * "present" bit, as all other paging modes will create a
- * read-only SPTE if pte_access is zero.
+ * Drop the SPTE if the new protections result in no effective
+ * "present" bit or if the gfn is changing. The former case
+ * only affects EPT with execute-only support with pte_access==0;
+ * all other paging modes will create a read-only SPTE if
+ * pte_access is zero.
*/
- if ((!pte_access && !shadow_present_mask) ||
+ if ((pte_access | shadow_present_mask) == SHADOW_NONPRESENT_VALUE ||
gfn != kvm_mmu_page_get_gfn(sp, i)) {
drop_spte(vcpu->kvm, &sp->spt[i]);
return 1;
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index 4a599130e9c9..a5e014d7bc62 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -74,10 +74,10 @@ u64 make_mmio_spte(struct kvm_vcpu *vcpu, u64 gfn, unsigned int access)
u64 spte = generation_mmio_spte_mask(gen);
u64 gpa = gfn << PAGE_SHIFT;
- WARN_ON_ONCE(!shadow_mmio_value);
+ WARN_ON_ONCE(!vcpu->kvm->arch.shadow_mmio_value);
access &= shadow_mmio_access_mask;
- spte |= shadow_mmio_value | access;
+ spte |= vcpu->kvm->arch.shadow_mmio_value | access;
spte |= gpa | shadow_nonpresent_or_rsvd_mask;
spte |= (gpa & shadow_nonpresent_or_rsvd_mask)
<< SHADOW_NONPRESENT_OR_RSVD_MASK_LEN;
@@ -144,19 +144,19 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
u64 spte = SPTE_MMU_PRESENT_MASK;
bool wrprot = false;
- WARN_ON_ONCE(!pte_access && !shadow_present_mask);
+ /*
+ * For the EPT case, shadow_present_mask has no RWX bits set if
+ * exec-only page table entries are supported. In that case,
+ * ACC_USER_MASK and shadow_user_mask are used to represent
+ * read access. See FNAME(gpte_access) in paging_tmpl.h.
+ */
+ WARN_ON_ONCE((pte_access | shadow_present_mask) == SHADOW_NONPRESENT_VALUE);
if (sp->role.ad_disabled)
spte |= SPTE_TDP_AD_DISABLED;
else if (kvm_mmu_page_ad_need_write_protect(sp))
spte |= SPTE_TDP_AD_WRPROT_ONLY;
- /*
- * For the EPT case, shadow_present_mask is 0 if hardware
- * supports exec-only page table entries. In that case,
- * ACC_USER_MASK and shadow_user_mask are used to represent
- * read access. See FNAME(gpte_access) in paging_tmpl.h.
- */
spte |= shadow_present_mask;
if (!prefetch)
spte |= spte_shadow_accessed_mask(spte);
@@ -322,22 +322,6 @@ u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled)
return spte;
}
-u64 kvm_mmu_changed_pte_notifier_make_spte(u64 old_spte, kvm_pfn_t new_pfn)
-{
- u64 new_spte;
-
- new_spte = old_spte & ~SPTE_BASE_ADDR_MASK;
- new_spte |= (u64)new_pfn << PAGE_SHIFT;
-
- new_spte &= ~PT_WRITABLE_MASK;
- new_spte &= ~shadow_host_writable_mask;
- new_spte &= ~shadow_mmu_writable_mask;
-
- new_spte = mark_spte_for_access_track(new_spte);
-
- return new_spte;
-}
-
u64 mark_spte_for_access_track(u64 spte)
{
if (spte_ad_enabled(spte))
@@ -429,7 +413,9 @@ void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only)
shadow_dirty_mask = has_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull;
shadow_nx_mask = 0ull;
shadow_x_mask = VMX_EPT_EXECUTABLE_MASK;
- shadow_present_mask = has_exec_only ? 0ull : VMX_EPT_READABLE_MASK;
+ /* VMX_EPT_SUPPRESS_VE_BIT is needed for W or X violation. */
+ shadow_present_mask =
+ (has_exec_only ? 0ull : VMX_EPT_READABLE_MASK) | VMX_EPT_SUPPRESS_VE_BIT;
/*
* EPT overrides the host MTRRs, and so KVM must program the desired
* memtype directly into the SPTEs. Note, this mask is just the mask
@@ -446,7 +432,7 @@ void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only)
* of an EPT paging-structure entry is 110b (write/execute).
*/
kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE,
- VMX_EPT_RWX_MASK, 0);
+ VMX_EPT_RWX_MASK | VMX_EPT_SUPPRESS_VE_BIT, 0);
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_ept_masks);
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index a129951c9a88..5dd5405fa07a 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -149,6 +149,22 @@ static_assert(MMIO_SPTE_GEN_LOW_BITS == 8 && MMIO_SPTE_GEN_HIGH_BITS == 11);
#define MMIO_SPTE_GEN_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_BITS + MMIO_SPTE_GEN_HIGH_BITS - 1, 0)
+/*
+ * Non-present SPTE value needs to set bit 63 for TDX, in order to suppress
+ * #VE and get EPT violations on non-present PTEs. We can use the
+ * same value also without TDX for both VMX and SVM:
+ *
+ * For SVM NPT, for non-present spte (bit 0 = 0), other bits are ignored.
+ * For VMX EPT, bit 63 is ignored if #VE is disabled. (EPT_VIOLATION_VE=0)
+ * bit 63 is #VE suppress if #VE is enabled. (EPT_VIOLATION_VE=1)
+ */
+#ifdef CONFIG_X86_64
+#define SHADOW_NONPRESENT_VALUE BIT_ULL(63)
+static_assert(!(SHADOW_NONPRESENT_VALUE & SPTE_MMU_PRESENT_MASK));
+#else
+#define SHADOW_NONPRESENT_VALUE 0ULL
+#endif
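Since non-present SPTEs are no longer all-zero on x86-64, new shadow page tables must be pre-filled with SHADOW_NONPRESENT_VALUE rather than relying on __GFP_ZERO (see the mmu_shadow_page_cache.init_value change in mmu.c above). A minimal userspace sketch of that invariant; SHADOW_NONPRESENT_VALUE comes from the hunk above, while SPTE_MMU_PRESENT_MASK's value (bit 11) is quoted from spte.h purely for illustration:

#include <stdint.h>
#include <stdio.h>

#define BIT_ULL(n)               (1ULL << (n))
#define SHADOW_NONPRESENT_VALUE  BIT_ULL(63)  /* from the hunk above */
#define SPTE_MMU_PRESENT_MASK    BIT_ULL(11)  /* KVM's software-present bit, quoted for illustration */

int main(void)
{
        uint64_t spt[512];
        int i;

        /* New shadow page tables start out with every entry "not present". */
        for (i = 0; i < 512; i++)
                spt[i] = SHADOW_NONPRESENT_VALUE;

        printf("entry 0 = %#llx, mmu-present? %s\n",
               (unsigned long long)spt[0],
               (spt[0] & SPTE_MMU_PRESENT_MASK) ? "yes" : "no");
        return 0;
}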
+
extern u64 __read_mostly shadow_host_writable_mask;
extern u64 __read_mostly shadow_mmu_writable_mask;
extern u64 __read_mostly shadow_nx_mask;
@@ -190,11 +206,11 @@ extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
*
* Use a semi-arbitrary value that doesn't set RWX bits, i.e. is not-present on
* both AMD and Intel CPUs, and doesn't set PFN bits, i.e. doesn't create a L1TF
- * vulnerability. Use only low bits to avoid 64-bit immediates.
+ * vulnerability.
*
* Only used by the TDP MMU.
*/
-#define REMOVED_SPTE 0x5a0ULL
+#define REMOVED_SPTE (SHADOW_NONPRESENT_VALUE | 0x5a0ULL)
/* Removed SPTEs must not be misconstrued as shadow present PTEs. */
static_assert(!(REMOVED_SPTE & SPTE_MMU_PRESENT_MASK));
@@ -249,9 +265,9 @@ static inline struct kvm_mmu_page *root_to_sp(hpa_t root)
return spte_to_child_sp(root);
}
-static inline bool is_mmio_spte(u64 spte)
+static inline bool is_mmio_spte(struct kvm *kvm, u64 spte)
{
- return (spte & shadow_mmio_mask) == shadow_mmio_value &&
+ return (spte & shadow_mmio_mask) == kvm->arch.shadow_mmio_value &&
likely(enable_mmio_caching);
}
@@ -496,8 +512,6 @@ static inline u64 restore_acc_track_spte(u64 spte)
return spte;
}
-u64 kvm_mmu_changed_pte_notifier_make_spte(u64 old_spte, kvm_pfn_t new_pfn);
-
void __init kvm_mmu_spte_module_init(void);
void kvm_mmu_reset_all_pte_masks(void);
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 04c1f0957fea..1259dd63defc 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -495,8 +495,8 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
* impact the guest since both the former and current SPTEs
* are nonpresent.
*/
- if (WARN_ON_ONCE(!is_mmio_spte(old_spte) &&
- !is_mmio_spte(new_spte) &&
+ if (WARN_ON_ONCE(!is_mmio_spte(kvm, old_spte) &&
+ !is_mmio_spte(kvm, new_spte) &&
!is_removed_spte(new_spte)))
pr_err("Unexpected SPTE change! Nonpresent SPTEs\n"
"should not be replaced with another,\n"
@@ -530,6 +530,31 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
kvm_set_pfn_accessed(spte_to_pfn(old_spte));
}
+static inline int __tdp_mmu_set_spte_atomic(struct tdp_iter *iter, u64 new_spte)
+{
+ u64 *sptep = rcu_dereference(iter->sptep);
+
+ /*
+ * The caller is responsible for ensuring the old SPTE is not a REMOVED
+ * SPTE. KVM should never attempt to zap or manipulate a REMOVED SPTE,
+ * and pre-checking before inserting a new SPTE is advantageous as it
+ * avoids unnecessary work.
+ */
+ WARN_ON_ONCE(iter->yielded || is_removed_spte(iter->old_spte));
+
+ /*
+ * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs and
+ * does not hold the mmu_lock. On failure, i.e. if a different logical
+ * CPU modified the SPTE, try_cmpxchg64() updates iter->old_spte with
+ * the current value, so the caller operates on fresh data, e.g. if it
+ * retries tdp_mmu_set_spte_atomic()
+ */
+ if (!try_cmpxchg64(sptep, &iter->old_spte, new_spte))
+ return -EBUSY;
+
+ return 0;
+}
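__tdp_mmu_set_spte_atomic() leans on try_cmpxchg64() refreshing iter->old_spte with the current value on failure, so callers can retry without re-reading the SPTE. A self-contained userspace analogue of that compare-exchange retry pattern using C11 atomics; illustrative, not the kernel helper:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t spte;

/* Analogue of __tdp_mmu_set_spte_atomic(): succeed only if nobody raced us;
 * on failure *old is refreshed with the current value, as try_cmpxchg64()
 * does for iter->old_spte. */
static int set_spte_atomic(uint64_t *old, uint64_t new)
{
        if (!atomic_compare_exchange_strong(&spte, old, new))
                return -1;      /* -EBUSY in the kernel version */
        return 0;
}

int main(void)
{
        uint64_t old = atomic_load(&spte);

        while (set_spte_atomic(&old, old | 1))
                ;       /* retry against the refreshed 'old' value */

        printf("spte = %#llx\n", (unsigned long long)atomic_load(&spte));
        return 0;
}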
+
/*
* tdp_mmu_set_spte_atomic - Set a TDP MMU SPTE atomically
* and handle the associated bookkeeping. Do not mark the page dirty
@@ -551,27 +576,13 @@ static inline int tdp_mmu_set_spte_atomic(struct kvm *kvm,
struct tdp_iter *iter,
u64 new_spte)
{
- u64 *sptep = rcu_dereference(iter->sptep);
-
- /*
- * The caller is responsible for ensuring the old SPTE is not a REMOVED
- * SPTE. KVM should never attempt to zap or manipulate a REMOVED SPTE,
- * and pre-checking before inserting a new SPTE is advantageous as it
- * avoids unnecessary work.
- */
- WARN_ON_ONCE(iter->yielded || is_removed_spte(iter->old_spte));
+ int ret;
lockdep_assert_held_read(&kvm->mmu_lock);
- /*
- * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs and
- * does not hold the mmu_lock. On failure, i.e. if a different logical
- * CPU modified the SPTE, try_cmpxchg64() updates iter->old_spte with
- * the current value, so the caller operates on fresh data, e.g. if it
- * retries tdp_mmu_set_spte_atomic()
- */
- if (!try_cmpxchg64(sptep, &iter->old_spte, new_spte))
- return -EBUSY;
+ ret = __tdp_mmu_set_spte_atomic(iter, new_spte);
+ if (ret)
+ return ret;
handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
new_spte, iter->level, true);
@@ -584,13 +595,17 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm,
{
int ret;
+ lockdep_assert_held_read(&kvm->mmu_lock);
+
/*
- * Freeze the SPTE by setting it to a special,
- * non-present value. This will stop other threads from
- * immediately installing a present entry in its place
- * before the TLBs are flushed.
+ * Freeze the SPTE by setting it to a special, non-present value. This
+ * will stop other threads from immediately installing a present entry
+ * in its place before the TLBs are flushed.
+ *
+ * Delay processing of the zapped SPTE until after TLBs are flushed and
+ * the REMOVED_SPTE is replaced (see below).
*/
- ret = tdp_mmu_set_spte_atomic(kvm, iter, REMOVED_SPTE);
+ ret = __tdp_mmu_set_spte_atomic(iter, REMOVED_SPTE);
if (ret)
return ret;
@@ -599,11 +614,19 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm,
/*
* No other thread can overwrite the removed SPTE as they must either
* wait on the MMU lock or use tdp_mmu_set_spte_atomic() which will not
- * overwrite the special removed SPTE value. No bookkeeping is needed
- * here since the SPTE is going from non-present to non-present. Use
- * the raw write helper to avoid an unnecessary check on volatile bits.
+ * overwrite the special removed SPTE value. Use the raw write helper to
+ * avoid an unnecessary check on volatile bits.
*/
- __kvm_tdp_mmu_write_spte(iter->sptep, 0);
+ __kvm_tdp_mmu_write_spte(iter->sptep, SHADOW_NONPRESENT_VALUE);
+
+ /*
+ * Process the zapped SPTE after flushing TLBs, and after replacing
+ * REMOVED_SPTE with 0. This minimizes the amount of time vCPUs are
+ * blocked by the REMOVED_SPTE and reduces contention on the child
+ * SPTEs.
+ */
+ handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
+ 0, iter->level, true);
return 0;
}
@@ -740,8 +763,8 @@ retry:
continue;
if (!shared)
- tdp_mmu_iter_set_spte(kvm, &iter, 0);
- else if (tdp_mmu_set_spte_atomic(kvm, &iter, 0))
+ tdp_mmu_iter_set_spte(kvm, &iter, SHADOW_NONPRESENT_VALUE);
+ else if (tdp_mmu_set_spte_atomic(kvm, &iter, SHADOW_NONPRESENT_VALUE))
goto retry;
}
}
@@ -808,8 +831,8 @@ bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
if (WARN_ON_ONCE(!is_shadow_present_pte(old_spte)))
return false;
- tdp_mmu_set_spte(kvm, kvm_mmu_page_as_id(sp), sp->ptep, old_spte, 0,
- sp->gfn, sp->role.level + 1);
+ tdp_mmu_set_spte(kvm, kvm_mmu_page_as_id(sp), sp->ptep, old_spte,
+ SHADOW_NONPRESENT_VALUE, sp->gfn, sp->role.level + 1);
return true;
}
@@ -843,7 +866,7 @@ static bool tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root,
!is_last_spte(iter.old_spte, iter.level))
continue;
- tdp_mmu_iter_set_spte(kvm, &iter, 0);
+ tdp_mmu_iter_set_spte(kvm, &iter, SHADOW_NONPRESENT_VALUE);
/*
* Zapping SPTEs in invalid roots doesn't require a TLB flush,
@@ -1028,7 +1051,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
}
/* If a MMIO SPTE is installed, the MMIO will need to be emulated. */
- if (unlikely(is_mmio_spte(new_spte))) {
+ if (unlikely(is_mmio_spte(vcpu->kvm, new_spte))) {
vcpu->stat.pf_mmio_spte_created++;
trace_mark_mmio_spte(rcu_dereference(iter->sptep), iter->gfn,
new_spte);
@@ -1258,52 +1281,6 @@ bool kvm_tdp_mmu_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
return kvm_tdp_mmu_handle_gfn(kvm, range, test_age_gfn);
}
-static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter,
- struct kvm_gfn_range *range)
-{
- u64 new_spte;
-
- /* Huge pages aren't expected to be modified without first being zapped. */
- WARN_ON_ONCE(pte_huge(range->arg.pte) || range->start + 1 != range->end);
-
- if (iter->level != PG_LEVEL_4K ||
- !is_shadow_present_pte(iter->old_spte))
- return false;
-
- /*
- * Note, when changing a read-only SPTE, it's not strictly necessary to
- * zero the SPTE before setting the new PFN, but doing so preserves the
- * invariant that the PFN of a present * leaf SPTE can never change.
- * See handle_changed_spte().
- */
- tdp_mmu_iter_set_spte(kvm, iter, 0);
-
- if (!pte_write(range->arg.pte)) {
- new_spte = kvm_mmu_changed_pte_notifier_make_spte(iter->old_spte,
- pte_pfn(range->arg.pte));
-
- tdp_mmu_iter_set_spte(kvm, iter, new_spte);
- }
-
- return true;
-}
-
-/*
- * Handle the changed_pte MMU notifier for the TDP MMU.
- * data is a pointer to the new pte_t mapping the HVA specified by the MMU
- * notifier.
- * Returns non-zero if a flush is needed before releasing the MMU lock.
- */
-bool kvm_tdp_mmu_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
-{
- /*
- * No need to handle the remote TLB flush under RCU protection, the
- * target SPTE _must_ be a leaf SPTE, i.e. cannot result in freeing a
- * shadow page. See the WARN on pfn_changed in handle_changed_spte().
- */
- return kvm_tdp_mmu_handle_gfn(kvm, range, set_spte_gfn);
-}
-
/*
* Remove write access from all SPTEs at or above min_level that map GFNs
* [start, end). Returns true if an SPTE has been changed and the TLBs need to
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 6e1ea04ca885..58b55e61bd33 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -31,7 +31,6 @@ bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range,
bool flush);
bool kvm_tdp_mmu_age_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
bool kvm_tdp_mmu_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
-bool kvm_tdp_mmu_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm,
const struct kvm_memory_slot *slot, int min_level);
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 759581bb2128..0623cfaa7bb0 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -23,6 +23,7 @@
#include <asm/pkru.h>
#include <asm/trapnr.h>
#include <asm/fpu/xcr.h>
+#include <asm/fpu/xstate.h>
#include <asm/debugreg.h>
#include "mmu.h"
@@ -32,22 +33,12 @@
#include "cpuid.h"
#include "trace.h"
-#ifndef CONFIG_KVM_AMD_SEV
-/*
- * When this config is not defined, SEV feature is not supported and APIs in
- * this file are not used but this file still gets compiled into the KVM AMD
- * module.
- *
- * We will not have MISC_CG_RES_SEV and MISC_CG_RES_SEV_ES entries in the enum
- * misc_res_type {} defined in linux/misc_cgroup.h.
- *
- * Below macros allow compilation to succeed.
- */
-#define MISC_CG_RES_SEV MISC_CG_RES_TYPES
-#define MISC_CG_RES_SEV_ES MISC_CG_RES_TYPES
-#endif
+#define GHCB_VERSION_MAX 2ULL
+#define GHCB_VERSION_DEFAULT 2ULL
+#define GHCB_VERSION_MIN 1ULL
+
+#define GHCB_HV_FT_SUPPORTED GHCB_HV_FT_SNP
-#ifdef CONFIG_KVM_AMD_SEV
/* enable/disable SEV support */
static bool sev_enabled = true;
module_param_named(sev, sev_enabled, bool, 0444);
@@ -57,13 +48,13 @@ static bool sev_es_enabled = true;
module_param_named(sev_es, sev_es_enabled, bool, 0444);
/* enable/disable SEV-ES DebugSwap support */
-static bool sev_es_debug_swap_enabled = false;
+static bool sev_es_debug_swap_enabled = true;
module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0444);
-#else
-#define sev_enabled false
-#define sev_es_enabled false
-#define sev_es_debug_swap_enabled false
-#endif /* CONFIG_KVM_AMD_SEV */
+static u64 sev_supported_vmsa_features;
+
+#define AP_RESET_HOLD_NONE 0
+#define AP_RESET_HOLD_NAE_EVENT 1
+#define AP_RESET_HOLD_MSR_PROTO 2
static u8 sev_enc_bit;
static DECLARE_RWSEM(sev_deactivate_lock);
@@ -113,7 +104,15 @@ static int sev_flush_asids(unsigned int min_asid, unsigned int max_asid)
static inline bool is_mirroring_enc_context(struct kvm *kvm)
{
- return !!to_kvm_svm(kvm)->sev_info.enc_context_owner;
+ return !!to_kvm_sev_info(kvm)->enc_context_owner;
+}
+
+static bool sev_vcpu_has_debug_swap(struct vcpu_svm *svm)
+{
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+ struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;
+
+ return sev->vmsa_features & SVM_SEV_FEAT_DEBUG_SWAP;
}
/* Must be called with the sev_bitmap_lock held */
@@ -251,20 +250,44 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
sev_decommission(handle);
}
-static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
+static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
+ struct kvm_sev_init *data,
+ unsigned long vm_type)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_platform_init_args init_args = {0};
+ bool es_active = vm_type != KVM_X86_SEV_VM;
+ u64 valid_vmsa_features = es_active ? sev_supported_vmsa_features : 0;
int ret;
if (kvm->created_vcpus)
return -EINVAL;
+ if (data->flags)
+ return -EINVAL;
+
+ if (data->vmsa_features & ~valid_vmsa_features)
+ return -EINVAL;
+
+ if (data->ghcb_version > GHCB_VERSION_MAX || (!es_active && data->ghcb_version))
+ return -EINVAL;
+
if (unlikely(sev->active))
return -EINVAL;
sev->active = true;
- sev->es_active = argp->id == KVM_SEV_ES_INIT;
+ sev->es_active = es_active;
+ sev->vmsa_features = data->vmsa_features;
+ sev->ghcb_version = data->ghcb_version;
+
+ /*
+ * Currently KVM supports the full range of mandatory features defined
+ * by version 2 of the GHCB protocol, so default to that for SEV-ES
+ * guests created via KVM_SEV_INIT2.
+ */
+ if (sev->es_active && !sev->ghcb_version)
+ sev->ghcb_version = GHCB_VERSION_DEFAULT;
+
ret = sev_asid_new(sev);
if (ret)
goto e_no_asid;
@@ -276,6 +299,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
INIT_LIST_HEAD(&sev->regions_list);
INIT_LIST_HEAD(&sev->mirror_vms);
+ sev->need_init = false;
kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_SEV);
@@ -286,11 +310,53 @@ e_free:
sev_asid_free(sev);
sev->asid = 0;
e_no_asid:
+ sev->vmsa_features = 0;
sev->es_active = false;
sev->active = false;
return ret;
}
+static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ struct kvm_sev_init data = {
+ .vmsa_features = 0,
+ .ghcb_version = 0,
+ };
+ unsigned long vm_type;
+
+ if (kvm->arch.vm_type != KVM_X86_DEFAULT_VM)
+ return -EINVAL;
+
+ vm_type = (argp->id == KVM_SEV_INIT ? KVM_X86_SEV_VM : KVM_X86_SEV_ES_VM);
+
+ /*
+ * KVM_SEV_ES_INIT has been deprecated by KVM_SEV_INIT2, so it will
+ * continue to only ever support the minimal GHCB protocol version.
+ */
+ if (vm_type == KVM_X86_SEV_ES_VM)
+ data.ghcb_version = GHCB_VERSION_MIN;
+
+ return __sev_guest_init(kvm, argp, &data, vm_type);
+}
+
+static int sev_guest_init2(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_init data;
+
+ if (!sev->need_init)
+ return -EINVAL;
+
+ if (kvm->arch.vm_type != KVM_X86_SEV_VM &&
+ kvm->arch.vm_type != KVM_X86_SEV_ES_VM)
+ return -EINVAL;
+
+ if (copy_from_user(&data, u64_to_user_ptr(argp->data), sizeof(data)))
+ return -EFAULT;
+
+ return __sev_guest_init(kvm, argp, &data, kvm->arch.vm_type);
+}
+
static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
{
unsigned int asid = sev_get_asid(kvm);
@@ -339,7 +405,7 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (!sev_guest(kvm))
return -ENOTTY;
- if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+ if (copy_from_user(&params, u64_to_user_ptr(argp->data), sizeof(params)))
return -EFAULT;
memset(&start, 0, sizeof(start));
@@ -383,7 +449,7 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
/* return handle to userspace */
params.handle = start.handle;
- if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params))) {
+ if (copy_to_user(u64_to_user_ptr(argp->data), &params, sizeof(params))) {
sev_unbind_asid(kvm, start.handle);
ret = -EFAULT;
goto e_free_session;
@@ -522,7 +588,7 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (!sev_guest(kvm))
return -ENOTTY;
- if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+ if (copy_from_user(&params, u64_to_user_ptr(argp->data), sizeof(params)))
return -EFAULT;
vaddr = params.uaddr;
@@ -580,7 +646,13 @@ e_unpin:
static int sev_es_sync_vmsa(struct vcpu_svm *svm)
{
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+ struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;
struct sev_es_save_area *save = svm->sev_es.vmsa;
+ struct xregs_state *xsave;
+ const u8 *s;
+ u8 *d;
+ int i;
/* Check some debug related fields before encrypting the VMSA */
if (svm->vcpu.guest_debug || (svm->vmcb->save.dr7 & ~DR7_FIXED_1))
@@ -621,10 +693,44 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
save->xss = svm->vcpu.arch.ia32_xss;
save->dr6 = svm->vcpu.arch.dr6;
- if (sev_es_debug_swap_enabled) {
- save->sev_features |= SVM_SEV_FEAT_DEBUG_SWAP;
- pr_warn_once("Enabling DebugSwap with KVM_SEV_ES_INIT. "
- "This will not work starting with Linux 6.10\n");
+ save->sev_features = sev->vmsa_features;
+
+ /*
+ * Skip FPU and AVX setup with KVM_SEV_ES_INIT to avoid
+ * breaking older measurements.
+ */
+ if (vcpu->kvm->arch.vm_type != KVM_X86_DEFAULT_VM) {
+ xsave = &vcpu->arch.guest_fpu.fpstate->regs.xsave;
+ save->x87_dp = xsave->i387.rdp;
+ save->mxcsr = xsave->i387.mxcsr;
+ save->x87_ftw = xsave->i387.twd;
+ save->x87_fsw = xsave->i387.swd;
+ save->x87_fcw = xsave->i387.cwd;
+ save->x87_fop = xsave->i387.fop;
+ save->x87_ds = 0;
+ save->x87_cs = 0;
+ save->x87_rip = xsave->i387.rip;
+
+ for (i = 0; i < 8; i++) {
+ /*
+ * The format of the x87 save area is undocumented and
+ * definitely not what you would expect. It consists of
+ * an 8*8 bytes area with bytes 0-7, and an 8*2 bytes
+ * area with bytes 8-9 of each register.
+ */
+ d = save->fpreg_x87 + i * 8;
+ s = ((u8 *)xsave->i387.st_space) + i * 16;
+ memcpy(d, s, 8);
+ save->fpreg_x87[64 + i * 2] = s[8];
+ save->fpreg_x87[64 + i * 2 + 1] = s[9];
+ }
+ memcpy(save->fpreg_xmm, xsave->i387.xmm_space, 256);
+
+ s = get_xsave_addr(xsave, XFEATURE_YMM);
+ if (s)
+ memcpy(save->fpreg_ymm, s, 256);
+ else
+ memset(save->fpreg_ymm, 0, 256);
}
pr_debug("Virtual Machine Save Area (VMSA):\n");
@@ -658,13 +764,20 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
clflush_cache_range(svm->sev_es.vmsa, PAGE_SIZE);
vmsa.reserved = 0;
- vmsa.handle = to_kvm_svm(kvm)->sev_info.handle;
+ vmsa.handle = to_kvm_sev_info(kvm)->handle;
vmsa.address = __sme_pa(svm->sev_es.vmsa);
vmsa.len = PAGE_SIZE;
ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error);
if (ret)
return ret;
+ /*
+ * SEV-ES guests maintain an encrypted version of their FPU
+ * state which is restored and saved on VMRUN and VMEXIT.
+ * Mark vcpu->arch.guest_fpu->fpstate as scratch so it won't
+ * do xsave/xrstor on it.
+ */
+ fpstate_set_confidential(&vcpu->arch.guest_fpu);
vcpu->arch.guest_state_protected = true;
return 0;
}
@@ -695,7 +808,7 @@ static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- void __user *measure = (void __user *)(uintptr_t)argp->data;
+ void __user *measure = u64_to_user_ptr(argp->data);
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_launch_measure data;
struct kvm_sev_launch_measure params;
@@ -715,7 +828,7 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (!params.len)
goto cmd;
- p = (void __user *)(uintptr_t)params.uaddr;
+ p = u64_to_user_ptr(params.uaddr);
if (p) {
if (params.len > SEV_FW_BLOB_MAX_SIZE)
return -EINVAL;
@@ -788,7 +901,7 @@ static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
params.state = data.state;
params.handle = data.handle;
- if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
+ if (copy_to_user(u64_to_user_ptr(argp->data), &params, sizeof(params)))
ret = -EFAULT;
return ret;
@@ -953,7 +1066,7 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
if (!sev_guest(kvm))
return -ENOTTY;
- if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug)))
+ if (copy_from_user(&debug, u64_to_user_ptr(argp->data), sizeof(debug)))
return -EFAULT;
if (!debug.len || debug.src_uaddr + debug.len < debug.src_uaddr)
@@ -1037,7 +1150,7 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (!sev_guest(kvm))
return -ENOTTY;
- if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+ if (copy_from_user(&params, u64_to_user_ptr(argp->data), sizeof(params)))
return -EFAULT;
pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1);
@@ -1101,7 +1214,7 @@ e_unpin_memory:
static int sev_get_attestation_report(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- void __user *report = (void __user *)(uintptr_t)argp->data;
+ void __user *report = u64_to_user_ptr(argp->data);
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_attestation_report data;
struct kvm_sev_attestation_report params;
@@ -1112,7 +1225,7 @@ static int sev_get_attestation_report(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (!sev_guest(kvm))
return -ENOTTY;
- if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+ if (copy_from_user(&params, u64_to_user_ptr(argp->data), sizeof(params)))
return -EFAULT;
memset(&data, 0, sizeof(data));
@@ -1121,7 +1234,7 @@ static int sev_get_attestation_report(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (!params.len)
goto cmd;
- p = (void __user *)(uintptr_t)params.uaddr;
+ p = u64_to_user_ptr(params.uaddr);
if (p) {
if (params.len > SEV_FW_BLOB_MAX_SIZE)
return -EINVAL;
@@ -1174,7 +1287,7 @@ __sev_send_start_query_session_length(struct kvm *kvm, struct kvm_sev_cmd *argp,
ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error);
params->session_len = data.session_len;
- if (copy_to_user((void __user *)(uintptr_t)argp->data, params,
+ if (copy_to_user(u64_to_user_ptr(argp->data), params,
sizeof(struct kvm_sev_send_start)))
ret = -EFAULT;
@@ -1193,7 +1306,7 @@ static int sev_send_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (!sev_guest(kvm))
return -ENOTTY;
- if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data,
+ if (copy_from_user(&params, u64_to_user_ptr(argp->data),
sizeof(struct kvm_sev_send_start)))
return -EFAULT;
@@ -1248,7 +1361,7 @@ static int sev_send_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error);
- if (!ret && copy_to_user((void __user *)(uintptr_t)params.session_uaddr,
+ if (!ret && copy_to_user(u64_to_user_ptr(params.session_uaddr),
session_data, params.session_len)) {
ret = -EFAULT;
goto e_free_amd_cert;
@@ -1256,7 +1369,7 @@ static int sev_send_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
params.policy = data.policy;
params.session_len = data.session_len;
- if (copy_to_user((void __user *)(uintptr_t)argp->data, &params,
+ if (copy_to_user(u64_to_user_ptr(argp->data), &params,
sizeof(struct kvm_sev_send_start)))
ret = -EFAULT;
@@ -1287,7 +1400,7 @@ __sev_send_update_data_query_lengths(struct kvm *kvm, struct kvm_sev_cmd *argp,
params->hdr_len = data.hdr_len;
params->trans_len = data.trans_len;
- if (copy_to_user((void __user *)(uintptr_t)argp->data, params,
+ if (copy_to_user(u64_to_user_ptr(argp->data), params,
sizeof(struct kvm_sev_send_update_data)))
ret = -EFAULT;
@@ -1307,7 +1420,7 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (!sev_guest(kvm))
return -ENOTTY;
- if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data,
+ if (copy_from_user(&params, u64_to_user_ptr(argp->data),
sizeof(struct kvm_sev_send_update_data)))
return -EFAULT;
@@ -1358,14 +1471,14 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
goto e_free_trans_data;
/* copy transport buffer to user space */
- if (copy_to_user((void __user *)(uintptr_t)params.trans_uaddr,
+ if (copy_to_user(u64_to_user_ptr(params.trans_uaddr),
trans_data, params.trans_len)) {
ret = -EFAULT;
goto e_free_trans_data;
}
/* Copy packet header to userspace. */
- if (copy_to_user((void __user *)(uintptr_t)params.hdr_uaddr, hdr,
+ if (copy_to_user(u64_to_user_ptr(params.hdr_uaddr), hdr,
params.hdr_len))
ret = -EFAULT;
@@ -1417,7 +1530,7 @@ static int sev_receive_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
return -ENOTTY;
/* Get parameter from the userspace */
- if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data,
+ if (copy_from_user(&params, u64_to_user_ptr(argp->data),
sizeof(struct kvm_sev_receive_start)))
return -EFAULT;
@@ -1459,7 +1572,7 @@ static int sev_receive_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
}
params.handle = start.handle;
- if (copy_to_user((void __user *)(uintptr_t)argp->data,
+ if (copy_to_user(u64_to_user_ptr(argp->data),
&params, sizeof(struct kvm_sev_receive_start))) {
ret = -EFAULT;
sev_unbind_asid(kvm, start.handle);
@@ -1490,7 +1603,7 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (!sev_guest(kvm))
return -EINVAL;
- if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data,
+ if (copy_from_user(&params, u64_to_user_ptr(argp->data),
sizeof(struct kvm_sev_receive_update_data)))
return -EFAULT;
@@ -1705,6 +1818,7 @@ static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm)
dst->pages_locked = src->pages_locked;
dst->enc_context_owner = src->enc_context_owner;
dst->es_active = src->es_active;
+ dst->vmsa_features = src->vmsa_features;
src->asid = 0;
src->active = false;
@@ -1812,7 +1926,8 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
if (ret)
goto out_fput;
- if (sev_guest(kvm) || !sev_guest(source_kvm)) {
+ if (kvm->arch.vm_type != source_kvm->arch.vm_type ||
+ sev_guest(kvm) || !sev_guest(source_kvm)) {
ret = -EINVAL;
goto out_unlock;
}
@@ -1861,6 +1976,21 @@ out_fput:
return ret;
}
+int sev_dev_get_attr(u32 group, u64 attr, u64 *val)
+{
+ if (group != KVM_X86_GRP_SEV)
+ return -ENXIO;
+
+ switch (attr) {
+ case KVM_X86_SEV_VMSA_FEATURES:
+ *val = sev_supported_vmsa_features;
+ return 0;
+
+ default:
+ return -ENXIO;
+ }
+}
+
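As a rough illustration (not part of this patch): userspace could consume the new sev_dev_get_attr() hook above through KVM's device-attribute ioctls. The sketch below assumes the KVM_X86_GRP_SEV and KVM_X86_SEV_VMSA_FEATURES UAPI constants that accompany this series and that /dev/kvm accepts KVM_GET_DEVICE_ATTR; treat it as a sketch, not a reference implementation.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Query which SEV_FEATURES bits may be requested in kvm_sev_init.vmsa_features. */
static int get_supported_vmsa_features(int kvm_fd, uint64_t *features)
{
	struct kvm_device_attr attr = {
		.group = KVM_X86_GRP_SEV,
		.attr  = KVM_X86_SEV_VMSA_FEATURES,
		.addr  = (uint64_t)(unsigned long)features,
	};

	return ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr);
}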
int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
{
struct kvm_sev_cmd sev_cmd;
@@ -1894,6 +2024,9 @@ int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
case KVM_SEV_INIT:
r = sev_guest_init(kvm, &sev_cmd);
break;
+ case KVM_SEV_INIT2:
+ r = sev_guest_init2(kvm, &sev_cmd);
+ break;
case KVM_SEV_LAUNCH_START:
r = sev_launch_start(kvm, &sev_cmd);
break;
@@ -2121,6 +2254,7 @@ int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd)
mirror_sev->asid = source_sev->asid;
mirror_sev->fd = source_sev->fd;
mirror_sev->es_active = source_sev->es_active;
+ mirror_sev->need_init = false;
mirror_sev->handle = source_sev->handle;
INIT_LIST_HEAD(&mirror_sev->regions_list);
INIT_LIST_HEAD(&mirror_sev->mirror_vms);
@@ -2186,15 +2320,18 @@ void sev_vm_destroy(struct kvm *kvm)
void __init sev_set_cpu_caps(void)
{
- if (!sev_enabled)
- kvm_cpu_cap_clear(X86_FEATURE_SEV);
- if (!sev_es_enabled)
- kvm_cpu_cap_clear(X86_FEATURE_SEV_ES);
+ if (sev_enabled) {
+ kvm_cpu_cap_set(X86_FEATURE_SEV);
+ kvm_caps.supported_vm_types |= BIT(KVM_X86_SEV_VM);
+ }
+ if (sev_es_enabled) {
+ kvm_cpu_cap_set(X86_FEATURE_SEV_ES);
+ kvm_caps.supported_vm_types |= BIT(KVM_X86_SEV_ES_VM);
+ }
}
void __init sev_hardware_setup(void)
{
-#ifdef CONFIG_KVM_AMD_SEV
unsigned int eax, ebx, ecx, edx, sev_asid_count, sev_es_asid_count;
bool sev_es_supported = false;
bool sev_supported = false;
@@ -2294,7 +2431,10 @@ out:
if (!sev_es_enabled || !cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP) ||
!cpu_feature_enabled(X86_FEATURE_NO_NESTED_DATA_BP))
sev_es_debug_swap_enabled = false;
-#endif
+
+ sev_supported_vmsa_features = 0;
+ if (sev_es_debug_swap_enabled)
+ sev_supported_vmsa_features |= SVM_SEV_FEAT_DEBUG_SWAP;
}
void sev_hardware_unsetup(void)
@@ -2585,6 +2725,8 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
case SVM_VMGEXIT_AP_HLT_LOOP:
case SVM_VMGEXIT_AP_JUMP_TABLE:
case SVM_VMGEXIT_UNSUPPORTED_EVENT:
+ case SVM_VMGEXIT_HV_FEATURES:
+ case SVM_VMGEXIT_TERM_REQUEST:
break;
default:
reason = GHCB_ERR_INVALID_EVENT;
@@ -2615,6 +2757,9 @@ vmgexit_err:
void sev_es_unmap_ghcb(struct vcpu_svm *svm)
{
+ /* Clear any indication that the vCPU is in a type of AP Reset Hold */
+ svm->sev_es.ap_reset_hold_type = AP_RESET_HOLD_NONE;
+
if (!svm->sev_es.ghcb)
return;
@@ -2774,6 +2919,7 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
{
struct vmcb_control_area *control = &svm->vmcb->control;
struct kvm_vcpu *vcpu = &svm->vcpu;
+ struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;
u64 ghcb_info;
int ret = 1;
@@ -2784,7 +2930,7 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
switch (ghcb_info) {
case GHCB_MSR_SEV_INFO_REQ:
- set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
+ set_ghcb_msr(svm, GHCB_MSR_SEV_INFO((__u64)sev->ghcb_version,
GHCB_VERSION_MIN,
sev_enc_bit));
break;
@@ -2826,6 +2972,28 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
GHCB_MSR_INFO_POS);
break;
}
+ case GHCB_MSR_AP_RESET_HOLD_REQ:
+ svm->sev_es.ap_reset_hold_type = AP_RESET_HOLD_MSR_PROTO;
+ ret = kvm_emulate_ap_reset_hold(&svm->vcpu);
+
+ /*
+ * Preset the result to a non-SIPI return and then only set
+ * the result to non-zero when delivering a SIPI.
+ */
+ set_ghcb_msr_bits(svm, 0,
+ GHCB_MSR_AP_RESET_HOLD_RESULT_MASK,
+ GHCB_MSR_AP_RESET_HOLD_RESULT_POS);
+
+ set_ghcb_msr_bits(svm, GHCB_MSR_AP_RESET_HOLD_RESP,
+ GHCB_MSR_INFO_MASK,
+ GHCB_MSR_INFO_POS);
+ break;
+ case GHCB_MSR_HV_FT_REQ:
+ set_ghcb_msr_bits(svm, GHCB_HV_FT_SUPPORTED,
+ GHCB_MSR_HV_FT_MASK, GHCB_MSR_HV_FT_POS);
+ set_ghcb_msr_bits(svm, GHCB_MSR_HV_FT_RESP,
+ GHCB_MSR_INFO_MASK, GHCB_MSR_INFO_POS);
+ break;
case GHCB_MSR_TERM_REQ: {
u64 reason_set, reason_code;
@@ -2925,6 +3093,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
ret = 1;
break;
case SVM_VMGEXIT_AP_HLT_LOOP:
+ svm->sev_es.ap_reset_hold_type = AP_RESET_HOLD_NAE_EVENT;
ret = kvm_emulate_ap_reset_hold(vcpu);
break;
case SVM_VMGEXIT_AP_JUMP_TABLE: {
@@ -2949,6 +3118,19 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
ret = 1;
break;
}
+ case SVM_VMGEXIT_HV_FEATURES:
+ ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_HV_FT_SUPPORTED);
+
+ ret = 1;
+ break;
+ case SVM_VMGEXIT_TERM_REQUEST:
+ pr_info("SEV-ES guest requested termination: reason %#llx info %#llx\n",
+ control->exit_info_1, control->exit_info_2);
+ vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+ vcpu->run->system_event.type = KVM_SYSTEM_EVENT_SEV_TERM;
+ vcpu->run->system_event.ndata = 1;
+ vcpu->run->system_event.data[0] = control->ghcb_gpa;
+ break;
case SVM_VMGEXIT_UNSUPPORTED_EVENT:
vcpu_unimpl(vcpu,
"vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
@@ -3063,7 +3245,7 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
svm_set_intercept(svm, TRAP_CR8_WRITE);
vmcb->control.intercepts[INTERCEPT_DR] = 0;
- if (!sev_es_debug_swap_enabled) {
+ if (!sev_vcpu_has_debug_swap(svm)) {
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
recalc_intercepts(svm);
@@ -3109,16 +3291,19 @@ void sev_init_vmcb(struct vcpu_svm *svm)
void sev_es_vcpu_reset(struct vcpu_svm *svm)
{
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+ struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;
+
/*
* Set the GHCB MSR value as per the GHCB specification when emulating
* vCPU RESET for an SEV-ES guest.
*/
- set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
+ set_ghcb_msr(svm, GHCB_MSR_SEV_INFO((__u64)sev->ghcb_version,
GHCB_VERSION_MIN,
sev_enc_bit));
}
-void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa)
+void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_area *hostsa)
{
/*
* All host state for SEV-ES guests is categorized into three swap types
@@ -3146,7 +3331,7 @@ void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa)
* the CPU (Type-B). If DebugSwap is disabled/unsupported, the CPU both
* saves and loads debug registers (Type-A).
*/
- if (sev_es_debug_swap_enabled) {
+ if (sev_vcpu_has_debug_swap(svm)) {
hostsa->dr0 = native_get_debugreg(0);
hostsa->dr1 = native_get_debugreg(1);
hostsa->dr2 = native_get_debugreg(2);
@@ -3168,15 +3353,31 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
return;
}
- /*
- * Subsequent SIPI: Return from an AP Reset Hold VMGEXIT, where
- * the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a
- * non-zero value.
- */
- if (!svm->sev_es.ghcb)
- return;
+ /* Subsequent SIPI */
+ switch (svm->sev_es.ap_reset_hold_type) {
+ case AP_RESET_HOLD_NAE_EVENT:
+ /*
+ * Return from an AP Reset Hold VMGEXIT, where the guest will
+ * set the CS and RIP. Set SW_EXIT_INFO_2 to a non-zero value.
+ */
+ ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 1);
+ break;
+ case AP_RESET_HOLD_MSR_PROTO:
+ /*
+ * Return from an AP Reset Hold VMGEXIT, where the guest will
+ * set the CS and RIP. Set GHCB data field to a non-zero value.
+ */
+ set_ghcb_msr_bits(svm, 1,
+ GHCB_MSR_AP_RESET_HOLD_RESULT_MASK,
+ GHCB_MSR_AP_RESET_HOLD_RESULT_POS);
- ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 1);
+ set_ghcb_msr_bits(svm, GHCB_MSR_AP_RESET_HOLD_RESP,
+ GHCB_MSR_INFO_MASK,
+ GHCB_MSR_INFO_POS);
+ break;
+ default:
+ break;
+ }
}
struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu)
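For orientation only, before the diff moves on to svm.c: a minimal userspace sketch of the new init flow, assuming the struct kvm_sev_init, KVM_SEV_INIT2 and KVM_X86_SEV_ES_VM UAPI definitions that accompany this series. A real VMM would negotiate vmsa_features (see sev_dev_get_attr() above) and handle errors; this only shows the ordering constraints enforced by __sev_guest_init() and svm_vcpu_pre_run().

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* kvm_fd is an open /dev/kvm, sev_fd an open /dev/sev. */
static int create_sev_es_vm(int kvm_fd, int sev_fd, int *vm_fd)
{
	struct kvm_sev_init init;
	struct kvm_sev_cmd cmd;

	/* The VM type selects SEV-ES; need_init stays set until INIT2 succeeds. */
	*vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, KVM_X86_SEV_ES_VM);
	if (*vm_fd < 0)
		return *vm_fd;

	/* All zeros: no optional VMSA features, default GHCB protocol version. */
	memset(&init, 0, sizeof(init));
	memset(&cmd, 0, sizeof(cmd));
	cmd.id = KVM_SEV_INIT2;
	cmd.data = (unsigned long)&init;
	cmd.sev_fd = sev_fd;

	/* Must be issued before any KVM_CREATE_VCPU, and before KVM_RUN can succeed. */
	return ioctl(*vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
}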
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9aaf83c8d57d..c8dc25886c16 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1433,14 +1433,6 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu)
vmsa_page = snp_safe_alloc_page(vcpu);
if (!vmsa_page)
goto error_free_vmcb_page;
-
- /*
- * SEV-ES guests maintain an encrypted version of their FPU
- * state which is restored and saved on VMRUN and VMEXIT.
- * Mark vcpu->arch.guest_fpu->fpstate as scratch so it won't
- * do xsave/xrstor on it.
- */
- fpstate_set_confidential(&vcpu->arch.guest_fpu);
}
err = avic_init_vcpu(svm);
@@ -1525,7 +1517,7 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
*/
vmsave(sd->save_area_pa);
if (sev_es_guest(vcpu->kvm))
- sev_es_prepare_switch_to_guest(sev_es_host_save_area(sd));
+ sev_es_prepare_switch_to_guest(svm, sev_es_host_save_area(sd));
if (tsc_scaling)
__svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
@@ -2056,6 +2048,15 @@ static int npf_interception(struct kvm_vcpu *vcpu)
u64 fault_address = svm->vmcb->control.exit_info_2;
u64 error_code = svm->vmcb->control.exit_info_1;
+ /*
+ * WARN if hardware generates a fault with an error code that collides
+ * with KVM-defined synthetic flags. Clear the flags and continue on,
+ * i.e. don't terminate the VM, as KVM can't possibly be relying on a
+ * flag that KVM doesn't know about.
+ */
+ if (WARN_ON_ONCE(error_code & PFERR_SYNTHETIC_MASK))
+ error_code &= ~PFERR_SYNTHETIC_MASK;
+
trace_kvm_page_fault(vcpu, fault_address, error_code);
return kvm_mmu_page_fault(vcpu, fault_address, error_code,
static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
@@ -3304,7 +3305,9 @@ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[SVM_EXIT_RSM] = rsm_interception,
[SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception,
[SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception,
+#ifdef CONFIG_KVM_AMD_SEV
[SVM_EXIT_VMGEXIT] = sev_handle_vmgexit,
+#endif
};
static void dump_vmcb(struct kvm_vcpu *vcpu)
@@ -4085,6 +4088,9 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu)
static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu)
{
+ if (to_kvm_sev_info(vcpu->kvm)->need_init)
+ return -EINVAL;
+
return 1;
}
@@ -4892,6 +4898,14 @@ static void svm_vm_destroy(struct kvm *kvm)
static int svm_vm_init(struct kvm *kvm)
{
+ int type = kvm->arch.vm_type;
+
+ if (type != KVM_X86_DEFAULT_VM &&
+ type != KVM_X86_SW_PROTECTED_VM) {
+ kvm->arch.has_protected_state = (type == KVM_X86_SEV_ES_VM);
+ to_kvm_sev_info(kvm)->need_init = true;
+ }
+
if (!pause_filter_count || !pause_filter_thresh)
kvm->arch.pause_in_guest = true;
@@ -5026,6 +5040,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.enable_smi_window = svm_enable_smi_window,
#endif
+#ifdef CONFIG_KVM_AMD_SEV
+ .dev_get_attr = sev_dev_get_attr,
.mem_enc_ioctl = sev_mem_enc_ioctl,
.mem_enc_register_region = sev_mem_enc_register_region,
.mem_enc_unregister_region = sev_mem_enc_unregister_region,
@@ -5033,7 +5049,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.vm_copy_enc_context_from = sev_vm_copy_enc_context_from,
.vm_move_enc_context_from = sev_vm_move_enc_context_from,
-
+#endif
.check_emulate_instruction = svm_check_emulate_instruction,
.apic_init_signal_blocked = svm_apic_init_signal_blocked,
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 33878efdebc8..be57213cd295 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -79,12 +79,15 @@ enum {
struct kvm_sev_info {
bool active; /* SEV enabled guest */
bool es_active; /* SEV-ES enabled guest */
+ bool need_init; /* waiting for SEV_INIT2 */
unsigned int asid; /* ASID used for this guest */
unsigned int handle; /* SEV firmware handle */
int fd; /* SEV device fd */
unsigned long pages_locked; /* Number of pages locked */
struct list_head regions_list; /* List of registered regions */
u64 ap_jump_table; /* SEV-ES AP Jump Table address */
+ u64 vmsa_features; /* SEV_FEATURES bits set in the guest's VMSA */
+ u16 ghcb_version; /* Highest guest GHCB protocol version allowed */
struct kvm *enc_context_owner; /* Owner of copied encryption context */
struct list_head mirror_vms; /* List of VMs mirroring */
struct list_head mirror_entry; /* Use as a list entry of mirrors */
@@ -197,6 +200,7 @@ struct vcpu_sev_es_state {
u8 valid_bitmap[16];
struct kvm_host_map ghcb_map;
bool received_first_sipi;
+ unsigned int ap_reset_hold_type; /* AP_RESET_HOLD_{NONE,NAE_EVENT,MSR_PROTO} */
/* SEV-ES scratch area support */
u64 sw_scratch;
@@ -318,6 +322,11 @@ static __always_inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
return container_of(kvm, struct kvm_svm, kvm);
}
+static __always_inline struct kvm_sev_info *to_kvm_sev_info(struct kvm *kvm)
+{
+ return &to_kvm_svm(kvm)->sev_info;
+}
+
static __always_inline bool sev_guest(struct kvm *kvm)
{
#ifdef CONFIG_KVM_AMD_SEV
@@ -664,13 +673,16 @@ void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu);
/* sev.c */
-#define GHCB_VERSION_MAX 1ULL
-#define GHCB_VERSION_MIN 1ULL
-
-
-extern unsigned int max_sev_asid;
+void pre_sev_run(struct vcpu_svm *svm, int cpu);
+void sev_init_vmcb(struct vcpu_svm *svm);
+void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm);
+int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in);
+void sev_es_vcpu_reset(struct vcpu_svm *svm);
+void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
+void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_area *hostsa);
+void sev_es_unmap_ghcb(struct vcpu_svm *svm);
-void sev_vm_destroy(struct kvm *kvm);
+#ifdef CONFIG_KVM_AMD_SEV
int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp);
int sev_mem_enc_register_region(struct kvm *kvm,
struct kvm_enc_region *range);
@@ -679,22 +691,32 @@ int sev_mem_enc_unregister_region(struct kvm *kvm,
int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd);
int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd);
void sev_guest_memory_reclaimed(struct kvm *kvm);
+int sev_handle_vmgexit(struct kvm_vcpu *vcpu);
-void pre_sev_run(struct vcpu_svm *svm, int cpu);
+/* These symbols are used in common code and are stubbed below. */
+struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu);
+void sev_free_vcpu(struct kvm_vcpu *vcpu);
+void sev_vm_destroy(struct kvm *kvm);
void __init sev_set_cpu_caps(void);
void __init sev_hardware_setup(void);
void sev_hardware_unsetup(void);
int sev_cpu_init(struct svm_cpu_data *sd);
-void sev_init_vmcb(struct vcpu_svm *svm);
-void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm);
-void sev_free_vcpu(struct kvm_vcpu *vcpu);
-int sev_handle_vmgexit(struct kvm_vcpu *vcpu);
-int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in);
-void sev_es_vcpu_reset(struct vcpu_svm *svm);
-void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
-void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa);
-void sev_es_unmap_ghcb(struct vcpu_svm *svm);
-struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu);
+int sev_dev_get_attr(u32 group, u64 attr, u64 *val);
+extern unsigned int max_sev_asid;
+#else
+static inline struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu) {
+ return alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+}
+
+static inline void sev_free_vcpu(struct kvm_vcpu *vcpu) {}
+static inline void sev_vm_destroy(struct kvm *kvm) {}
+static inline void __init sev_set_cpu_caps(void) {}
+static inline void __init sev_hardware_setup(void) {}
+static inline void sev_hardware_unsetup(void) {}
+static inline int sev_cpu_init(struct svm_cpu_data *sd) { return 0; }
+static inline int sev_dev_get_attr(u32 group, u64 attr, u64 *val) { return -ENXIO; }
+#define max_sev_asid 0
+#endif
/* vmenter.S */
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index c6b4b1728006..9d0b02ef307e 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1074,7 +1074,7 @@ TRACE_EVENT(kvm_smm_transition,
);
/*
- * Tracepoint for VT-d posted-interrupts.
+ * Tracepoint for VT-d posted-interrupts and AMD-Vi Guest Virtual APIC.
*/
TRACE_EVENT(kvm_pi_irte_update,
TP_PROTO(unsigned int host_irq, unsigned int vcpu_id,
@@ -1100,7 +1100,7 @@ TRACE_EVENT(kvm_pi_irte_update,
__entry->set = set;
),
- TP_printk("VT-d PI is %s for irq %u, vcpu %u, gsi: 0x%x, "
+ TP_printk("PI is %s for irq %u, vcpu %u, gsi: 0x%x, "
"gvec: 0x%x, pi_desc_addr: 0x%llx",
__entry->set ? "enabled and being updated" : "disabled",
__entry->host_irq,
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
new file mode 100644
index 000000000000..d4ed681785fd
--- /dev/null
+++ b/arch/x86/kvm/vmx/main.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/moduleparam.h>
+
+#include "x86_ops.h"
+#include "vmx.h"
+#include "nested.h"
+#include "pmu.h"
+#include "posted_intr.h"
+
+#define VMX_REQUIRED_APICV_INHIBITS \
+ (BIT(APICV_INHIBIT_REASON_DISABLE)| \
+ BIT(APICV_INHIBIT_REASON_ABSENT) | \
+ BIT(APICV_INHIBIT_REASON_HYPERV) | \
+ BIT(APICV_INHIBIT_REASON_BLOCKIRQ) | \
+ BIT(APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED) | \
+ BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) | \
+ BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED))
+
+struct kvm_x86_ops vt_x86_ops __initdata = {
+ .name = KBUILD_MODNAME,
+
+ .check_processor_compatibility = vmx_check_processor_compat,
+
+ .hardware_unsetup = vmx_hardware_unsetup,
+
+ .hardware_enable = vmx_hardware_enable,
+ .hardware_disable = vmx_hardware_disable,
+ .has_emulated_msr = vmx_has_emulated_msr,
+
+ .vm_size = sizeof(struct kvm_vmx),
+ .vm_init = vmx_vm_init,
+ .vm_destroy = vmx_vm_destroy,
+
+ .vcpu_precreate = vmx_vcpu_precreate,
+ .vcpu_create = vmx_vcpu_create,
+ .vcpu_free = vmx_vcpu_free,
+ .vcpu_reset = vmx_vcpu_reset,
+
+ .prepare_switch_to_guest = vmx_prepare_switch_to_guest,
+ .vcpu_load = vmx_vcpu_load,
+ .vcpu_put = vmx_vcpu_put,
+
+ .update_exception_bitmap = vmx_update_exception_bitmap,
+ .get_msr_feature = vmx_get_msr_feature,
+ .get_msr = vmx_get_msr,
+ .set_msr = vmx_set_msr,
+ .get_segment_base = vmx_get_segment_base,
+ .get_segment = vmx_get_segment,
+ .set_segment = vmx_set_segment,
+ .get_cpl = vmx_get_cpl,
+ .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
+ .is_valid_cr0 = vmx_is_valid_cr0,
+ .set_cr0 = vmx_set_cr0,
+ .is_valid_cr4 = vmx_is_valid_cr4,
+ .set_cr4 = vmx_set_cr4,
+ .set_efer = vmx_set_efer,
+ .get_idt = vmx_get_idt,
+ .set_idt = vmx_set_idt,
+ .get_gdt = vmx_get_gdt,
+ .set_gdt = vmx_set_gdt,
+ .set_dr7 = vmx_set_dr7,
+ .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs,
+ .cache_reg = vmx_cache_reg,
+ .get_rflags = vmx_get_rflags,
+ .set_rflags = vmx_set_rflags,
+ .get_if_flag = vmx_get_if_flag,
+
+ .flush_tlb_all = vmx_flush_tlb_all,
+ .flush_tlb_current = vmx_flush_tlb_current,
+ .flush_tlb_gva = vmx_flush_tlb_gva,
+ .flush_tlb_guest = vmx_flush_tlb_guest,
+
+ .vcpu_pre_run = vmx_vcpu_pre_run,
+ .vcpu_run = vmx_vcpu_run,
+ .handle_exit = vmx_handle_exit,
+ .skip_emulated_instruction = vmx_skip_emulated_instruction,
+ .update_emulated_instruction = vmx_update_emulated_instruction,
+ .set_interrupt_shadow = vmx_set_interrupt_shadow,
+ .get_interrupt_shadow = vmx_get_interrupt_shadow,
+ .patch_hypercall = vmx_patch_hypercall,
+ .inject_irq = vmx_inject_irq,
+ .inject_nmi = vmx_inject_nmi,
+ .inject_exception = vmx_inject_exception,
+ .cancel_injection = vmx_cancel_injection,
+ .interrupt_allowed = vmx_interrupt_allowed,
+ .nmi_allowed = vmx_nmi_allowed,
+ .get_nmi_mask = vmx_get_nmi_mask,
+ .set_nmi_mask = vmx_set_nmi_mask,
+ .enable_nmi_window = vmx_enable_nmi_window,
+ .enable_irq_window = vmx_enable_irq_window,
+ .update_cr8_intercept = vmx_update_cr8_intercept,
+ .set_virtual_apic_mode = vmx_set_virtual_apic_mode,
+ .set_apic_access_page_addr = vmx_set_apic_access_page_addr,
+ .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
+ .load_eoi_exitmap = vmx_load_eoi_exitmap,
+ .apicv_pre_state_restore = vmx_apicv_pre_state_restore,
+ .required_apicv_inhibits = VMX_REQUIRED_APICV_INHIBITS,
+ .hwapic_irr_update = vmx_hwapic_irr_update,
+ .hwapic_isr_update = vmx_hwapic_isr_update,
+ .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
+ .sync_pir_to_irr = vmx_sync_pir_to_irr,
+ .deliver_interrupt = vmx_deliver_interrupt,
+ .dy_apicv_has_pending_interrupt = pi_has_pending_interrupt,
+
+ .set_tss_addr = vmx_set_tss_addr,
+ .set_identity_map_addr = vmx_set_identity_map_addr,
+ .get_mt_mask = vmx_get_mt_mask,
+
+ .get_exit_info = vmx_get_exit_info,
+
+ .vcpu_after_set_cpuid = vmx_vcpu_after_set_cpuid,
+
+ .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
+
+ .get_l2_tsc_offset = vmx_get_l2_tsc_offset,
+ .get_l2_tsc_multiplier = vmx_get_l2_tsc_multiplier,
+ .write_tsc_offset = vmx_write_tsc_offset,
+ .write_tsc_multiplier = vmx_write_tsc_multiplier,
+
+ .load_mmu_pgd = vmx_load_mmu_pgd,
+
+ .check_intercept = vmx_check_intercept,
+ .handle_exit_irqoff = vmx_handle_exit_irqoff,
+
+ .sched_in = vmx_sched_in,
+
+ .cpu_dirty_log_size = PML_ENTITY_NUM,
+ .update_cpu_dirty_logging = vmx_update_cpu_dirty_logging,
+
+ .nested_ops = &vmx_nested_ops,
+
+ .pi_update_irte = vmx_pi_update_irte,
+ .pi_start_assignment = vmx_pi_start_assignment,
+
+#ifdef CONFIG_X86_64
+ .set_hv_timer = vmx_set_hv_timer,
+ .cancel_hv_timer = vmx_cancel_hv_timer,
+#endif
+
+ .setup_mce = vmx_setup_mce,
+
+#ifdef CONFIG_KVM_SMM
+ .smi_allowed = vmx_smi_allowed,
+ .enter_smm = vmx_enter_smm,
+ .leave_smm = vmx_leave_smm,
+ .enable_smi_window = vmx_enable_smi_window,
+#endif
+
+ .check_emulate_instruction = vmx_check_emulate_instruction,
+ .apic_init_signal_blocked = vmx_apic_init_signal_blocked,
+ .migrate_timers = vmx_migrate_timers,
+
+ .msr_filter_changed = vmx_msr_filter_changed,
+ .complete_emulated_msr = kvm_complete_insn_gp,
+
+ .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
+
+ .get_untagged_addr = vmx_get_untagged_addr,
+};
+
+struct kvm_x86_init_ops vt_init_ops __initdata = {
+ .hardware_setup = vmx_hardware_setup,
+ .handle_intel_pt_intr = NULL,
+
+ .runtime_ops = &vt_x86_ops,
+ .pmu_ops = &intel_pmu_ops,
+};
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index d05ddf751491..d5b832126e34 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -409,18 +409,40 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ unsigned long exit_qualification;
u32 vm_exit_reason;
- unsigned long exit_qualification = vcpu->arch.exit_qualification;
if (vmx->nested.pml_full) {
vm_exit_reason = EXIT_REASON_PML_FULL;
vmx->nested.pml_full = false;
- exit_qualification &= INTR_INFO_UNBLOCK_NMI;
+
+ /*
+ * It should be impossible to trigger a nested PML Full VM-Exit
+ * for anything other than an EPT Violation from L2. KVM *can*
+ * trigger nEPT page fault injection in response to an EPT
+ * Misconfig, e.g. if the MMIO SPTE was stale and L1's EPT
+ * tables also changed, but KVM should not treat EPT Misconfig
+ * VM-Exits as writes.
+ */
+ WARN_ON_ONCE(vmx->exit_reason.basic != EXIT_REASON_EPT_VIOLATION);
+
+ /*
+ * PML Full and EPT Violation VM-Exits both use bit 12 to report
+ * "NMI unblocking due to IRET", i.e. the bit can be propagated
+ * as-is from the original EXIT_QUALIFICATION.
+ */
+ exit_qualification = vmx_get_exit_qual(vcpu) & INTR_INFO_UNBLOCK_NMI;
} else {
- if (fault->error_code & PFERR_RSVD_MASK)
+ if (fault->error_code & PFERR_RSVD_MASK) {
vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
- else
+ exit_qualification = 0;
+ } else {
+ exit_qualification = fault->exit_qualification;
+ exit_qualification |= vmx_get_exit_qual(vcpu) &
+ (EPT_VIOLATION_GVA_IS_VALID |
+ EPT_VIOLATION_GVA_TRANSLATED);
vm_exit_reason = EXIT_REASON_EPT_VIOLATION;
+ }
/*
* Although the caller (kvm_inject_emulated_page_fault) would
diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h
index 7c1996b433e2..b25625314658 100644
--- a/arch/x86/kvm/vmx/vmcs.h
+++ b/arch/x86/kvm/vmx/vmcs.h
@@ -140,6 +140,11 @@ static inline bool is_nm_fault(u32 intr_info)
return is_exception_n(intr_info, NM_VECTOR);
}
+static inline bool is_ve_fault(u32 intr_info)
+{
+ return is_exception_n(intr_info, VE_VECTOR);
+}
+
/* Undocumented: icebp/int1 */
static inline bool is_icebp(u32 intr_info)
{
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index becefaf95cab..6051fad5945f 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -68,6 +68,7 @@
#include "vmcs12.h"
#include "vmx.h"
#include "x86.h"
+#include "x86_ops.h"
#include "smm.h"
#include "vmx_onhyperv.h"
#include "posted_intr.h"
@@ -531,8 +532,6 @@ static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
static unsigned long host_idt_base;
#if IS_ENABLED(CONFIG_HYPERV)
-static struct kvm_x86_ops vmx_x86_ops __initdata;
-
static bool __read_mostly enlightened_vmcs = true;
module_param(enlightened_vmcs, bool, 0444);
@@ -582,9 +581,8 @@ static __init void hv_init_evmcs(void)
}
if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH)
- vmx_x86_ops.enable_l2_tlb_flush
+ vt_x86_ops.enable_l2_tlb_flush
= hv_enable_l2_tlb_flush;
-
} else {
enlightened_vmcs = false;
}
@@ -875,6 +873,12 @@ void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu)
eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
(1u << DB_VECTOR) | (1u << AC_VECTOR);
/*
+ * #VE isn't used for VMX. To test against unexpected changes
+ * related to #VE for VMX, intercept unexpected #VE and warn on it.
+ */
+ if (IS_ENABLED(CONFIG_KVM_INTEL_PROVE_VE))
+ eb |= 1u << VE_VECTOR;
+ /*
* Guest access to VMware backdoor ports could legitimately
* trigger #GP because of TSS I/O permission bitmap.
* We intercept those #GP and allow access to them anyway
@@ -1478,7 +1482,7 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
* Switches to specified vcpu, until a matching vcpu_put(), but assumes
* vcpu mutex is already taken.
*/
-static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -1489,7 +1493,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
vmx->host_debugctlmsr = get_debugctlmsr();
}
-static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
+void vmx_vcpu_put(struct kvm_vcpu *vcpu)
{
vmx_vcpu_pi_put(vcpu);
@@ -1548,7 +1552,7 @@ void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
vmx->emulation_required = vmx_emulation_required(vcpu);
}
-static bool vmx_get_if_flag(struct kvm_vcpu *vcpu)
+bool vmx_get_if_flag(struct kvm_vcpu *vcpu)
{
return vmx_get_rflags(vcpu) & X86_EFLAGS_IF;
}
@@ -1654,8 +1658,8 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
return 0;
}
-static int vmx_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
- void *insn, int insn_len)
+int vmx_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
+ void *insn, int insn_len)
{
/*
* Emulation of instructions in SGX enclaves is impossible as RIP does
@@ -1739,7 +1743,7 @@ rip_updated:
* Recognizes a pending MTF VM-exit and records the nested state for later
* delivery.
*/
-static void vmx_update_emulated_instruction(struct kvm_vcpu *vcpu)
+void vmx_update_emulated_instruction(struct kvm_vcpu *vcpu)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -1770,7 +1774,7 @@ static void vmx_update_emulated_instruction(struct kvm_vcpu *vcpu)
}
}
-static int vmx_skip_emulated_instruction(struct kvm_vcpu *vcpu)
+int vmx_skip_emulated_instruction(struct kvm_vcpu *vcpu)
{
vmx_update_emulated_instruction(vcpu);
return skip_emulated_instruction(vcpu);
@@ -1789,7 +1793,7 @@ static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
}
-static void vmx_inject_exception(struct kvm_vcpu *vcpu)
+void vmx_inject_exception(struct kvm_vcpu *vcpu)
{
struct kvm_queued_exception *ex = &vcpu->arch.exception;
u32 intr_info = ex->vector | INTR_INFO_VALID_MASK;
@@ -1910,12 +1914,12 @@ u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
return kvm_caps.default_tsc_scaling_ratio;
}
-static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu)
+void vmx_write_tsc_offset(struct kvm_vcpu *vcpu)
{
vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
}
-static void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu)
+void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu)
{
vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);
}
@@ -1958,7 +1962,7 @@ static inline bool is_vmx_feature_control_msr_valid(struct vcpu_vmx *vmx,
return !(msr->data & ~valid_bits);
}
-static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
+int vmx_get_msr_feature(struct kvm_msr_entry *msr)
{
switch (msr->index) {
case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
@@ -1975,7 +1979,7 @@ static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
* Returns 0 on success, non-0 otherwise.
* Assumes vcpu_load() was already called.
*/
-static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmx_uret_msr *msr;
@@ -2156,7 +2160,7 @@ static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated
* Returns 0 on success, non-0 otherwise.
* Assumes vcpu_load() was already called.
*/
-static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmx_uret_msr *msr;
@@ -2459,7 +2463,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return ret;
}
-static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
+void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
{
unsigned long guest_owned_bits;
@@ -2607,6 +2611,9 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
&_cpu_based_2nd_exec_control))
return -EIO;
}
+ if (!IS_ENABLED(CONFIG_KVM_INTEL_PROVE_VE))
+ _cpu_based_2nd_exec_control &= ~SECONDARY_EXEC_EPT_VIOLATION_VE;
+
#ifndef CONFIG_X86_64
if (!(_cpu_based_2nd_exec_control &
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
@@ -2631,6 +2638,7 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
return -EIO;
vmx_cap->ept = 0;
+ _cpu_based_2nd_exec_control &= ~SECONDARY_EXEC_EPT_VIOLATION_VE;
}
if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_VPID) &&
vmx_cap->vpid) {
@@ -2760,7 +2768,7 @@ static bool kvm_is_vmx_supported(void)
return supported;
}
-static int vmx_check_processor_compat(void)
+int vmx_check_processor_compat(void)
{
int cpu = raw_smp_processor_id();
struct vmcs_config vmcs_conf;
@@ -2802,7 +2810,7 @@ fault:
return -EFAULT;
}
-static int vmx_hardware_enable(void)
+int vmx_hardware_enable(void)
{
int cpu = raw_smp_processor_id();
u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
@@ -2842,7 +2850,7 @@ static void vmclear_local_loaded_vmcss(void)
__loaded_vmcs_clear(v);
}
-static void vmx_hardware_disable(void)
+void vmx_hardware_disable(void)
{
vmclear_local_loaded_vmcss();
@@ -3156,7 +3164,7 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
#endif
-static void vmx_flush_tlb_all(struct kvm_vcpu *vcpu)
+void vmx_flush_tlb_all(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -3186,7 +3194,7 @@ static inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu)
return to_vmx(vcpu)->vpid;
}
-static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu)
+void vmx_flush_tlb_current(struct kvm_vcpu *vcpu)
{
struct kvm_mmu *mmu = vcpu->arch.mmu;
u64 root_hpa = mmu->root.hpa;
@@ -3202,7 +3210,7 @@ static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu)
vpid_sync_context(vmx_get_current_vpid(vcpu));
}
-static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
+void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
{
/*
* vpid_sync_vcpu_addr() is a nop if vpid==0, see the comment in
@@ -3211,7 +3219,7 @@ static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
vpid_sync_vcpu_addr(vmx_get_current_vpid(vcpu), addr);
}
-static void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu)
+void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu)
{
/*
* vpid_sync_context() is a nop if vpid==0, e.g. if enable_vpid==0 or a
@@ -3256,7 +3264,7 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu)
#define CR3_EXITING_BITS (CPU_BASED_CR3_LOAD_EXITING | \
CPU_BASED_CR3_STORE_EXITING)
-static bool vmx_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+bool vmx_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
if (is_guest_mode(vcpu))
return nested_guest_cr0_valid(vcpu, cr0);
@@ -3377,8 +3385,7 @@ u64 construct_eptp(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level)
return eptp;
}
-static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
- int root_level)
+void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level)
{
struct kvm *kvm = vcpu->kvm;
bool update_guest_cr3 = true;
@@ -3407,8 +3414,7 @@ static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
vmcs_writel(GUEST_CR3, guest_cr3);
}
-
-static bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
/*
* We operate under the default treatment of SMM, so VMX cannot be
@@ -3524,7 +3530,7 @@ void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
var->g = (ar >> 15) & 1;
}
-static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
+u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
{
struct kvm_segment s;
@@ -3601,14 +3607,14 @@ void __vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var));
}
-static void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
+void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
{
__vmx_set_segment(vcpu, var, seg);
to_vmx(vcpu)->emulation_required = vmx_emulation_required(vcpu);
}
-static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
+void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
{
u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS);
@@ -3616,25 +3622,25 @@ static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
*l = (ar >> 13) & 1;
}
-static void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
+void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
{
dt->size = vmcs_read32(GUEST_IDTR_LIMIT);
dt->address = vmcs_readl(GUEST_IDTR_BASE);
}
-static void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
+void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
{
vmcs_write32(GUEST_IDTR_LIMIT, dt->size);
vmcs_writel(GUEST_IDTR_BASE, dt->address);
}
-static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
+void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
{
dt->size = vmcs_read32(GUEST_GDTR_LIMIT);
dt->address = vmcs_readl(GUEST_GDTR_BASE);
}
-static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
+void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
{
vmcs_write32(GUEST_GDTR_LIMIT, dt->size);
vmcs_writel(GUEST_GDTR_BASE, dt->address);
@@ -4102,7 +4108,7 @@ void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu)
}
}
-static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
+bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
void *vapic_page;
@@ -4122,7 +4128,7 @@ static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
return ((rvi & 0xf0) > (vppr & 0xf0));
}
-static void vmx_msr_filter_changed(struct kvm_vcpu *vcpu)
+void vmx_msr_filter_changed(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 i;
@@ -4266,8 +4272,8 @@ static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
return 0;
}
-static void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
- int trig_mode, int vector)
+void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
+ int trig_mode, int vector)
{
struct kvm_vcpu *vcpu = apic->vcpu;
@@ -4429,7 +4435,7 @@ static u32 vmx_vmexit_ctrl(void)
~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER);
}
-static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
+void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -4595,6 +4601,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
if (!enable_ept) {
exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
+ exec_control &= ~SECONDARY_EXEC_EPT_VIOLATION_VE;
enable_unrestricted_guest = 0;
}
if (!enable_unrestricted_guest)
@@ -4693,7 +4700,7 @@ static int vmx_alloc_ipiv_pid_table(struct kvm *kvm)
return 0;
}
-static int vmx_vcpu_precreate(struct kvm *kvm)
+int vmx_vcpu_precreate(struct kvm *kvm)
{
return vmx_alloc_ipiv_pid_table(kvm);
}
@@ -4718,8 +4725,12 @@ static void init_vmcs(struct vcpu_vmx *vmx)
exec_controls_set(vmx, vmx_exec_control(vmx));
- if (cpu_has_secondary_exec_ctrls())
+ if (cpu_has_secondary_exec_ctrls()) {
secondary_exec_controls_set(vmx, vmx_secondary_exec_control(vmx));
+ if (vmx->ve_info)
+ vmcs_write64(VE_INFORMATION_ADDRESS,
+ __pa(vmx->ve_info));
+ }
if (cpu_has_tertiary_exec_ctrls())
tertiary_exec_controls_set(vmx, vmx_tertiary_exec_control(vmx));
@@ -4848,7 +4859,7 @@ static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu)
__pi_set_sn(&vmx->pi_desc);
}
-static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
+void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -4907,12 +4918,12 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vmx_update_fb_clear_dis(vcpu, vmx);
}
-static void vmx_enable_irq_window(struct kvm_vcpu *vcpu)
+void vmx_enable_irq_window(struct kvm_vcpu *vcpu)
{
exec_controls_setbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING);
}
-static void vmx_enable_nmi_window(struct kvm_vcpu *vcpu)
+void vmx_enable_nmi_window(struct kvm_vcpu *vcpu)
{
if (!enable_vnmi ||
vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
@@ -4923,7 +4934,7 @@ static void vmx_enable_nmi_window(struct kvm_vcpu *vcpu)
exec_controls_setbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING);
}
-static void vmx_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
+void vmx_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
uint32_t intr;
@@ -4951,7 +4962,7 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
vmx_clear_hlt(vcpu);
}
-static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
+void vmx_inject_nmi(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -5029,7 +5040,7 @@ bool vmx_nmi_blocked(struct kvm_vcpu *vcpu)
GUEST_INTR_STATE_NMI));
}
-static int vmx_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
+int vmx_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
if (to_vmx(vcpu)->nested.nested_run_pending)
return -EBUSY;
@@ -5051,7 +5062,7 @@ bool vmx_interrupt_blocked(struct kvm_vcpu *vcpu)
(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
}
-static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
+int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
if (to_vmx(vcpu)->nested.nested_run_pending)
return -EBUSY;
@@ -5066,7 +5077,7 @@ static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
return !vmx_interrupt_blocked(vcpu);
}
-static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
+int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
{
void __user *ret;
@@ -5086,7 +5097,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
return init_rmode_tss(kvm, ret);
}
-static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
+int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
{
to_kvm_vmx(kvm)->ept_identity_map_addr = ident_addr;
return 0;
@@ -5207,6 +5218,9 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
if (is_invalid_opcode(intr_info))
return handle_ud(vcpu);
+ if (KVM_BUG_ON(is_ve_fault(intr_info), vcpu->kvm))
+ return -EIO;
+
error_code = 0;
if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
@@ -5372,8 +5386,7 @@ static int handle_io(struct kvm_vcpu *vcpu)
return kvm_fast_pio(vcpu, size, port, in);
}
-static void
-vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
+void vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
{
/*
* Patch in the VMCALL instruction:
@@ -5579,7 +5592,7 @@ out:
return kvm_complete_insn_gp(vcpu, err);
}
-static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
+void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
{
get_debugreg(vcpu->arch.db[0], 0);
get_debugreg(vcpu->arch.db[1], 1);
@@ -5598,7 +5611,7 @@ static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
set_debugreg(DR6_RESERVED, 6);
}
-static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
+void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
{
vmcs_writel(GUEST_DR7, val);
}
@@ -5771,8 +5784,6 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) != 0 ?
PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
- vcpu->arch.exit_qualification = exit_qualification;
-
/*
* Check that the GPA doesn't exceed physical memory limits, as that is
* a guest page fault. We have to emulate the instruction here, because
@@ -5869,7 +5880,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
return 1;
}
-static int vmx_vcpu_pre_run(struct kvm_vcpu *vcpu)
+int vmx_vcpu_pre_run(struct kvm_vcpu *vcpu)
{
if (vmx_emulation_required_with_pending_exception(vcpu)) {
kvm_prepare_emulation_failure_exit(vcpu);
@@ -6157,9 +6168,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
static const int kvm_vmx_max_exit_handlers =
ARRAY_SIZE(kvm_vmx_exit_handlers);
-static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
- u64 *info1, u64 *info2,
- u32 *intr_info, u32 *error_code)
+void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
+ u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -6417,6 +6427,24 @@ void dump_vmcs(struct kvm_vcpu *vcpu)
if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID)
pr_err("Virtual processor ID = 0x%04x\n",
vmcs_read16(VIRTUAL_PROCESSOR_ID));
+ if (secondary_exec_control & SECONDARY_EXEC_EPT_VIOLATION_VE) {
+ struct vmx_ve_information *ve_info = vmx->ve_info;
+ u64 ve_info_pa = vmcs_read64(VE_INFORMATION_ADDRESS);
+
+ /*
+ * If KVM is dumping the VMCS, then something has gone wrong
+ * already. Dereferencing an address from the VMCS, which could
+ * very well be corrupted, is a terrible idea. The virtual
+ * address is known so use it.
+ */
+ pr_err("VE info address = 0x%016llx%s\n", ve_info_pa,
+ ve_info_pa == __pa(ve_info) ? "" : "(corrupted!)");
+ pr_err("ve_info: 0x%08x 0x%08x 0x%016llx 0x%016llx 0x%016llx 0x%04x\n",
+ ve_info->exit_reason, ve_info->delivery,
+ ve_info->exit_qualification,
+ ve_info->guest_linear_address,
+ ve_info->guest_physical_address, ve_info->eptp_index);
+ }
}
/*
@@ -6602,7 +6630,7 @@ unexpected_vmexit:
return 0;
}
-static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
+int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
{
int ret = __vmx_handle_exit(vcpu, exit_fastpath);
@@ -6690,7 +6718,7 @@ static noinstr void vmx_l1d_flush(struct kvm_vcpu *vcpu)
: "eax", "ebx", "ecx", "edx");
}
-static void vmx_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
+void vmx_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
int tpr_threshold;
@@ -6760,7 +6788,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
vmx_update_msr_bitmap_x2apic(vcpu);
}
-static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
+void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
{
const gfn_t gfn = APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT;
struct kvm *kvm = vcpu->kvm;
@@ -6829,7 +6857,7 @@ out:
kvm_release_pfn_clean(pfn);
}
-static void vmx_hwapic_isr_update(int max_isr)
+void vmx_hwapic_isr_update(int max_isr)
{
u16 status;
u8 old;
@@ -6863,7 +6891,7 @@ static void vmx_set_rvi(int vector)
}
}
-static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
+void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
{
/*
* When running L2, updating RVI is only relevant when
@@ -6877,7 +6905,7 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
vmx_set_rvi(max_irr);
}
-static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
+int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int max_irr;
@@ -6923,7 +6951,7 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
return max_irr;
}
-static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
+void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
{
if (!kvm_vcpu_apicv_active(vcpu))
return;
@@ -6934,7 +6962,7 @@ static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]);
}
-static void vmx_apicv_pre_state_restore(struct kvm_vcpu *vcpu)
+void vmx_apicv_pre_state_restore(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -6965,24 +6993,22 @@ static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
rdmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
}
-static void handle_exception_irqoff(struct vcpu_vmx *vmx)
+static void handle_exception_irqoff(struct kvm_vcpu *vcpu, u32 intr_info)
{
- u32 intr_info = vmx_get_intr_info(&vmx->vcpu);
-
/* if exit due to PF check for async PF */
if (is_page_fault(intr_info))
- vmx->vcpu.arch.apf.host_apf_flags = kvm_read_and_reset_apf_flags();
+ vcpu->arch.apf.host_apf_flags = kvm_read_and_reset_apf_flags();
/* if exit due to NM, handle before interrupts are enabled */
else if (is_nm_fault(intr_info))
- handle_nm_fault_irqoff(&vmx->vcpu);
+ handle_nm_fault_irqoff(vcpu);
/* Handle machine checks before interrupts are enabled */
else if (is_machine_check(intr_info))
kvm_machine_check();
}
-static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
+static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu,
+ u32 intr_info)
{
- u32 intr_info = vmx_get_intr_info(vcpu);
unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
if (KVM_BUG(!is_external_intr(intr_info), vcpu->kvm,
@@ -6999,7 +7025,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
vcpu->arch.at_instruction_boundary = true;
}
-static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
+void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -7007,16 +7033,16 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
return;
if (vmx->exit_reason.basic == EXIT_REASON_EXTERNAL_INTERRUPT)
- handle_external_interrupt_irqoff(vcpu);
+ handle_external_interrupt_irqoff(vcpu, vmx_get_intr_info(vcpu));
else if (vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI)
- handle_exception_irqoff(vmx);
+ handle_exception_irqoff(vcpu, vmx_get_intr_info(vcpu));
}
/*
* The kvm parameter can be NULL (module initialization, or invocation before
* VM creation). Be sure to check the kvm parameter before using it.
*/
-static bool vmx_has_emulated_msr(struct kvm *kvm, u32 index)
+bool vmx_has_emulated_msr(struct kvm *kvm, u32 index)
{
switch (index) {
case MSR_IA32_SMBASE:
@@ -7139,7 +7165,7 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
IDT_VECTORING_ERROR_CODE);
}
-static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
+void vmx_cancel_injection(struct kvm_vcpu *vcpu)
{
__vmx_complete_interrupts(vcpu,
vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
@@ -7309,7 +7335,7 @@ out:
guest_state_exit_irqoff();
}
-static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
+fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long cr3, cr4;
@@ -7464,7 +7490,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
return vmx_exit_handlers_fastpath(vcpu, force_immediate_exit);
}
-static void vmx_vcpu_free(struct kvm_vcpu *vcpu)
+void vmx_vcpu_free(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -7473,9 +7499,10 @@ static void vmx_vcpu_free(struct kvm_vcpu *vcpu)
free_vpid(vmx->vpid);
nested_vmx_free_vcpu(vcpu);
free_loaded_vmcs(vmx->loaded_vmcs);
+ free_page((unsigned long)vmx->ve_info);
}
-static int vmx_vcpu_create(struct kvm_vcpu *vcpu)
+int vmx_vcpu_create(struct kvm_vcpu *vcpu)
{
struct vmx_uret_msr *tsx_ctrl;
struct vcpu_vmx *vmx;
@@ -7566,6 +7593,20 @@ static int vmx_vcpu_create(struct kvm_vcpu *vcpu)
goto free_vmcs;
}
+ err = -ENOMEM;
+ if (vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_EPT_VIOLATION_VE) {
+ struct page *page;
+
+ BUILD_BUG_ON(sizeof(*vmx->ve_info) > PAGE_SIZE);
+
+ /* ve_info must be page aligned. */
+ page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ if (!page)
+ goto free_vmcs;
+
+ vmx->ve_info = page_to_virt(page);
+ }
+
if (vmx_can_use_ipiv(vcpu))
WRITE_ONCE(to_kvm_vmx(vcpu->kvm)->pid_table[vcpu->vcpu_id],
__pa(&vmx->pi_desc) | PID_TABLE_ENTRY_VALID);
@@ -7584,7 +7625,7 @@ free_vpid:
#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n"
#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n"
-static int vmx_vm_init(struct kvm *kvm)
+int vmx_vm_init(struct kvm *kvm)
{
if (!ple_gap)
kvm->arch.pause_in_guest = true;
@@ -7615,7 +7656,7 @@ static int vmx_vm_init(struct kvm *kvm)
return 0;
}
-static u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
+u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
{
/* We wanted to honor guest CD/MTRR/PAT, but doing so could result in
* memory aliases with conflicting memory types and sometimes MCEs.
@@ -7787,7 +7828,7 @@ static void update_intel_pt_cfg(struct kvm_vcpu *vcpu)
vmx->pt_desc.ctl_bitmask &= ~(0xfULL << (32 + i * 4));
}
-static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
+void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -8002,10 +8043,10 @@ static int vmx_check_intercept_io(struct kvm_vcpu *vcpu,
return intercept ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE;
}
-static int vmx_check_intercept(struct kvm_vcpu *vcpu,
- struct x86_instruction_info *info,
- enum x86_intercept_stage stage,
- struct x86_exception *exception)
+int vmx_check_intercept(struct kvm_vcpu *vcpu,
+ struct x86_instruction_info *info,
+ enum x86_intercept_stage stage,
+ struct x86_exception *exception)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
@@ -8085,8 +8126,8 @@ static inline int u64_shl_div_u64(u64 a, unsigned int shift,
return 0;
}
-static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
- bool *expired)
+int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
+ bool *expired)
{
struct vcpu_vmx *vmx;
u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
@@ -8125,13 +8166,13 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
return 0;
}
-static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
+void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
{
to_vmx(vcpu)->hv_deadline_tsc = -1;
}
#endif
-static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
+void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
if (!kvm_pause_in_guest(vcpu->kvm))
shrink_ple_window(vcpu);
@@ -8160,7 +8201,7 @@ void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu)
secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_ENABLE_PML);
}
-static void vmx_setup_mce(struct kvm_vcpu *vcpu)
+void vmx_setup_mce(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.mcg_cap & MCG_LMCE_P)
to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
@@ -8171,7 +8212,7 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu)
}
#ifdef CONFIG_KVM_SMM
-static int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
+int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
/* we need a nested vmexit to enter SMM, postpone if run is pending */
if (to_vmx(vcpu)->nested.nested_run_pending)
@@ -8179,7 +8220,7 @@ static int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
return !is_smm(vcpu);
}
-static int vmx_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
+int vmx_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -8200,7 +8241,7 @@ static int vmx_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
return 0;
}
-static int vmx_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
+int vmx_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int ret;
@@ -8221,18 +8262,18 @@ static int vmx_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
return 0;
}
-static void vmx_enable_smi_window(struct kvm_vcpu *vcpu)
+void vmx_enable_smi_window(struct kvm_vcpu *vcpu)
{
/* RSM will cause a vmexit anyway. */
}
#endif
-static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
+bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
{
return to_vmx(vcpu)->nested.vmxon && !is_guest_mode(vcpu);
}
-static void vmx_migrate_timers(struct kvm_vcpu *vcpu)
+void vmx_migrate_timers(struct kvm_vcpu *vcpu)
{
if (is_guest_mode(vcpu)) {
struct hrtimer *timer = &to_vmx(vcpu)->nested.preemption_timer;
@@ -8242,7 +8283,7 @@ static void vmx_migrate_timers(struct kvm_vcpu *vcpu)
}
}
-static void vmx_hardware_unsetup(void)
+void vmx_hardware_unsetup(void)
{
kvm_set_posted_intr_wakeup_handler(NULL);
@@ -8252,18 +8293,7 @@ static void vmx_hardware_unsetup(void)
free_kvm_area();
}
-#define VMX_REQUIRED_APICV_INHIBITS \
-( \
- BIT(APICV_INHIBIT_REASON_DISABLE)| \
- BIT(APICV_INHIBIT_REASON_ABSENT) | \
- BIT(APICV_INHIBIT_REASON_HYPERV) | \
- BIT(APICV_INHIBIT_REASON_BLOCKIRQ) | \
- BIT(APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED) | \
- BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) | \
- BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED) \
-)
-
-static void vmx_vm_destroy(struct kvm *kvm)
+void vmx_vm_destroy(struct kvm *kvm)
{
struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
@@ -8314,148 +8344,6 @@ gva_t vmx_get_untagged_addr(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags
return (sign_extend64(gva, lam_bit) & ~BIT_ULL(63)) | (gva & BIT_ULL(63));
}
-static struct kvm_x86_ops vmx_x86_ops __initdata = {
- .name = KBUILD_MODNAME,
-
- .check_processor_compatibility = vmx_check_processor_compat,
-
- .hardware_unsetup = vmx_hardware_unsetup,
-
- .hardware_enable = vmx_hardware_enable,
- .hardware_disable = vmx_hardware_disable,
- .has_emulated_msr = vmx_has_emulated_msr,
-
- .vm_size = sizeof(struct kvm_vmx),
- .vm_init = vmx_vm_init,
- .vm_destroy = vmx_vm_destroy,
-
- .vcpu_precreate = vmx_vcpu_precreate,
- .vcpu_create = vmx_vcpu_create,
- .vcpu_free = vmx_vcpu_free,
- .vcpu_reset = vmx_vcpu_reset,
-
- .prepare_switch_to_guest = vmx_prepare_switch_to_guest,
- .vcpu_load = vmx_vcpu_load,
- .vcpu_put = vmx_vcpu_put,
-
- .update_exception_bitmap = vmx_update_exception_bitmap,
- .get_msr_feature = vmx_get_msr_feature,
- .get_msr = vmx_get_msr,
- .set_msr = vmx_set_msr,
- .get_segment_base = vmx_get_segment_base,
- .get_segment = vmx_get_segment,
- .set_segment = vmx_set_segment,
- .get_cpl = vmx_get_cpl,
- .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
- .is_valid_cr0 = vmx_is_valid_cr0,
- .set_cr0 = vmx_set_cr0,
- .is_valid_cr4 = vmx_is_valid_cr4,
- .set_cr4 = vmx_set_cr4,
- .set_efer = vmx_set_efer,
- .get_idt = vmx_get_idt,
- .set_idt = vmx_set_idt,
- .get_gdt = vmx_get_gdt,
- .set_gdt = vmx_set_gdt,
- .set_dr7 = vmx_set_dr7,
- .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs,
- .cache_reg = vmx_cache_reg,
- .get_rflags = vmx_get_rflags,
- .set_rflags = vmx_set_rflags,
- .get_if_flag = vmx_get_if_flag,
-
- .flush_tlb_all = vmx_flush_tlb_all,
- .flush_tlb_current = vmx_flush_tlb_current,
- .flush_tlb_gva = vmx_flush_tlb_gva,
- .flush_tlb_guest = vmx_flush_tlb_guest,
-
- .vcpu_pre_run = vmx_vcpu_pre_run,
- .vcpu_run = vmx_vcpu_run,
- .handle_exit = vmx_handle_exit,
- .skip_emulated_instruction = vmx_skip_emulated_instruction,
- .update_emulated_instruction = vmx_update_emulated_instruction,
- .set_interrupt_shadow = vmx_set_interrupt_shadow,
- .get_interrupt_shadow = vmx_get_interrupt_shadow,
- .patch_hypercall = vmx_patch_hypercall,
- .inject_irq = vmx_inject_irq,
- .inject_nmi = vmx_inject_nmi,
- .inject_exception = vmx_inject_exception,
- .cancel_injection = vmx_cancel_injection,
- .interrupt_allowed = vmx_interrupt_allowed,
- .nmi_allowed = vmx_nmi_allowed,
- .get_nmi_mask = vmx_get_nmi_mask,
- .set_nmi_mask = vmx_set_nmi_mask,
- .enable_nmi_window = vmx_enable_nmi_window,
- .enable_irq_window = vmx_enable_irq_window,
- .update_cr8_intercept = vmx_update_cr8_intercept,
- .set_virtual_apic_mode = vmx_set_virtual_apic_mode,
- .set_apic_access_page_addr = vmx_set_apic_access_page_addr,
- .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
- .load_eoi_exitmap = vmx_load_eoi_exitmap,
- .apicv_pre_state_restore = vmx_apicv_pre_state_restore,
- .required_apicv_inhibits = VMX_REQUIRED_APICV_INHIBITS,
- .hwapic_irr_update = vmx_hwapic_irr_update,
- .hwapic_isr_update = vmx_hwapic_isr_update,
- .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
- .sync_pir_to_irr = vmx_sync_pir_to_irr,
- .deliver_interrupt = vmx_deliver_interrupt,
- .dy_apicv_has_pending_interrupt = pi_has_pending_interrupt,
-
- .set_tss_addr = vmx_set_tss_addr,
- .set_identity_map_addr = vmx_set_identity_map_addr,
- .get_mt_mask = vmx_get_mt_mask,
-
- .get_exit_info = vmx_get_exit_info,
-
- .vcpu_after_set_cpuid = vmx_vcpu_after_set_cpuid,
-
- .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
-
- .get_l2_tsc_offset = vmx_get_l2_tsc_offset,
- .get_l2_tsc_multiplier = vmx_get_l2_tsc_multiplier,
- .write_tsc_offset = vmx_write_tsc_offset,
- .write_tsc_multiplier = vmx_write_tsc_multiplier,
-
- .load_mmu_pgd = vmx_load_mmu_pgd,
-
- .check_intercept = vmx_check_intercept,
- .handle_exit_irqoff = vmx_handle_exit_irqoff,
-
- .sched_in = vmx_sched_in,
-
- .cpu_dirty_log_size = PML_ENTITY_NUM,
- .update_cpu_dirty_logging = vmx_update_cpu_dirty_logging,
-
- .nested_ops = &vmx_nested_ops,
-
- .pi_update_irte = vmx_pi_update_irte,
- .pi_start_assignment = vmx_pi_start_assignment,
-
-#ifdef CONFIG_X86_64
- .set_hv_timer = vmx_set_hv_timer,
- .cancel_hv_timer = vmx_cancel_hv_timer,
-#endif
-
- .setup_mce = vmx_setup_mce,
-
-#ifdef CONFIG_KVM_SMM
- .smi_allowed = vmx_smi_allowed,
- .enter_smm = vmx_enter_smm,
- .leave_smm = vmx_leave_smm,
- .enable_smi_window = vmx_enable_smi_window,
-#endif
-
- .check_emulate_instruction = vmx_check_emulate_instruction,
- .apic_init_signal_blocked = vmx_apic_init_signal_blocked,
- .migrate_timers = vmx_migrate_timers,
-
- .msr_filter_changed = vmx_msr_filter_changed,
- .complete_emulated_msr = kvm_complete_insn_gp,
-
- .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
-
- .get_untagged_addr = vmx_get_untagged_addr,
-};
-
static unsigned int vmx_handle_intel_pt_intr(void)
{
struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
@@ -8521,9 +8409,7 @@ static void __init vmx_setup_me_spte_mask(void)
kvm_mmu_set_me_spte_mask(0, me_mask);
}
-static struct kvm_x86_init_ops vmx_init_ops __initdata;
-
-static __init int hardware_setup(void)
+__init int vmx_hardware_setup(void)
{
unsigned long host_bndcfgs;
struct desc_ptr dt;
@@ -8592,16 +8478,16 @@ static __init int hardware_setup(void)
* using the APIC_ACCESS_ADDR VMCS field.
*/
if (!flexpriority_enabled)
- vmx_x86_ops.set_apic_access_page_addr = NULL;
+ vt_x86_ops.set_apic_access_page_addr = NULL;
if (!cpu_has_vmx_tpr_shadow())
- vmx_x86_ops.update_cr8_intercept = NULL;
+ vt_x86_ops.update_cr8_intercept = NULL;
#if IS_ENABLED(CONFIG_HYPERV)
if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH
&& enable_ept) {
- vmx_x86_ops.flush_remote_tlbs = hv_flush_remote_tlbs;
- vmx_x86_ops.flush_remote_tlbs_range = hv_flush_remote_tlbs_range;
+ vt_x86_ops.flush_remote_tlbs = hv_flush_remote_tlbs;
+ vt_x86_ops.flush_remote_tlbs_range = hv_flush_remote_tlbs_range;
}
#endif
@@ -8616,7 +8502,7 @@ static __init int hardware_setup(void)
if (!cpu_has_vmx_apicv())
enable_apicv = 0;
if (!enable_apicv)
- vmx_x86_ops.sync_pir_to_irr = NULL;
+ vt_x86_ops.sync_pir_to_irr = NULL;
if (!enable_apicv || !cpu_has_vmx_ipiv())
enable_ipiv = false;
@@ -8652,7 +8538,7 @@ static __init int hardware_setup(void)
enable_pml = 0;
if (!enable_pml)
- vmx_x86_ops.cpu_dirty_log_size = 0;
+ vt_x86_ops.cpu_dirty_log_size = 0;
if (!cpu_has_vmx_preemption_timer())
enable_preemption_timer = false;
@@ -8677,8 +8563,8 @@ static __init int hardware_setup(void)
}
if (!enable_preemption_timer) {
- vmx_x86_ops.set_hv_timer = NULL;
- vmx_x86_ops.cancel_hv_timer = NULL;
+ vt_x86_ops.set_hv_timer = NULL;
+ vt_x86_ops.cancel_hv_timer = NULL;
}
kvm_caps.supported_mce_cap |= MCG_LMCE_P;
@@ -8689,9 +8575,9 @@ static __init int hardware_setup(void)
if (!enable_ept || !enable_pmu || !cpu_has_vmx_intel_pt())
pt_mode = PT_MODE_SYSTEM;
if (pt_mode == PT_MODE_HOST_GUEST)
- vmx_init_ops.handle_intel_pt_intr = vmx_handle_intel_pt_intr;
+ vt_init_ops.handle_intel_pt_intr = vmx_handle_intel_pt_intr;
else
- vmx_init_ops.handle_intel_pt_intr = NULL;
+ vt_init_ops.handle_intel_pt_intr = NULL;
setup_default_sgx_lepubkeyhash();
@@ -8714,14 +8600,6 @@ static __init int hardware_setup(void)
return r;
}
-static struct kvm_x86_init_ops vmx_init_ops __initdata = {
- .hardware_setup = hardware_setup,
- .handle_intel_pt_intr = NULL,
-
- .runtime_ops = &vmx_x86_ops,
- .pmu_ops = &intel_pmu_ops,
-};
-
static void vmx_cleanup_l1d_flush(void)
{
if (vmx_l1d_flush_pages) {
@@ -8763,7 +8641,7 @@ static int __init vmx_init(void)
*/
hv_init_evmcs();
- r = kvm_x86_vendor_init(&vmx_init_ops);
+ r = kvm_x86_vendor_init(&vt_init_ops);
if (r)
return r;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 7e483366b31e..7b64e271a931 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -365,6 +365,9 @@ struct vcpu_vmx {
DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
} shadow_msr_intercept;
+
+ /* ve_info must be page aligned. */
+ struct vmx_ve_information *ve_info;
};
struct kvm_vmx {
@@ -577,7 +580,8 @@ static inline u8 vmx_get_rvi(void)
SECONDARY_EXEC_ENABLE_VMFUNC | \
SECONDARY_EXEC_BUS_LOCK_DETECTION | \
SECONDARY_EXEC_NOTIFY_VM_EXITING | \
- SECONDARY_EXEC_ENCLS_EXITING)
+ SECONDARY_EXEC_ENCLS_EXITING | \
+ SECONDARY_EXEC_EPT_VIOLATION_VE)
#define KVM_REQUIRED_VMX_TERTIARY_VM_EXEC_CONTROL 0
#define KVM_OPTIONAL_VMX_TERTIARY_VM_EXEC_CONTROL \
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
new file mode 100644
index 000000000000..502704596c83
--- /dev/null
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __KVM_X86_VMX_X86_OPS_H
+#define __KVM_X86_VMX_X86_OPS_H
+
+#include <linux/kvm_host.h>
+
+#include "x86.h"
+
+__init int vmx_hardware_setup(void);
+
+extern struct kvm_x86_ops vt_x86_ops __initdata;
+extern struct kvm_x86_init_ops vt_init_ops __initdata;
+
+void vmx_hardware_unsetup(void);
+int vmx_check_processor_compat(void);
+int vmx_hardware_enable(void);
+void vmx_hardware_disable(void);
+int vmx_vm_init(struct kvm *kvm);
+void vmx_vm_destroy(struct kvm *kvm);
+int vmx_vcpu_precreate(struct kvm *kvm);
+int vmx_vcpu_create(struct kvm_vcpu *vcpu);
+int vmx_vcpu_pre_run(struct kvm_vcpu *vcpu);
+fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit);
+void vmx_vcpu_free(struct kvm_vcpu *vcpu);
+void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
+void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
+void vmx_vcpu_put(struct kvm_vcpu *vcpu);
+int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath);
+void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu);
+int vmx_skip_emulated_instruction(struct kvm_vcpu *vcpu);
+void vmx_update_emulated_instruction(struct kvm_vcpu *vcpu);
+int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
+#ifdef CONFIG_KVM_SMM
+int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection);
+int vmx_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram);
+int vmx_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram);
+void vmx_enable_smi_window(struct kvm_vcpu *vcpu);
+#endif
+int vmx_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
+ void *insn, int insn_len);
+int vmx_check_intercept(struct kvm_vcpu *vcpu,
+ struct x86_instruction_info *info,
+ enum x86_intercept_stage stage,
+ struct x86_exception *exception);
+bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu);
+void vmx_migrate_timers(struct kvm_vcpu *vcpu);
+void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
+void vmx_apicv_pre_state_restore(struct kvm_vcpu *vcpu);
+bool vmx_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason);
+void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr);
+void vmx_hwapic_isr_update(int max_isr);
+bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu);
+int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu);
+void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
+ int trig_mode, int vector);
+void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu);
+bool vmx_has_emulated_msr(struct kvm *kvm, u32 index);
+void vmx_msr_filter_changed(struct kvm_vcpu *vcpu);
+void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
+void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu);
+int vmx_get_msr_feature(struct kvm_msr_entry *msr);
+int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
+u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg);
+void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
+void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
+int vmx_get_cpl(struct kvm_vcpu *vcpu);
+void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
+bool vmx_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
+void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
+void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level);
+void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
+bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
+int vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer);
+void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
+void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
+void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
+void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
+void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val);
+void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu);
+void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg);
+unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu);
+void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
+bool vmx_get_if_flag(struct kvm_vcpu *vcpu);
+void vmx_flush_tlb_all(struct kvm_vcpu *vcpu);
+void vmx_flush_tlb_current(struct kvm_vcpu *vcpu);
+void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr);
+void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu);
+void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask);
+u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu);
+void vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall);
+void vmx_inject_irq(struct kvm_vcpu *vcpu, bool reinjected);
+void vmx_inject_nmi(struct kvm_vcpu *vcpu);
+void vmx_inject_exception(struct kvm_vcpu *vcpu);
+void vmx_cancel_injection(struct kvm_vcpu *vcpu);
+int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection);
+int vmx_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection);
+bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu);
+void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
+void vmx_enable_nmi_window(struct kvm_vcpu *vcpu);
+void vmx_enable_irq_window(struct kvm_vcpu *vcpu);
+void vmx_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr);
+void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu);
+void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu);
+void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
+int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
+int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr);
+u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
+void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
+ u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code);
+u64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu);
+u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu);
+void vmx_write_tsc_offset(struct kvm_vcpu *vcpu);
+void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu);
+void vmx_request_immediate_exit(struct kvm_vcpu *vcpu);
+void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu);
+void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);
+#ifdef CONFIG_X86_64
+int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
+ bool *expired);
+void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu);
+#endif
+void vmx_setup_mce(struct kvm_vcpu *vcpu);
+
+#endif /* __KVM_X86_VMX_X86_OPS_H */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 91478b769af0..082ac6d95a3a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -92,9 +92,12 @@
#define MAX_IO_MSRS 256
#define KVM_MAX_MCE_BANKS 32
-struct kvm_caps kvm_caps __read_mostly = {
- .supported_mce_cap = MCG_CTL_P | MCG_SER_P,
-};
+/*
+ * Note, kvm_caps fields should *never* have default values, all fields must be
+ * recomputed from scratch during vendor module load, e.g. to account for a
+ * vendor module being reloaded with different module parameters.
+ */
+struct kvm_caps kvm_caps __read_mostly;
EXPORT_SYMBOL_GPL(kvm_caps);
#define ERR_PTR_USR(e) ((void __user *)ERR_PTR(e))
@@ -2230,16 +2233,13 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
/*
* Disallow writes to immutable feature MSRs after KVM_RUN. KVM does
* not support modifying the guest vCPU model on the fly, e.g. changing
- * the nVMX capabilities while L2 is running is nonsensical. Ignore
+ * the nVMX capabilities while L2 is running is nonsensical. Allow
* writes of the same value, e.g. to allow userspace to blindly stuff
* all MSRs when emulating RESET.
*/
- if (kvm_vcpu_has_run(vcpu) && kvm_is_immutable_feature_msr(index)) {
- if (do_get_msr(vcpu, index, &val) || *data != val)
- return -EINVAL;
-
- return 0;
- }
+ if (kvm_vcpu_has_run(vcpu) && kvm_is_immutable_feature_msr(index) &&
+ (do_get_msr(vcpu, index, &val) || *data != val))
+ return -EINVAL;
return kvm_set_msr_ignored_check(vcpu, index, *data, true);
}
@@ -4629,9 +4629,7 @@ static int kvm_ioctl_get_supported_hv_cpuid(struct kvm_vcpu *vcpu,
static bool kvm_is_vm_type_supported(unsigned long type)
{
- return type == KVM_X86_DEFAULT_VM ||
- (type == KVM_X86_SW_PROTECTED_VM &&
- IS_ENABLED(CONFIG_KVM_SW_PROTECTED_VM) && tdp_mmu_enabled);
+ return type < 32 && (kvm_caps.supported_vm_types & BIT(type));
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@@ -4832,9 +4830,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = kvm_caps.has_notify_vmexit;
break;
case KVM_CAP_VM_TYPES:
- r = BIT(KVM_X86_DEFAULT_VM);
- if (kvm_is_vm_type_supported(KVM_X86_SW_PROTECTED_VM))
- r |= BIT(KVM_X86_SW_PROTECTED_VM);
+ r = kvm_caps.supported_vm_types;
break;
default:
break;
@@ -4842,46 +4838,44 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
return r;
}
-static inline void __user *kvm_get_attr_addr(struct kvm_device_attr *attr)
+static int __kvm_x86_dev_get_attr(struct kvm_device_attr *attr, u64 *val)
{
- void __user *uaddr = (void __user*)(unsigned long)attr->addr;
-
- if ((u64)(unsigned long)uaddr != attr->addr)
- return ERR_PTR_USR(-EFAULT);
- return uaddr;
-}
-
-static int kvm_x86_dev_get_attr(struct kvm_device_attr *attr)
-{
- u64 __user *uaddr = kvm_get_attr_addr(attr);
-
- if (attr->group)
+ if (attr->group) {
+ if (kvm_x86_ops.dev_get_attr)
+ return static_call(kvm_x86_dev_get_attr)(attr->group, attr->attr, val);
return -ENXIO;
-
- if (IS_ERR(uaddr))
- return PTR_ERR(uaddr);
+ }
switch (attr->attr) {
case KVM_X86_XCOMP_GUEST_SUPP:
- if (put_user(kvm_caps.supported_xcr0, uaddr))
- return -EFAULT;
+ *val = kvm_caps.supported_xcr0;
return 0;
default:
return -ENXIO;
}
}
+static int kvm_x86_dev_get_attr(struct kvm_device_attr *attr)
+{
+ u64 __user *uaddr = u64_to_user_ptr(attr->addr);
+ int r;
+ u64 val;
+
+ r = __kvm_x86_dev_get_attr(attr, &val);
+ if (r < 0)
+ return r;
+
+ if (put_user(val, uaddr))
+ return -EFAULT;
+
+ return 0;
+}
+
static int kvm_x86_dev_has_attr(struct kvm_device_attr *attr)
{
- if (attr->group)
- return -ENXIO;
+ u64 val;
- switch (attr->attr) {
- case KVM_X86_XCOMP_GUEST_SUPP:
- return 0;
- default:
- return -ENXIO;
- }
+ return __kvm_x86_dev_get_attr(attr, &val);
}
long kvm_arch_dev_ioctl(struct file *filp,
@@ -5557,11 +5551,15 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
return 0;
}
-static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
- struct kvm_debugregs *dbgregs)
+static int kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
+ struct kvm_debugregs *dbgregs)
{
unsigned int i;
+ if (vcpu->kvm->arch.has_protected_state &&
+ vcpu->arch.guest_state_protected)
+ return -EINVAL;
+
memset(dbgregs, 0, sizeof(*dbgregs));
BUILD_BUG_ON(ARRAY_SIZE(vcpu->arch.db) != ARRAY_SIZE(dbgregs->db));
@@ -5570,6 +5568,7 @@ static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
dbgregs->dr6 = vcpu->arch.dr6;
dbgregs->dr7 = vcpu->arch.dr7;
+ return 0;
}
static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
@@ -5577,6 +5576,10 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
{
unsigned int i;
+ if (vcpu->kvm->arch.has_protected_state &&
+ vcpu->arch.guest_state_protected)
+ return -EINVAL;
+
if (dbgregs->flags)
return -EINVAL;
@@ -5597,8 +5600,8 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
}
-static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
- u8 *state, unsigned int size)
+static int kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
+ u8 *state, unsigned int size)
{
/*
* Only copy state for features that are enabled for the guest. The
@@ -5616,24 +5619,25 @@ static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
XFEATURE_MASK_FPSSE;
if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
- return;
+ return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0;
fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size,
supported_xcr0, vcpu->arch.pkru);
+ return 0;
}
-static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
- struct kvm_xsave *guest_xsave)
+static int kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
+ struct kvm_xsave *guest_xsave)
{
- kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region,
- sizeof(guest_xsave->region));
+ return kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region,
+ sizeof(guest_xsave->region));
}
static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
struct kvm_xsave *guest_xsave)
{
if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
- return 0;
+ return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0;
return fpu_copy_uabi_to_guest_fpstate(&vcpu->arch.guest_fpu,
guest_xsave->region,
@@ -5641,18 +5645,23 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
&vcpu->arch.pkru);
}
-static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
- struct kvm_xcrs *guest_xcrs)
+static int kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
+ struct kvm_xcrs *guest_xcrs)
{
+ if (vcpu->kvm->arch.has_protected_state &&
+ vcpu->arch.guest_state_protected)
+ return -EINVAL;
+
if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
guest_xcrs->nr_xcrs = 0;
- return;
+ return 0;
}
guest_xcrs->nr_xcrs = 1;
guest_xcrs->flags = 0;
guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK;
guest_xcrs->xcrs[0].value = vcpu->arch.xcr0;
+ return 0;
}
static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
@@ -5660,6 +5669,10 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
{
int i, r = 0;
+ if (vcpu->kvm->arch.has_protected_state &&
+ vcpu->arch.guest_state_protected)
+ return -EINVAL;
+
if (!boot_cpu_has(X86_FEATURE_XSAVE))
return -EINVAL;
@@ -5712,12 +5725,9 @@ static int kvm_arch_tsc_has_attr(struct kvm_vcpu *vcpu,
static int kvm_arch_tsc_get_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr)
{
- u64 __user *uaddr = kvm_get_attr_addr(attr);
+ u64 __user *uaddr = u64_to_user_ptr(attr->addr);
int r;
- if (IS_ERR(uaddr))
- return PTR_ERR(uaddr);
-
switch (attr->attr) {
case KVM_VCPU_TSC_OFFSET:
r = -EFAULT;
@@ -5735,13 +5745,10 @@ static int kvm_arch_tsc_get_attr(struct kvm_vcpu *vcpu,
static int kvm_arch_tsc_set_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr)
{
- u64 __user *uaddr = kvm_get_attr_addr(attr);
+ u64 __user *uaddr = u64_to_user_ptr(attr->addr);
struct kvm *kvm = vcpu->kvm;
int r;
- if (IS_ERR(uaddr))
- return PTR_ERR(uaddr);
-
switch (attr->attr) {
case KVM_VCPU_TSC_OFFSET: {
u64 offset, tsc, ns;
@@ -6048,7 +6055,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
case KVM_GET_DEBUGREGS: {
struct kvm_debugregs dbgregs;
- kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
+ r = kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
+ if (r < 0)
+ break;
r = -EFAULT;
if (copy_to_user(argp, &dbgregs,
@@ -6078,7 +6087,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (!u.xsave)
break;
- kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);
+ r = kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);
+ if (r < 0)
+ break;
r = -EFAULT;
if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave)))
@@ -6107,7 +6118,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (!u.xsave)
break;
- kvm_vcpu_ioctl_x86_get_xsave2(vcpu, u.buffer, size);
+ r = kvm_vcpu_ioctl_x86_get_xsave2(vcpu, u.buffer, size);
+ if (r < 0)
+ break;
r = -EFAULT;
if (copy_to_user(argp, u.xsave, size))
@@ -6123,7 +6136,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (!u.xcrs)
break;
- kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);
+ r = kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);
+ if (r < 0)
+ break;
r = -EFAULT;
if (copy_to_user(argp, u.xcrs,
@@ -6267,6 +6282,11 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
#endif
case KVM_GET_SREGS2: {
+ r = -EINVAL;
+ if (vcpu->kvm->arch.has_protected_state &&
+ vcpu->arch.guest_state_protected)
+ goto out;
+
u.sregs2 = kzalloc(sizeof(struct kvm_sregs2), GFP_KERNEL);
r = -ENOMEM;
if (!u.sregs2)
@@ -6279,6 +6299,11 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
}
case KVM_SET_SREGS2: {
+ r = -EINVAL;
+ if (vcpu->kvm->arch.has_protected_state &&
+ vcpu->arch.guest_state_protected)
+ goto out;
+
u.sregs2 = memdup_user(argp, sizeof(struct kvm_sregs2));
if (IS_ERR(u.sregs2)) {
r = PTR_ERR(u.sregs2);
@@ -9732,6 +9757,8 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
return -EIO;
}
+ memset(&kvm_caps, 0, sizeof(kvm_caps));
+
x86_emulator_cache = kvm_alloc_emulator_cache();
if (!x86_emulator_cache) {
pr_err("failed to allocate cache for x86 emulator\n");
@@ -9750,6 +9777,9 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
if (r)
goto out_free_percpu;
+ kvm_caps.supported_vm_types = BIT(KVM_X86_DEFAULT_VM);
+ kvm_caps.supported_mce_cap = MCG_CTL_P | MCG_SER_P;
+
if (boot_cpu_has(X86_FEATURE_XSAVE)) {
host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
kvm_caps.supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0;
@@ -9795,6 +9825,9 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
kvm_register_perf_callbacks(ops->handle_intel_pt_intr);
+ if (IS_ENABLED(CONFIG_KVM_SW_PROTECTED_VM) && tdp_mmu_enabled)
+ kvm_caps.supported_vm_types |= BIT(KVM_X86_SW_PROTECTED_VM);
+
if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
kvm_caps.supported_xss = 0;
@@ -9995,15 +10028,12 @@ static void set_or_clear_apicv_inhibit(unsigned long *inhibits,
static void kvm_apicv_init(struct kvm *kvm)
{
- unsigned long *inhibits = &kvm->arch.apicv_inhibit_reasons;
+ enum kvm_apicv_inhibit reason = enable_apicv ? APICV_INHIBIT_REASON_ABSENT :
+ APICV_INHIBIT_REASON_DISABLE;
- init_rwsem(&kvm->arch.apicv_update_lock);
-
- set_or_clear_apicv_inhibit(inhibits, APICV_INHIBIT_REASON_ABSENT, true);
+ set_or_clear_apicv_inhibit(&kvm->arch.apicv_inhibit_reasons, reason, true);
- if (!enable_apicv)
- set_or_clear_apicv_inhibit(inhibits,
- APICV_INHIBIT_REASON_DISABLE, true);
+ init_rwsem(&kvm->arch.apicv_update_lock);
}
static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id)
@@ -10051,26 +10081,15 @@ static int complete_hypercall_exit(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}
-int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
+unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
+ unsigned long a0, unsigned long a1,
+ unsigned long a2, unsigned long a3,
+ int op_64_bit, int cpl)
{
- unsigned long nr, a0, a1, a2, a3, ret;
- int op_64_bit;
-
- if (kvm_xen_hypercall_enabled(vcpu->kvm))
- return kvm_xen_hypercall(vcpu);
-
- if (kvm_hv_hypercall_enabled(vcpu))
- return kvm_hv_hypercall(vcpu);
-
- nr = kvm_rax_read(vcpu);
- a0 = kvm_rbx_read(vcpu);
- a1 = kvm_rcx_read(vcpu);
- a2 = kvm_rdx_read(vcpu);
- a3 = kvm_rsi_read(vcpu);
+ unsigned long ret;
trace_kvm_hypercall(nr, a0, a1, a2, a3);
- op_64_bit = is_64_bit_hypercall(vcpu);
if (!op_64_bit) {
nr &= 0xFFFFFFFF;
a0 &= 0xFFFFFFFF;
@@ -10079,7 +10098,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
a3 &= 0xFFFFFFFF;
}
- if (static_call(kvm_x86_get_cpl)(vcpu) != 0) {
+ if (cpl) {
ret = -KVM_EPERM;
goto out;
}
@@ -10140,18 +10159,49 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
WARN_ON_ONCE(vcpu->run->hypercall.flags & KVM_EXIT_HYPERCALL_MBZ);
vcpu->arch.complete_userspace_io = complete_hypercall_exit;
+ /* stat is incremented on completion. */
return 0;
}
default:
ret = -KVM_ENOSYS;
break;
}
+
out:
+ ++vcpu->stat.hypercalls;
+ return ret;
+}
+EXPORT_SYMBOL_GPL(__kvm_emulate_hypercall);
+
+int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
+{
+ unsigned long nr, a0, a1, a2, a3, ret;
+ int op_64_bit;
+ int cpl;
+
+ if (kvm_xen_hypercall_enabled(vcpu->kvm))
+ return kvm_xen_hypercall(vcpu);
+
+ if (kvm_hv_hypercall_enabled(vcpu))
+ return kvm_hv_hypercall(vcpu);
+
+ nr = kvm_rax_read(vcpu);
+ a0 = kvm_rbx_read(vcpu);
+ a1 = kvm_rcx_read(vcpu);
+ a2 = kvm_rdx_read(vcpu);
+ a3 = kvm_rsi_read(vcpu);
+ op_64_bit = is_64_bit_hypercall(vcpu);
+ cpl = static_call(kvm_x86_get_cpl)(vcpu);
+
+ ret = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl);
+ if (nr == KVM_HC_MAP_GPA_RANGE && !ret)
+ /* MAP_GPA tosses the request to userspace. */
+ return 0;
+
if (!op_64_bit)
ret = (u32)ret;
kvm_rax_write(vcpu, ret);
- ++vcpu->stat.hypercalls;
return kvm_skip_emulated_instruction(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
@@ -11486,6 +11536,10 @@ static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
+ if (vcpu->kvm->arch.has_protected_state &&
+ vcpu->arch.guest_state_protected)
+ return -EINVAL;
+
vcpu_load(vcpu);
__get_regs(vcpu, regs);
vcpu_put(vcpu);
@@ -11527,6 +11581,10 @@ static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
+ if (vcpu->kvm->arch.has_protected_state &&
+ vcpu->arch.guest_state_protected)
+ return -EINVAL;
+
vcpu_load(vcpu);
__set_regs(vcpu, regs);
vcpu_put(vcpu);
@@ -11599,6 +11657,10 @@ static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2)
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
+ if (vcpu->kvm->arch.has_protected_state &&
+ vcpu->arch.guest_state_protected)
+ return -EINVAL;
+
vcpu_load(vcpu);
__get_sregs(vcpu, sregs);
vcpu_put(vcpu);
@@ -11866,6 +11928,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
{
int ret;
+ if (vcpu->kvm->arch.has_protected_state &&
+ vcpu->arch.guest_state_protected)
+ return -EINVAL;
+
vcpu_load(vcpu);
ret = __set_sregs(vcpu, sregs);
vcpu_put(vcpu);
@@ -11983,7 +12049,7 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
struct fxregs_state *fxsave;
if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
- return 0;
+ return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0;
vcpu_load(vcpu);
@@ -12006,7 +12072,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
struct fxregs_state *fxsave;
if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
- return 0;
+ return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0;
vcpu_load(vcpu);
@@ -12532,6 +12598,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
return -EINVAL;
kvm->arch.vm_type = type;
+ kvm->arch.has_private_mem =
+ (type == KVM_X86_SW_PROTECTED_VM);
ret = kvm_page_track_init(kvm);
if (ret)
@@ -12731,7 +12799,7 @@ static void memslot_rmap_free(struct kvm_memory_slot *slot)
int i;
for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
- kvfree(slot->arch.rmap[i]);
+ vfree(slot->arch.rmap[i]);
slot->arch.rmap[i] = NULL;
}
}
@@ -12743,7 +12811,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
memslot_rmap_free(slot);
for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
- kvfree(slot->arch.lpage_info[i - 1]);
+ vfree(slot->arch.lpage_info[i - 1]);
slot->arch.lpage_info[i - 1] = NULL;
}
@@ -12835,7 +12903,7 @@ out_free:
memslot_rmap_free(slot);
for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
- kvfree(slot->arch.lpage_info[i - 1]);
+ vfree(slot->arch.lpage_info[i - 1]);
slot->arch.lpage_info[i - 1] = NULL;
}
return -ENOMEM;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index a8b71803777b..d80a4c6b5a38 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -24,6 +24,8 @@ struct kvm_caps {
bool has_bus_lock_exit;
/* notify VM exit supported? */
bool has_notify_vmexit;
+ /* bit mask of VM types */
+ u32 supported_vm_types;
u64 supported_mce_cap;
u64 supported_xcr0;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 6b43b6480354..a51f22c90a7a 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -7,6 +7,7 @@
#include <linux/swapops.h>
#include <linux/kmemleak.h>
#include <linux/sched/task.h>
+#include <linux/execmem.h>
#include <asm/set_memory.h>
#include <asm/cpu_device_id.h>
@@ -1095,3 +1096,31 @@ unsigned long arch_max_swapfile_size(void)
return pages;
}
#endif
+
+#ifdef CONFIG_EXECMEM
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
+{
+ unsigned long start, offset = 0;
+
+ if (kaslr_enabled())
+ offset = get_random_u32_inclusive(1, 1024) * PAGE_SIZE;
+
+ start = MODULES_VADDR + offset;
+
+ execmem_info = (struct execmem_info){
+ .ranges = {
+ [EXECMEM_DEFAULT] = {
+ .flags = EXECMEM_KASAN_SHADOW,
+ .start = start,
+ .end = MODULES_END,
+ .pgprot = PAGE_KERNEL,
+ .alignment = MODULE_ALIGN,
+ },
+ },
+ };
+
+ return &execmem_info;
+}
+#endif /* CONFIG_EXECMEM */