diff options
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/maccess.c | 19
-rw-r--r-- | arch/x86/mm/mem_encrypt.c | 34
-rw-r--r-- | arch/x86/mm/mem_encrypt_amd.c | 36
-rw-r--r-- | arch/x86/mm/numa.c | 94
-rw-r--r-- | arch/x86/mm/pti.c | 58
5 files changed, 162 insertions, 79 deletions
diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c index 5a53c2cc169c..6993f026adec 100644 --- a/arch/x86/mm/maccess.c +++ b/arch/x86/mm/maccess.c @@ -9,12 +9,21 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) unsigned long vaddr = (unsigned long)unsafe_src; /* - * Range covering the highest possible canonical userspace address - * as well as non-canonical address range. For the canonical range - * we also need to include the userspace guard page. + * Do not allow userspace addresses. This disallows + * normal userspace and the userspace guard page: */ - return vaddr >= TASK_SIZE_MAX + PAGE_SIZE && - __is_canonical_address(vaddr, boot_cpu_data.x86_virt_bits); + if (vaddr < TASK_SIZE_MAX + PAGE_SIZE) + return false; + + /* + * Allow everything during early boot before 'x86_virt_bits' + * is initialized. Needed for instruction decoding in early + * exception handlers. + */ + if (!boot_cpu_data.x86_virt_bits) + return true; + + return __is_canonical_address(vaddr, boot_cpu_data.x86_virt_bits); } #else bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 9f27e14e185f..c290c55b632b 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -12,6 +12,7 @@ #include <linux/swiotlb.h> #include <linux/cc_platform.h> #include <linux/mem_encrypt.h> +#include <linux/virtio_anchor.h> /* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */ bool force_dma_unencrypted(struct device *dev) @@ -86,3 +87,36 @@ void __init mem_encrypt_init(void) print_mem_encrypt_feature_info(); } + +void __init mem_encrypt_setup_arch(void) +{ + phys_addr_t total_mem = memblock_phys_mem_size(); + unsigned long size; + + if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) + return; + + /* + * For SEV and TDX, all DMA has to occur via shared/unencrypted pages. + * Kernel uses SWIOTLB to make this happen without changing device + * drivers. 
However, depending on the workload being run, the + * default 64MB of SWIOTLB may not be enough and SWIOTLB may + * run out of buffers for DMA, resulting in I/O errors and/or + * performance degradation especially with high I/O workloads. + * + * Adjust the default size of SWIOTLB using a percentage of guest + * memory for SWIOTLB buffers. Also, as the SWIOTLB bounce buffer + * memory is allocated from low memory, ensure that the adjusted size + * is within the limits of low available memory. + * + * The percentage of guest memory used here for SWIOTLB buffers + * is more of an approximation of the static adjustment which + * 64MB for <1G, and ~128M to 256M for 1G-to-4G, i.e., the 6% + */ + size = total_mem * 6 / 100; + size = clamp_val(size, IO_TLB_DEFAULT_SIZE, SZ_1G); + swiotlb_adjust_size(size); + + /* Set restricted memory access for virtio. */ + virtio_set_mem_acc_cb(virtio_require_restricted_mem_acc); +} diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index 6faea41e99b6..a68f2dda0948 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -19,8 +19,6 @@ #include <linux/kernel.h> #include <linux/bitops.h> #include <linux/dma-mapping.h> -#include <linux/virtio_config.h> -#include <linux/virtio_anchor.h> #include <linux/cc_platform.h> #include <asm/tlbflush.h> @@ -215,40 +213,6 @@ void __init sme_map_bootdata(char *real_mode_data) __sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, true); } -void __init sev_setup_arch(void) -{ - phys_addr_t total_mem = memblock_phys_mem_size(); - unsigned long size; - - if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) - return; - - /* - * For SEV, all DMA has to occur via shared/unencrypted pages. - * SEV uses SWIOTLB to make this happen without changing device - * drivers. 
However, depending on the workload being run, the - * default 64MB of SWIOTLB may not be enough and SWIOTLB may - * run out of buffers for DMA, resulting in I/O errors and/or - * performance degradation especially with high I/O workloads. - * - * Adjust the default size of SWIOTLB for SEV guests using - * a percentage of guest memory for SWIOTLB buffers. - * Also, as the SWIOTLB bounce buffer memory is allocated - * from low memory, ensure that the adjusted size is within - * the limits of low available memory. - * - * The percentage of guest memory used here for SWIOTLB buffers - * is more of an approximation of the static adjustment which - * 64MB for <1G, and ~128M to 256M for 1G-to-4G, i.e., the 6% - */ - size = total_mem * 6 / 100; - size = clamp_val(size, IO_TLB_DEFAULT_SIZE, SZ_1G); - swiotlb_adjust_size(size); - - /* Set restricted memory access for virtio. */ - virtio_set_mem_acc_cb(virtio_require_restricted_mem_acc); -} - static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot) { unsigned long pfn = 0; diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 2aadb2019b4f..b29ceb19e46e 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -3,6 +3,7 @@ #include <linux/acpi.h> #include <linux/kernel.h> #include <linux/mm.h> +#include <linux/of.h> #include <linux/string.h> #include <linux/init.h> #include <linux/memblock.h> @@ -11,6 +12,7 @@ #include <linux/nodemask.h> #include <linux/sched.h> #include <linux/topology.h> +#include <linux/sort.h> #include <asm/e820/api.h> #include <asm/proto.h> @@ -56,7 +58,7 @@ s16 __apicid_to_node[MAX_LOCAL_APIC] = { int numa_cpu_node(int cpu) { - int apicid = early_per_cpu(x86_cpu_to_apicid, cpu); + u32 apicid = early_per_cpu(x86_cpu_to_apicid, cpu); if (apicid != BAD_APICID) return __apicid_to_node[apicid]; @@ -601,13 +603,6 @@ static int __init numa_register_memblks(struct numa_meminfo *mi) if (start >= end) continue; - /* - * Don't confuse VM with a node that doesn't have the - * minimum 
amount of memory: - */ - if (end && (end - start) < NODE_MIN_SIZE) - continue; - alloc_node_data(nid); } @@ -733,6 +728,8 @@ void __init x86_numa_init(void) if (!numa_init(amd_numa_init)) return; #endif + if (acpi_disabled && !numa_init(of_numa_init)) + return; } numa_init(dummy_numa_init); @@ -786,7 +783,7 @@ void __init init_gi_nodes(void) void __init init_cpu_to_node(void) { int cpu; - u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); + u32 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); BUG_ON(cpu_to_apicid == NULL); @@ -961,4 +958,83 @@ int memory_add_physaddr_to_nid(u64 start) return nid; } EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); + +static int __init cmp_memblk(const void *a, const void *b) +{ + const struct numa_memblk *ma = *(const struct numa_memblk **)a; + const struct numa_memblk *mb = *(const struct numa_memblk **)b; + + return ma->start - mb->start; +} + +static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata; + +/** + * numa_fill_memblks - Fill gaps in numa_meminfo memblks + * @start: address to begin fill + * @end: address to end fill + * + * Find and extend numa_meminfo memblks to cover the @start-@end + * physical address range, such that the first memblk includes + * @start, the last memblk includes @end, and any gaps in between + * are filled. + * + * RETURNS: + * 0 : Success + * NUMA_NO_MEMBLK : No memblk exists in @start-@end range + */ + +int __init numa_fill_memblks(u64 start, u64 end) +{ + struct numa_memblk **blk = &numa_memblk_list[0]; + struct numa_meminfo *mi = &numa_meminfo; + int count = 0; + u64 prev_end; + + /* + * Create a list of pointers to numa_meminfo memblks that + * overlap start, end. Exclude (start == bi->end) since + * end addresses in both a CFMWS range and a memblk range + * are exclusive. + * + * This list of pointers is used to make in-place changes + * that fill out the numa_meminfo memblks. 
+ */ + for (int i = 0; i < mi->nr_blks; i++) { + struct numa_memblk *bi = &mi->blk[i]; + + if (start < bi->end && end >= bi->start) { + blk[count] = &mi->blk[i]; + count++; + } + } + if (!count) + return NUMA_NO_MEMBLK; + + /* Sort the list of pointers in memblk->start order */ + sort(&blk[0], count, sizeof(blk[0]), cmp_memblk, NULL); + + /* Make sure the first/last memblks include start/end */ + blk[0]->start = min(blk[0]->start, start); + blk[count - 1]->end = max(blk[count - 1]->end, end); + + /* + * Fill any gaps by tracking the previous memblks + * end address and backfilling to it if needed. + */ + prev_end = blk[0]->end; + for (int i = 1; i < count; i++) { + struct numa_memblk *curr = blk[i]; + + if (prev_end >= curr->start) { + if (prev_end < curr->end) + prev_end = curr->end; + } else { + curr->start = prev_end; + prev_end = curr->end; + } + } + return 0; +} + #endif diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 78414c6d1b5e..5dd733944629 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -69,6 +69,7 @@ static void __init pti_print_if_secure(const char *reason) pr_info("%s\n", reason); } +/* Assume mode is auto unless overridden via cmdline below. */ static enum pti_mode { PTI_AUTO = 0, PTI_FORCE_OFF, @@ -77,50 +78,49 @@ static enum pti_mode { void __init pti_check_boottime_disable(void) { - char arg[5]; - int ret; - - /* Assume mode is auto unless overridden. 
*/ - pti_mode = PTI_AUTO; - if (hypervisor_is_type(X86_HYPER_XEN_PV)) { pti_mode = PTI_FORCE_OFF; pti_print_if_insecure("disabled on XEN PV."); return; } - ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg)); - if (ret > 0) { - if (ret == 3 && !strncmp(arg, "off", 3)) { - pti_mode = PTI_FORCE_OFF; - pti_print_if_insecure("disabled on command line."); - return; - } - if (ret == 2 && !strncmp(arg, "on", 2)) { - pti_mode = PTI_FORCE_ON; - pti_print_if_secure("force enabled on command line."); - goto enable; - } - if (ret == 4 && !strncmp(arg, "auto", 4)) { - pti_mode = PTI_AUTO; - goto autosel; - } - } - - if (cmdline_find_option_bool(boot_command_line, "nopti") || - cpu_mitigations_off()) { + if (cpu_mitigations_off()) pti_mode = PTI_FORCE_OFF; + if (pti_mode == PTI_FORCE_OFF) { pti_print_if_insecure("disabled on command line."); return; } -autosel: - if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) + if (pti_mode == PTI_FORCE_ON) + pti_print_if_secure("force enabled on command line."); + + if (pti_mode == PTI_AUTO && !boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) return; -enable: + setup_force_cpu_cap(X86_FEATURE_PTI); } +static int __init pti_parse_cmdline(char *arg) +{ + if (!strcmp(arg, "off")) + pti_mode = PTI_FORCE_OFF; + else if (!strcmp(arg, "on")) + pti_mode = PTI_FORCE_ON; + else if (!strcmp(arg, "auto")) + pti_mode = PTI_AUTO; + else + return -EINVAL; + return 0; +} +early_param("pti", pti_parse_cmdline); + +static int __init pti_parse_cmdline_nopti(char *arg) +{ + pti_mode = PTI_FORCE_OFF; + return 0; +} +early_param("nopti", pti_parse_cmdline_nopti); + pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) { /* |