-rw-r--r--  Documentation/admin-guide/cgroup-v2.rst                 |   9
-rw-r--r--  arch/loongarch/include/asm/kasan.h                      |  13
-rw-r--r--  arch/loongarch/include/asm/page.h                       |   5
-rw-r--r--  arch/loongarch/kernel/acpi.c                            |  81
-rw-r--r--  arch/loongarch/kernel/paravirt.c                        |  15
-rw-r--r--  arch/loongarch/kernel/smp.c                             |   5
-rw-r--r--  arch/loongarch/mm/kasan_init.c                          |  46
-rw-r--r--  drivers/vdpa/mlx5/core/mr.c                             |   8
-rw-r--r--  fs/nilfs2/btnode.c                                      |   2
-rw-r--r--  fs/nilfs2/gcinode.c                                     |   4
-rw-r--r--  fs/nilfs2/mdt.c                                         |   1
-rw-r--r--  fs/nilfs2/page.c                                        |   2
-rw-r--r--  fs/ocfs2/super.c                                        |  13
-rw-r--r--  include/linux/memcontrol.h                              |  12
-rw-r--r--  include/linux/vm_event_item.h                           |   2
-rw-r--r--  include/net/tls.h                                       |  12
-rw-r--r--  mm/gup.c                                                | 116
-rw-r--r--  mm/huge_memory.c                                        |   4
-rw-r--r--  mm/memcontrol.c                                         |   4
-rw-r--r--  mm/nommu.c                                              |   2
-rw-r--r--  mm/page_alloc.c                                         |  15
-rw-r--r--  mm/page_io.c                                            |  16
-rw-r--r--  mm/swap.c                                               |  14
-rw-r--r--  mm/swapfile.c                                           |   2
-rw-r--r--  mm/vmstat.c                                             |   2
-rw-r--r--  mm/zswap.c                                              |   6
-rw-r--r--  net/core/filter.c                                       |   2
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_bits_iter.c  |  32
-rw-r--r--  tools/testing/selftests/mm/hugetlb_dio.c                |   7
29 files changed, 326 insertions(+), 126 deletions(-)
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 69af2173555f..6d02168d78be 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1599,6 +1599,15 @@ The following nested keys are defined.
   pglazyfreed (npn)
         Amount of reclaimed lazyfree pages
 
+  swpin_zero
+        Number of pages swapped into memory and filled with zero, where I/O
+        was optimized out because the page content was detected to be zero
+        during swapout.
+
+  swpout_zero
+        Number of zero-filled pages swapped out with I/O skipped due to the
+        content being detected as zero.
+
   zswpin
         Number of pages moved in to memory from zswap.
 
diff --git a/arch/loongarch/include/asm/kasan.h b/arch/loongarch/include/asm/kasan.h
index c6bce5fbff57..7f52bd31b9d4 100644
--- a/arch/loongarch/include/asm/kasan.h
+++ b/arch/loongarch/include/asm/kasan.h
@@ -25,6 +25,7 @@
 /* 64-bit segment value. */
 #define XKPRANGE_UC_SEG         (0x8000)
 #define XKPRANGE_CC_SEG         (0x9000)
+#define XKPRANGE_WC_SEG         (0xa000)
 #define XKVRANGE_VC_SEG         (0xffff)
 
 /* Cached */
@@ -41,20 +42,28 @@
 #define XKPRANGE_UC_SHADOW_SIZE         (XKPRANGE_UC_SIZE >> KASAN_SHADOW_SCALE_SHIFT)
 #define XKPRANGE_UC_SHADOW_END          (XKPRANGE_UC_KASAN_OFFSET + XKPRANGE_UC_SHADOW_SIZE)
 
+/* WriteCombine */
+#define XKPRANGE_WC_START               WRITECOMBINE_BASE
+#define XKPRANGE_WC_SIZE                XRANGE_SIZE
+#define XKPRANGE_WC_KASAN_OFFSET        XKPRANGE_UC_SHADOW_END
+#define XKPRANGE_WC_SHADOW_SIZE         (XKPRANGE_WC_SIZE >> KASAN_SHADOW_SCALE_SHIFT)
+#define XKPRANGE_WC_SHADOW_END          (XKPRANGE_WC_KASAN_OFFSET + XKPRANGE_WC_SHADOW_SIZE)
+
 /* VMALLOC (Cached or UnCached) */
 #define XKVRANGE_VC_START               MODULES_VADDR
 #define XKVRANGE_VC_SIZE                round_up(KFENCE_AREA_END - MODULES_VADDR + 1, PGDIR_SIZE)
-#define XKVRANGE_VC_KASAN_OFFSET        XKPRANGE_UC_SHADOW_END
+#define XKVRANGE_VC_KASAN_OFFSET        XKPRANGE_WC_SHADOW_END
 #define XKVRANGE_VC_SHADOW_SIZE         (XKVRANGE_VC_SIZE >> KASAN_SHADOW_SCALE_SHIFT)
 #define XKVRANGE_VC_SHADOW_END          (XKVRANGE_VC_KASAN_OFFSET + XKVRANGE_VC_SHADOW_SIZE)
 
 /* KAsan shadow memory start right after vmalloc. */
 #define KASAN_SHADOW_START              round_up(KFENCE_AREA_END, PGDIR_SIZE)
 #define KASAN_SHADOW_SIZE               (XKVRANGE_VC_SHADOW_END - XKPRANGE_CC_KASAN_OFFSET)
-#define KASAN_SHADOW_END                round_up(KASAN_SHADOW_START + KASAN_SHADOW_SIZE, PGDIR_SIZE)
+#define KASAN_SHADOW_END                (round_up(KASAN_SHADOW_START + KASAN_SHADOW_SIZE, PGDIR_SIZE) - 1)
 
 #define XKPRANGE_CC_SHADOW_OFFSET       (KASAN_SHADOW_START + XKPRANGE_CC_KASAN_OFFSET)
 #define XKPRANGE_UC_SHADOW_OFFSET       (KASAN_SHADOW_START + XKPRANGE_UC_KASAN_OFFSET)
+#define XKPRANGE_WC_SHADOW_OFFSET       (KASAN_SHADOW_START + XKPRANGE_WC_KASAN_OFFSET)
 #define XKVRANGE_VC_SHADOW_OFFSET       (KASAN_SHADOW_START + XKVRANGE_VC_KASAN_OFFSET)
 
 extern bool kasan_early_stage;
diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h
index e85df33f11c7..8f21567a3188 100644
--- a/arch/loongarch/include/asm/page.h
+++ b/arch/loongarch/include/asm/page.h
@@ -113,10 +113,7 @@ struct page *tlb_virt_to_page(unsigned long kaddr);
 extern int __virt_addr_valid(volatile void *kaddr);
 #define virt_addr_valid(kaddr) __virt_addr_valid((volatile void *)(kaddr))
 
-#define VM_DATA_DEFAULT_FLAGS \
-    (VM_READ | VM_WRITE | \
-     ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \
-     VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+#define VM_DATA_DEFAULT_FLAGS  VM_DATA_FLAGS_TSK_EXEC
 
 #include <asm-generic/memory_model.h>
 #include <asm-generic/getorder.h>
diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c
index f1a74b80f22c..382a09a7152c 100644
--- a/arch/loongarch/kernel/acpi.c
+++ b/arch/loongarch/kernel/acpi.c
@@ -58,48 +58,48 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
     return ioremap_cache(phys, size);
 }
 
-static int cpu_enumerated = 0;
-
 #ifdef CONFIG_SMP
-static int set_processor_mask(u32 id, u32 flags)
+static int set_processor_mask(u32 id, u32 pass)
 {
-    int nr_cpus;
-    int cpu, cpuid = id;
-
-    if (!cpu_enumerated)
-        nr_cpus = NR_CPUS;
-    else
-        nr_cpus = nr_cpu_ids;
+    int cpu = -1, cpuid = id;
 
-    if (num_processors >= nr_cpus) {
+    if (num_processors >= NR_CPUS) {
         pr_warn(PREFIX "nr_cpus limit of %i reached."
-            " processor 0x%x ignored.\n", nr_cpus, cpuid);
+            " processor 0x%x ignored.\n", NR_CPUS, cpuid);
 
         return -ENODEV;
     }
+
     if (cpuid == loongson_sysconf.boot_cpu_id)
         cpu = 0;
-    else
-        cpu = find_first_zero_bit(cpumask_bits(cpu_present_mask), NR_CPUS);
-
-    if (!cpu_enumerated)
-        set_cpu_possible(cpu, true);
 
-    if (flags & ACPI_MADT_ENABLED) {
+    switch (pass) {
+    case 1: /* Pass 1 handle enabled processors */
+        if (cpu < 0)
+            cpu = find_first_zero_bit(cpumask_bits(cpu_present_mask), NR_CPUS);
         num_processors++;
         set_cpu_present(cpu, true);
-        __cpu_number_map[cpuid] = cpu;
-        __cpu_logical_map[cpu] = cpuid;
-    } else
+        break;
+    case 2: /* Pass 2 handle disabled processors */
+        if (cpu < 0)
+            cpu = find_first_zero_bit(cpumask_bits(cpu_possible_mask), NR_CPUS);
         disabled_cpus++;
+        break;
+    default:
+        return cpu;
+    }
+
+    set_cpu_possible(cpu, true);
+    __cpu_number_map[cpuid] = cpu;
+    __cpu_logical_map[cpu] = cpuid;
 
     return cpu;
 }
 #endif
 
 static int __init
-acpi_parse_processor(union acpi_subtable_headers *header, const unsigned long end)
+acpi_parse_p1_processor(union acpi_subtable_headers *header, const unsigned long end)
 {
     struct acpi_madt_core_pic *processor = NULL;
 
@@ -110,13 +110,30 @@ acpi_parse_processor(union acpi_subtable_headers *header, const unsigned long en
     acpi_table_print_madt_entry(&header->common);
 #ifdef CONFIG_SMP
     acpi_core_pic[processor->core_id] = *processor;
-    set_processor_mask(processor->core_id, processor->flags);
+    if (processor->flags & ACPI_MADT_ENABLED)
+        set_processor_mask(processor->core_id, 1);
 #endif
 
     return 0;
 }
 
 static int __init
+acpi_parse_p2_processor(union acpi_subtable_headers *header, const unsigned long end)
+{
+    struct acpi_madt_core_pic *processor = NULL;
+
+    processor = (struct acpi_madt_core_pic *)header;
+    if (BAD_MADT_ENTRY(processor, end))
+        return -EINVAL;
+
+#ifdef CONFIG_SMP
+    if (!(processor->flags & ACPI_MADT_ENABLED))
+        set_processor_mask(processor->core_id, 2);
+#endif
+
+    return 0;
+}
+static int __init
 acpi_parse_eio_master(union acpi_subtable_headers *header, const unsigned long end)
 {
     static int core = 0;
@@ -143,12 +160,14 @@ static void __init acpi_process_madt(void)
     }
 #endif
     acpi_table_parse_madt(ACPI_MADT_TYPE_CORE_PIC,
-            acpi_parse_processor, MAX_CORE_PIC);
+            acpi_parse_p1_processor, MAX_CORE_PIC);
+
+    acpi_table_parse_madt(ACPI_MADT_TYPE_CORE_PIC,
+            acpi_parse_p2_processor, MAX_CORE_PIC);
 
     acpi_table_parse_madt(ACPI_MADT_TYPE_EIO_PIC,
             acpi_parse_eio_master, MAX_IO_PICS);
 
-    cpu_enumerated = 1;
     loongson_sysconf.nr_cpus = num_processors;
 }
 
@@ -310,6 +329,10 @@ static int __ref acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
     int nid;
 
     nid = acpi_get_node(handle);
+
+    if (nid != NUMA_NO_NODE)
+        nid = early_cpu_to_node(cpu);
+
     if (nid != NUMA_NO_NODE) {
         set_cpuid_to_node(physid, nid);
         node_set(nid, numa_nodes_parsed);
@@ -324,12 +347,14 @@ int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, int *pcpu
 {
     int cpu;
 
-    cpu = set_processor_mask(physid, ACPI_MADT_ENABLED);
-    if (cpu < 0) {
+    cpu = cpu_number_map(physid);
+    if (cpu < 0 || cpu >= nr_cpu_ids) {
         pr_info(PREFIX "Unable to map lapic to logical cpu number\n");
-        return cpu;
+        return -ERANGE;
     }
 
+    num_processors++;
+    set_cpu_present(cpu, true);
     acpi_map_cpu2node(handle, cpu, physid);
 
     *pcpu = cpu;
diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
index a5fc61f8b348..e5a39bbad078 100644
--- a/arch/loongarch/kernel/paravirt.c
+++ b/arch/loongarch/kernel/paravirt.c
@@ -51,11 +51,18 @@ static u64 paravt_steal_clock(int cpu)
 }
 
 #ifdef CONFIG_SMP
+static struct smp_ops native_ops;
+
 static void pv_send_ipi_single(int cpu, unsigned int action)
 {
     int min, old;
     irq_cpustat_t *info = &per_cpu(irq_stat, cpu);
 
+    if (unlikely(action == ACTION_BOOT_CPU)) {
+        native_ops.send_ipi_single(cpu, action);
+        return;
+    }
+
     old = atomic_fetch_or(BIT(action), &info->message);
     if (old)
         return;
@@ -75,6 +82,11 @@ static void pv_send_ipi_mask(const struct cpumask *mask, unsigned int action)
     if (cpumask_empty(mask))
         return;
 
+    if (unlikely(action == ACTION_BOOT_CPU)) {
+        native_ops.send_ipi_mask(mask, action);
+        return;
+    }
+
     action = BIT(action);
     for_each_cpu(i, mask) {
         info = &per_cpu(irq_stat, i);
@@ -147,6 +159,8 @@ static void pv_init_ipi(void)
 {
     int r, swi;
 
+    /* Init native ipi irq for ACTION_BOOT_CPU */
+    native_ops.init_ipi();
     swi = get_percpu_irq(INT_SWI0);
     if (swi < 0)
         panic("SWI0 IRQ mapping failed\n");
@@ -193,6 +207,7 @@ int __init pv_ipi_init(void)
         return 0;
 
 #ifdef CONFIG_SMP
+    native_ops = mp_ops;
     mp_ops.init_ipi = pv_init_ipi;
     mp_ops.send_ipi_single = pv_send_ipi_single;
     mp_ops.send_ipi_mask = pv_send_ipi_mask;
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index 9afc2d8b3414..5d59e9ce2772 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -302,7 +302,7 @@ static void __init fdt_smp_setup(void)
         __cpu_number_map[cpuid] = cpu;
         __cpu_logical_map[cpu] = cpuid;
 
-        early_numa_add_cpu(cpu, 0);
+        early_numa_add_cpu(cpuid, 0);
         set_cpuid_to_node(cpuid, 0);
     }
 
@@ -331,11 +331,11 @@ void __init loongson_prepare_cpus(unsigned int max_cpus)
     int i = 0;
 
     parse_acpi_topology();
+    cpu_data[0].global_id = cpu_logical_map(0);
 
     for (i = 0; i < loongson_sysconf.nr_cpus; i++) {
         set_cpu_present(i, true);
         csr_mail_send(0, __cpu_logical_map[i], 0);
-        cpu_data[i].global_id = __cpu_logical_map[i];
     }
 
     per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
@@ -380,6 +380,7 @@ void loongson_init_secondary(void)
              cpu_logical_map(cpu) / loongson_sysconf.cores_per_package;
     cpu_data[cpu].core = pptt_enabled ? cpu_data[cpu].core :
              cpu_logical_map(cpu) % loongson_sysconf.cores_per_package;
+    cpu_data[cpu].global_id = cpu_logical_map(cpu);
 }
 
 void loongson_smp_finish(void)
diff --git a/arch/loongarch/mm/kasan_init.c b/arch/loongarch/mm/kasan_init.c
index 427d6b1aec09..d2681272d8f0 100644
--- a/arch/loongarch/mm/kasan_init.c
+++ b/arch/loongarch/mm/kasan_init.c
@@ -13,6 +13,13 @@
 static pgd_t kasan_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
 
+#ifdef __PAGETABLE_P4D_FOLDED
+#define __pgd_none(early, pgd) (0)
+#else
+#define __pgd_none(early, pgd) (early ? (pgd_val(pgd) == 0) : \
+(__pa(pgd_val(pgd)) == (unsigned long)__pa(kasan_early_shadow_p4d)))
+#endif
+
 #ifdef __PAGETABLE_PUD_FOLDED
 #define __p4d_none(early, p4d) (0)
 #else
@@ -55,6 +62,9 @@ void *kasan_mem_to_shadow(const void *addr)
         case XKPRANGE_UC_SEG:
             offset = XKPRANGE_UC_SHADOW_OFFSET;
             break;
+        case XKPRANGE_WC_SEG:
+            offset = XKPRANGE_WC_SHADOW_OFFSET;
+            break;
         case XKVRANGE_VC_SEG:
             offset = XKVRANGE_VC_SHADOW_OFFSET;
             break;
@@ -79,6 +89,8 @@ const void *kasan_shadow_to_mem(const void *shadow_addr)
 
     if (addr >= XKVRANGE_VC_SHADOW_OFFSET)
         return (void *)(((addr - XKVRANGE_VC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKVRANGE_VC_START);
+    else if (addr >= XKPRANGE_WC_SHADOW_OFFSET)
+        return (void *)(((addr - XKPRANGE_WC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_WC_START);
     else if (addr >= XKPRANGE_UC_SHADOW_OFFSET)
         return (void *)(((addr - XKPRANGE_UC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_UC_START);
     else if (addr >= XKPRANGE_CC_SHADOW_OFFSET)
@@ -142,6 +154,19 @@ static pud_t *__init kasan_pud_offset(p4d_t *p4dp, unsigned long addr, int node,
     return pud_offset(p4dp, addr);
 }
 
+static p4d_t *__init kasan_p4d_offset(pgd_t *pgdp, unsigned long addr, int node, bool early)
+{
+    if (__pgd_none(early, pgdp_get(pgdp))) {
+        phys_addr_t p4d_phys = early ?
+                __pa_symbol(kasan_early_shadow_p4d) : kasan_alloc_zeroed_page(node);
+        if (!early)
+            memcpy(__va(p4d_phys), kasan_early_shadow_p4d, sizeof(kasan_early_shadow_p4d));
+        pgd_populate(&init_mm, pgdp, (p4d_t *)__va(p4d_phys));
+    }
+
+    return p4d_offset(pgdp, addr);
+}
+
 static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,
                       unsigned long end, int node, bool early)
 {
@@ -178,19 +203,19 @@ static void __init kasan_pud_populate(p4d_t *p4dp, unsigned long addr,
     do {
         next = pud_addr_end(addr, end);
         kasan_pmd_populate(pudp, addr, next, node, early);
-    } while (pudp++, addr = next, addr != end);
+    } while (pudp++, addr = next, addr != end && __pud_none(early, READ_ONCE(*pudp)));
 }
 
 static void __init kasan_p4d_populate(pgd_t *pgdp, unsigned long addr,
                       unsigned long end, int node, bool early)
 {
     unsigned long next;
-    p4d_t *p4dp = p4d_offset(pgdp, addr);
+    p4d_t *p4dp = kasan_p4d_offset(pgdp, addr, node, early);
 
     do {
         next = p4d_addr_end(addr, end);
         kasan_pud_populate(p4dp, addr, next, node, early);
-    } while (p4dp++, addr = next, addr != end);
+    } while (p4dp++, addr = next, addr != end && __p4d_none(early, READ_ONCE(*p4dp)));
 }
 
 static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
@@ -218,7 +243,7 @@ static void __init kasan_map_populate(unsigned long start, unsigned long end,
 asmlinkage void __init kasan_early_init(void)
 {
     BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
-    BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
+    BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END + 1, PGDIR_SIZE));
 }
 
 static inline void kasan_set_pgd(pgd_t *pgdp, pgd_t pgdval)
@@ -233,7 +258,7 @@ static void __init clear_pgds(unsigned long start, unsigned long end)
      * swapper_pg_dir. pgd_clear() can't be used
      * here because it's nop on 2,3-level pagetable setups
      */
-    for (; start < end; start += PGDIR_SIZE)
+    for (; start < end; start = pgd_addr_end(start, end))
         kasan_set_pgd((pgd_t *)pgd_offset_k(start), __pgd(0));
 }
 
@@ -243,6 +268,17 @@ void __init kasan_init(void)
     phys_addr_t pa_start, pa_end;
 
     /*
+     * If PGDIR_SIZE is too large for cpu_vabits, KASAN_SHADOW_END will
+     * overflow UINTPTR_MAX and then looks like a user space address.
+     * For example, PGDIR_SIZE of CONFIG_4KB_4LEVEL is 2^39, which is too
+     * large for Loongson-2K series whose cpu_vabits = 39.
+     */
+    if (KASAN_SHADOW_END < vm_map_base) {
+        pr_warn("PGDIR_SIZE too large for cpu_vabits, KernelAddressSanitizer disabled.\n");
+        return;
+    }
+
+    /*
      * PGD was populated as invalid_pmd_table or invalid_pud_table
      * in pagetable_init() which depends on how many levels of page
      * table you are using, but we had to clean the gpd of kasan
diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
index 2dd21e0b399e..7d0c83b5b071 100644
--- a/drivers/vdpa/mlx5/core/mr.c
+++ b/drivers/vdpa/mlx5/core/mr.c
@@ -373,7 +373,7 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr
     struct page *pg;
     unsigned int nsg;
     int sglen;
-    u64 pa;
+    u64 pa, offset;
     u64 paend;
     struct scatterlist *sg;
     struct device *dma = mvdev->vdev.dma_dev;
@@ -396,8 +396,10 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr
     sg = mr->sg_head.sgl;
     for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
          map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) {
-        paend = map->addr + maplen(map, mr);
-        for (pa = map->addr; pa < paend; pa += sglen) {
+        offset = mr->start > map->start ? mr->start - map->start : 0;
+        pa = map->addr + offset;
+        paend = map->addr + offset + maplen(map, mr);
+        for (; pa < paend; pa += sglen) {
             pg = pfn_to_page(__phys_to_pfn(pa));
             if (!sg) {
                 mlx5_vdpa_warn(mvdev, "sg null. start 0x%llx, end 0x%llx\n",
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 57b4af5ad646..501ad7be5174 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -68,7 +68,6 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
         goto failed;
     }
     memset(bh->b_data, 0, i_blocksize(inode));
-    bh->b_bdev = inode->i_sb->s_bdev;
     bh->b_blocknr = blocknr;
     set_buffer_mapped(bh);
     set_buffer_uptodate(bh);
@@ -133,7 +132,6 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
         goto found;
     }
     set_buffer_mapped(bh);
-    bh->b_bdev = inode->i_sb->s_bdev;
     bh->b_blocknr = pblocknr; /* set block address for read */
     bh->b_end_io = end_buffer_read_sync;
     get_bh(bh);
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 1c9ae36a03ab..ace22253fed0 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -83,10 +83,8 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
         goto out;
     }
 
-    if (!buffer_mapped(bh)) {
-        bh->b_bdev = inode->i_sb->s_bdev;
+    if (!buffer_mapped(bh))
         set_buffer_mapped(bh);
-    }
     bh->b_blocknr = pbn;
     bh->b_end_io = end_buffer_read_sync;
     get_bh(bh);
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index ceb7dc0b5bad..2db6350b5ac2 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -89,7 +89,6 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block,
     if (buffer_uptodate(bh))
         goto failed_bh;
 
-    bh->b_bdev = sb->s_bdev;
     err = nilfs_mdt_insert_new_block(inode, block, bh, init_block);
     if (likely(!err)) {
         get_bh(bh);
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 10def4b55995..6dd8b854cd1f 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -39,7 +39,6 @@ static struct buffer_head *__nilfs_get_folio_block(struct folio *folio,
     first_block = (unsigned long)index << (PAGE_SHIFT - blkbits);
     bh = get_nth_bh(bh, block - first_block);
 
-    touch_buffer(bh);
     wait_on_buffer(bh);
     return bh;
 }
@@ -64,6 +63,7 @@ struct buffer_head *nilfs_grab_buffer(struct inode *inode,
         folio_put(folio);
         return NULL;
     }
+    bh->b_bdev = inode->i_sb->s_bdev;
     return bh;
 }
 
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 3d404624bb96..c79b4291777f 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -2319,6 +2319,7 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
                    struct ocfs2_blockcheck_stats *stats)
 {
     int status = -EAGAIN;
+    u32 blksz_bits;
 
     if (memcmp(di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE,
            strlen(OCFS2_SUPER_BLOCK_SIGNATURE)) == 0) {
@@ -2333,11 +2334,15 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
             goto out;
         }
         status = -EINVAL;
-        if ((1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits)) != blksz) {
+        /* Acceptable block sizes are 512 bytes, 1K, 2K and 4K. */
+        blksz_bits = le32_to_cpu(di->id2.i_super.s_blocksize_bits);
+        if (blksz_bits < 9 || blksz_bits > 12) {
             mlog(ML_ERROR, "found superblock with incorrect block "
-                 "size: found %u, should be %u\n",
-                 1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits),
-                 blksz);
+                 "size bits: found %u, should be 9, 10, 11, or 12\n",
+                 blksz_bits);
+        } else if ((1 << le32_to_cpu(blksz_bits)) != blksz) {
+            mlog(ML_ERROR, "found superblock with incorrect block "
+                 "size: found %u, should be %u\n", 1 << blksz_bits, blksz);
         } else if (le16_to_cpu(di->id2.i_super.s_major_rev_level) !=
                OCFS2_MAJOR_REV_LEVEL ||
                le16_to_cpu(di->id2.i_super.s_minor_rev_level) !=
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 34d2da05f2f1..e1b41554a5fb 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1760,8 +1760,9 @@ static inline int memcg_kmem_id(struct mem_cgroup *memcg)
 
 struct mem_cgroup *mem_cgroup_from_slab_obj(void *p);
 
-static inline void count_objcg_event(struct obj_cgroup *objcg,
-                     enum vm_event_item idx)
+static inline void count_objcg_events(struct obj_cgroup *objcg,
+                      enum vm_event_item idx,
+                      unsigned long count)
 {
     struct mem_cgroup *memcg;
 
@@ -1770,7 +1771,7 @@ static inline void count_objcg_event(struct obj_cgroup *objcg,
 
     rcu_read_lock();
     memcg = obj_cgroup_memcg(objcg);
-    count_memcg_events(memcg, idx, 1);
+    count_memcg_events(memcg, idx, count);
     rcu_read_unlock();
 }
 
@@ -1825,8 +1826,9 @@ static inline struct mem_cgroup *mem_cgroup_from_slab_obj(void *p)
     return NULL;
 }
 
-static inline void count_objcg_event(struct obj_cgroup *objcg,
-                     enum vm_event_item idx)
+static inline void count_objcg_events(struct obj_cgroup *objcg,
+                      enum vm_event_item idx,
+                      unsigned long count)
 {
 }
 
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index aed952d04132..f70d0958095c 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -134,6 +134,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 #ifdef CONFIG_SWAP
         SWAP_RA,
         SWAP_RA_HIT,
+        SWPIN_ZERO,
+        SWPOUT_ZERO,
 #ifdef CONFIG_KSM
         KSM_SWPIN_COPY,
 #endif
diff --git a/include/net/tls.h b/include/net/tls.h
index 3a33924db2bc..61fef2880114 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -390,8 +390,12 @@ tls_offload_ctx_tx(const struct tls_context *tls_ctx)
 
 static inline bool tls_sw_has_ctx_tx(const struct sock *sk)
 {
-    struct tls_context *ctx = tls_get_ctx(sk);
+    struct tls_context *ctx;
+
+    if (!sk_is_inet(sk) || !inet_test_bit(IS_ICSK, sk))
+        return false;
 
+    ctx = tls_get_ctx(sk);
     if (!ctx)
         return false;
     return !!tls_sw_ctx_tx(ctx);
@@ -399,8 +403,12 @@ static inline bool tls_sw_has_ctx_tx(const struct sock *sk)
 
 static inline bool tls_sw_has_ctx_rx(const struct sock *sk)
 {
-    struct tls_context *ctx = tls_get_ctx(sk);
+    struct tls_context *ctx;
+
+    if (!sk_is_inet(sk) || !inet_test_bit(IS_ICSK, sk))
+        return false;
 
+    ctx = tls_get_ctx(sk);
     if (!ctx)
         return false;
     return !!tls_sw_ctx_rx(ctx);
diff --git a/mm/gup.c b/mm/gup.c
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -2273,20 +2273,57 @@ struct page *get_dump_page(unsigned long addr)
 #endif /* CONFIG_ELF_CORE */
 
 #ifdef CONFIG_MIGRATION
+
+/*
+ * An array of either pages or folios ("pofs"). Although it may seem tempting to
+ * avoid this complication, by simply interpreting a list of folios as a list of
+ * pages, that approach won't work in the longer term, because eventually the
+ * layouts of struct page and struct folio will become completely different.
+ * Furthermore, this pof approach avoids excessive page_folio() calls.
+ */
+struct pages_or_folios {
+    union {
+        struct page **pages;
+        struct folio **folios;
+        void **entries;
+    };
+    bool has_folios;
+    long nr_entries;
+};
+
+static struct folio *pofs_get_folio(struct pages_or_folios *pofs, long i)
+{
+    if (pofs->has_folios)
+        return pofs->folios[i];
+    return page_folio(pofs->pages[i]);
+}
+
+static void pofs_clear_entry(struct pages_or_folios *pofs, long i)
+{
+    pofs->entries[i] = NULL;
+}
+
+static void pofs_unpin(struct pages_or_folios *pofs)
+{
+    if (pofs->has_folios)
+        unpin_folios(pofs->folios, pofs->nr_entries);
+    else
+        unpin_user_pages(pofs->pages, pofs->nr_entries);
+}
+
 /*
  * Returns the number of collected folios. Return value is always >= 0.
  */
 static unsigned long collect_longterm_unpinnable_folios(
-        struct list_head *movable_folio_list,
-        unsigned long nr_folios,
-        struct folio **folios)
+        struct list_head *movable_folio_list,
+        struct pages_or_folios *pofs)
 {
     unsigned long i, collected = 0;
     struct folio *prev_folio = NULL;
     bool drain_allow = true;
 
-    for (i = 0; i < nr_folios; i++) {
-        struct folio *folio = folios[i];
+    for (i = 0; i < pofs->nr_entries; i++) {
+        struct folio *folio = pofs_get_folio(pofs, i);
 
         if (folio == prev_folio)
             continue;
@@ -2327,16 +2364,15 @@ static unsigned long collect_longterm_unpinnable_folios(
  * Returns -EAGAIN if all folios were successfully migrated or -errno for
  * failure (or partial success).
  */
-static int migrate_longterm_unpinnable_folios(
-        struct list_head *movable_folio_list,
-        unsigned long nr_folios,
-        struct folio **folios)
+static int
+migrate_longterm_unpinnable_folios(struct list_head *movable_folio_list,
+                   struct pages_or_folios *pofs)
 {
     int ret;
     unsigned long i;
 
-    for (i = 0; i < nr_folios; i++) {
-        struct folio *folio = folios[i];
+    for (i = 0; i < pofs->nr_entries; i++) {
+        struct folio *folio = pofs_get_folio(pofs, i);
 
         if (folio_is_device_coherent(folio)) {
             /*
@@ -2344,7 +2380,7 @@ static int migrate_longterm_unpinnable_folios(
              * convert the pin on the source folio to a normal
              * reference.
              */
-            folios[i] = NULL;
+            pofs_clear_entry(pofs, i);
             folio_get(folio);
             gup_put_folio(folio, 1, FOLL_PIN);
 
@@ -2363,8 +2399,8 @@
          * calling folio_isolate_lru() which takes a reference so the
          * folio won't be freed if it's migrating.
          */
-        unpin_folio(folios[i]);
-        folios[i] = NULL;
+        unpin_folio(folio);
+        pofs_clear_entry(pofs, i);
     }
 
     if (!list_empty(movable_folio_list)) {
@@ -2387,12 +2423,26 @@
     return -EAGAIN;
 
 err:
-    unpin_folios(folios, nr_folios);
+    pofs_unpin(pofs);
     putback_movable_pages(movable_folio_list);
 
     return ret;
 }
 
+static long
+check_and_migrate_movable_pages_or_folios(struct pages_or_folios *pofs)
+{
+    LIST_HEAD(movable_folio_list);
+    unsigned long collected;
+
+    collected = collect_longterm_unpinnable_folios(&movable_folio_list,
+                               pofs);
+    if (!collected)
+        return 0;
+
+    return migrate_longterm_unpinnable_folios(&movable_folio_list, pofs);
+}
+
 /*
  * Check whether all folios are *allowed* to be pinned indefinitely (long term).
  * Rather confusingly, all folios in the range are required to be pinned via
@@ -2417,16 +2467,13 @@ err:
 static long check_and_migrate_movable_folios(unsigned long nr_folios,
                          struct folio **folios)
 {
-    unsigned long collected;
-    LIST_HEAD(movable_folio_list);
+    struct pages_or_folios pofs = {
+        .folios = folios,
+        .has_folios = true,
+        .nr_entries = nr_folios,
+    };
 
-    collected = collect_longterm_unpinnable_folios(&movable_folio_list,
-                               nr_folios, folios);
-    if (!collected)
-        return 0;
-
-    return migrate_longterm_unpinnable_folios(&movable_folio_list,
-                          nr_folios, folios);
+    return check_and_migrate_movable_pages_or_folios(&pofs);
 }
 
 /*
@@ -2436,22 +2483,13 @@ static long check_and_migrate_movable_folios(unsigned long nr_folios,
 static long check_and_migrate_movable_pages(unsigned long nr_pages,
                         struct page **pages)
 {
-    struct folio **folios;
-    long i, ret;
+    struct pages_or_folios pofs = {
+        .pages = pages,
+        .has_folios = false,
+        .nr_entries = nr_pages,
+    };
 
-    folios = kmalloc_array(nr_pages, sizeof(*folios), GFP_KERNEL);
-    if (!folios) {
-        unpin_user_pages(pages, nr_pages);
-        return -ENOMEM;
-    }
-
-    for (i = 0; i < nr_pages; i++)
-        folios[i] = page_folio(pages[i]);
-
-    ret = check_and_migrate_movable_folios(nr_pages, folios);
-
-    kfree(folios);
-    return ret;
+    return check_and_migrate_movable_pages_or_folios(&pofs);
 }
 #else
 static long check_and_migrate_movable_pages(unsigned long nr_pages,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 03fd4bc39ea1..5734d5d5060f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3790,7 +3790,9 @@ next:
          * in the case it was underused, then consider it used and
          * don't add it back to split_queue.
          */
-        if (!did_split && !folio_test_partially_mapped(folio)) {
+        if (did_split) {
+            ; /* folio already removed from list */
+        } else if (!folio_test_partially_mapped(folio)) {
             list_del_init(&folio->_deferred_list);
             removed++;
         } else {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 06df2af97415..53db98d2c4a1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -431,6 +431,10 @@ static const unsigned int memcg_vm_event_stat[] = {
     PGDEACTIVATE,
     PGLAZYFREE,
     PGLAZYFREED,
+#ifdef CONFIG_SWAP
+    SWPIN_ZERO,
+    SWPOUT_ZERO,
+#endif
 #ifdef CONFIG_ZSWAP
     ZSWPIN,
     ZSWPOUT,
diff --git a/mm/nommu.c b/mm/nommu.c
index e9b5f527ab5b..9cb6e99215e2 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -573,7 +573,7 @@ static int delete_vma_from_mm(struct vm_area_struct *vma)
     VMA_ITERATOR(vmi, vma->vm_mm, vma->vm_start);
 
     vma_iter_config(&vmi, vma->vm_start, vma->vm_end);
-    if (vma_iter_prealloc(&vmi, vma)) {
+    if (vma_iter_prealloc(&vmi, NULL)) {
         pr_warn("Allocation of vma tree for process %d failed\n",
                current->pid);
         return -ENOMEM;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c6c7bb3ea71b..216fbbfbedcf 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1048,6 +1048,7 @@ __always_inline bool free_pages_prepare(struct page *page,
     bool skip_kasan_poison = should_skip_kasan_poison(page);
     bool init = want_init_on_free();
     bool compound = PageCompound(page);
+    struct folio *folio = page_folio(page);
 
     VM_BUG_ON_PAGE(PageTail(page), page);
 
@@ -1057,6 +1058,20 @@ __always_inline bool free_pages_prepare(struct page *page,
     if (memcg_kmem_online() && PageMemcgKmem(page))
         __memcg_kmem_uncharge_page(page, order);
 
+    /*
+     * In rare cases, when truncation or holepunching raced with
+     * munlock after VM_LOCKED was cleared, Mlocked may still be
+     * found set here. This does not indicate a problem, unless
+     * "unevictable_pgs_cleared" appears worryingly large.
+     */
+    if (unlikely(folio_test_mlocked(folio))) {
+        long nr_pages = folio_nr_pages(folio);
+
+        __folio_clear_mlocked(folio);
+        zone_stat_mod_folio(folio, NR_MLOCK, -nr_pages);
+        count_vm_events(UNEVICTABLE_PGCLEARED, nr_pages);
+    }
+
     if (unlikely(PageHWPoison(page)) && !order) {
         /* Do not let hwpoison pages hit pcplists/buddy */
         reset_page_owner(page, order);
diff --git a/mm/page_io.c b/mm/page_io.c
index 69536a2b3c13..01749b99fb54 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -204,7 +204,9 @@ static bool is_folio_zero_filled(struct folio *folio)
 
 static void swap_zeromap_folio_set(struct folio *folio)
 {
+    struct obj_cgroup *objcg = get_obj_cgroup_from_folio(folio);
     struct swap_info_struct *sis = swp_swap_info(folio->swap);
+    int nr_pages = folio_nr_pages(folio);
     swp_entry_t entry;
     unsigned int i;
 
@@ -212,6 +214,12 @@ static void swap_zeromap_folio_set(struct folio *folio)
         entry = page_swap_entry(folio_page(folio, i));
         set_bit(swp_offset(entry), sis->zeromap);
     }
+
+    count_vm_events(SWPOUT_ZERO, nr_pages);
+    if (objcg) {
+        count_objcg_events(objcg, SWPOUT_ZERO, nr_pages);
+        obj_cgroup_put(objcg);
+    }
 }
 
 static void swap_zeromap_folio_clear(struct folio *folio)
@@ -503,6 +511,7 @@ static void sio_read_complete(struct kiocb *iocb, long ret)
 static bool swap_read_folio_zeromap(struct folio *folio)
 {
     int nr_pages = folio_nr_pages(folio);
+    struct obj_cgroup *objcg;
     bool is_zeromap;
 
     /*
@@ -517,6 +526,13 @@ static bool swap_read_folio_zeromap(struct folio *folio)
     if (!is_zeromap)
         return false;
 
+    objcg = get_obj_cgroup_from_folio(folio);
+    count_vm_events(SWPIN_ZERO, nr_pages);
+    if (objcg) {
+        count_objcg_events(objcg, SWPIN_ZERO, nr_pages);
+        obj_cgroup_put(objcg);
+    }
+
     folio_zero_range(folio, 0, folio_size(folio));
     folio_mark_uptodate(folio);
     return true;
diff --git a/mm/swap.c b/mm/swap.c
index b8e3259ea2c4..59f30a981c6f 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -78,20 +78,6 @@ static void __page_cache_release(struct folio *folio, struct lruvec **lruvecp,
         lruvec_del_folio(*lruvecp, folio);
         __folio_clear_lru_flags(folio);
     }
-
-    /*
-     * In rare cases, when truncation or holepunching raced with
-     * munlock after VM_LOCKED was cleared, Mlocked may still be
-     * found set here. This does not indicate a problem, unless
-     * "unevictable_pgs_cleared" appears worryingly large.
-     */
-    if (unlikely(folio_test_mlocked(folio))) {
-        long nr_pages = folio_nr_pages(folio);
-
-        __folio_clear_mlocked(folio);
-        zone_stat_mod_folio(folio, NR_MLOCK, -nr_pages);
-        count_vm_events(UNEVICTABLE_PGCLEARED, nr_pages);
-    }
 }
 
 /*
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 46bd4b1a3c07..9c85bd46ab7f 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -929,7 +929,7 @@ static void swap_range_alloc(struct swap_info_struct *si, unsigned long offset,
             si->highest_bit = 0;
             del_from_avail_list(si);
 
-            if (vm_swap_full())
+            if (si->cluster_info && vm_swap_full())
                 schedule_work(&si->reclaim_work);
         }
     }
diff --git a/mm/vmstat.c b/mm/vmstat.c
index b5a4cea423e1..ac6a5aa34eab 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1415,6 +1415,8 @@ const char * const vmstat_text[] = {
 #ifdef CONFIG_SWAP
     "swap_ra",
     "swap_ra_hit",
+    "swpin_zero",
+    "swpout_zero",
 #ifdef CONFIG_KSM
     "ksm_swpin_copy",
 #endif
diff --git a/mm/zswap.c b/mm/zswap.c
index 162013952074..0030ce8fecfc 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1053,7 +1053,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
 
     count_vm_event(ZSWPWB);
     if (entry->objcg)
-        count_objcg_event(entry->objcg, ZSWPWB);
+        count_objcg_events(entry->objcg, ZSWPWB, 1);
 
     zswap_entry_free(entry);
 
@@ -1483,7 +1483,7 @@ bool zswap_store(struct folio *folio)
 
     if (objcg) {
         obj_cgroup_charge_zswap(objcg, entry->length);
-        count_objcg_event(objcg, ZSWPOUT);
+        count_objcg_events(objcg, ZSWPOUT, 1);
     }
 
     /*
@@ -1577,7 +1577,7 @@ bool zswap_load(struct folio *folio)
 
     count_vm_event(ZSWPIN);
     if (entry->objcg)
-        count_objcg_event(entry->objcg, ZSWPIN);
+        count_objcg_events(entry->objcg, ZSWPIN, 1);
 
     if (swapcache) {
         zswap_entry_free(entry);
diff --git a/net/core/filter.c b/net/core/filter.c
index e31ee8be2de0..fb56567c551e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2249,7 +2249,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
         rcu_read_unlock();
         return ret;
     }
-    rcu_read_unlock_bh();
+    rcu_read_unlock();
     if (dst)
         IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
 out_drop:
diff --git a/tools/testing/selftests/bpf/progs/verifier_bits_iter.c b/tools/testing/selftests/bpf/progs/verifier_bits_iter.c
index 156cc278e2fc..7c881bca9af5 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bits_iter.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bits_iter.c
@@ -57,9 +57,15 @@ __description("null pointer")
 __success __retval(0)
 int null_pointer(void)
 {
-    int nr = 0;
+    struct bpf_iter_bits iter;
+    int err, nr = 0;
     int *bit;
 
+    err = bpf_iter_bits_new(&iter, NULL, 1);
+    bpf_iter_bits_destroy(&iter);
+    if (err != -EINVAL)
+        return 1;
+
     bpf_for_each(bits, bit, NULL, 1)
         nr++;
     return nr;
@@ -194,15 +200,33 @@ __description("bad words")
 __success __retval(0)
 int bad_words(void)
 {
-    void *bad_addr = (void *)(3UL << 30);
-    int nr = 0;
+    void *bad_addr = (void *)-4095;
+    struct bpf_iter_bits iter;
+    volatile int nr;
     int *bit;
+    int err;
+
+    err = bpf_iter_bits_new(&iter, bad_addr, 1);
+    bpf_iter_bits_destroy(&iter);
+    if (err != -EFAULT)
+        return 1;
 
+    nr = 0;
     bpf_for_each(bits, bit, bad_addr, 1)
         nr++;
+    if (nr != 0)
+        return 2;
 
+    err = bpf_iter_bits_new(&iter, bad_addr, 4);
+    bpf_iter_bits_destroy(&iter);
+    if (err != -EFAULT)
+        return 3;
+
+    nr = 0;
     bpf_for_each(bits, bit, bad_addr, 4)
         nr++;
+    if (nr != 0)
+        return 4;
 
-    return nr;
+    return 0;
 }
diff --git a/tools/testing/selftests/mm/hugetlb_dio.c b/tools/testing/selftests/mm/hugetlb_dio.c
index 60001c142ce9..432d5af15e66 100644
--- a/tools/testing/selftests/mm/hugetlb_dio.c
+++ b/tools/testing/selftests/mm/hugetlb_dio.c
@@ -44,6 +44,13 @@ void run_dio_using_hugetlb(unsigned int start_off, unsigned int end_off)
     if (fd < 0)
         ksft_exit_fail_perror("Error opening file\n");
 
+    /* Get the free huge pages before allocation */
+    free_hpage_b = get_free_hugepages();
+    if (free_hpage_b == 0) {
+        close(fd);
+        ksft_exit_skip("No free hugepage, exiting!\n");
+    }
+
     /* Allocate a hugetlb page */
     orig_buffer = mmap(NULL, h_pagesize, mmap_prot, mmap_flags, -1, 0);
     if (orig_buffer == MAP_FAILED) {